Rebuild
This commit is contained in:
280
web/modules/custom/ufc/src/Services/FightImporter.php
Normal file
280
web/modules/custom/ufc/src/Services/FightImporter.php
Normal file
@@ -0,0 +1,280 @@
|
||||
<?php
|
||||
|
||||
namespace Drupal\ufc\Services;
|
||||
|
||||
use Drupal\Core\Datetime\DateFormatter;
|
||||
use Drupal\Core\Entity\EntityTypeManager;
|
||||
use Drupal\taxonomy\Entity\Term;
|
||||
use Drupal\node\Entity\Node;
|
||||
use GuzzleHttp\Client;
|
||||
use Symfony\Component\DomCrawler\Crawler;
|
||||
|
||||
/**
 * Imports UFC events and fight results scraped from ESPN.
 *
 * Events are stored as terms in the "ufc_events" vocabulary and fights as
 * nodes of type "fight". Both imports are full rebuilds: the existing terms
 * (or fight nodes) are deleted before new ones are created.
 */
class FightImporter {

  /**
   * Base URL of the yearly ESPN schedule listing; the year is appended.
   */
  const EVENTS_BASE = "https://www.espn.com/mma/schedule/_/year/";

  /**
   * Prefix prepended to the event URL stored on each event term.
   */
  const EVENT_BASE = "https://www.espn.com";

  /**
   * The Guzzle HTTP Client.
   *
   * @var \GuzzleHttp\Client
   */
  protected $httpClient;

  /**
   * The entity type manager.
   *
   * @var \Drupal\Core\Entity\EntityTypeManager
   */
  protected $entityTypeManager;

  /**
   * The date formatter.
   *
   * @var \Drupal\Core\Datetime\DateFormatter
   */
  protected $dateFormatter;

  /**
   * Public constructor.
   *
   * @param \GuzzleHttp\Client $httpClient
   *   The guzzle http client.
   * @param \Drupal\Core\Entity\EntityTypeManager $entityTypeManager
   *   The entity type manager.
   * @param \Drupal\Core\Datetime\DateFormatter $dateFormatter
   *   The date formatter service.
   */
  public function __construct(
    Client $httpClient,
    EntityTypeManager $entityTypeManager,
    DateFormatter $dateFormatter
  ) {
    $this->httpClient = $httpClient;
    $this->entityTypeManager = $entityTypeManager;
    $this->dateFormatter = $dateFormatter;
  }

  /**
   * Import all events to taxonomy.
   *
   * Deletes every existing term in the "ufc_events" vocabulary, then fetches
   * one schedule page per year and creates a term for each event row that is
   * not filtered out by shouldSkipEvent().
   *
   * @param int $first_year
   *   First schedule year to import (inclusive).
   * @param int $last_year
   *   Last schedule year to import (inclusive).
   */
  public function importEvents(int $first_year = 2000, int $last_year = 2024): void {
    // The import is a full rebuild, so start from an empty vocabulary.
    $this->removeExistingEvents();

    for ($year = $first_year; $year <= $last_year; $year++) {
      $year_event_url = self::EVENTS_BASE . "{$year}/league/ufc";
      $event_listing = $this->httpClient
        ->get($year_event_url)->getBody()->getContents();
      $crawler = new Crawler($event_listing);
      $events = $crawler->filter('.Schedule__EventLeague--ufc tbody tr');

      foreach ($events as $event) {
        $term_build = $this->processEvent($event, (string) $year);
        if (empty($term_build)) {
          continue;
        }
        if ($this->shouldSkipEvent($term_build['name'])) {
          \Drupal::logger('ufc')->warning("Skipping " . $term_build['name']);
          continue;
        }
        // Entity saves report failure by throwing, so a failed term is
        // caught and logged here instead of aborting the remaining import.
        try {
          Term::create($term_build)->save();
          \Drupal::logger('ufc')->notice("Creating new term: " . $term_build['name']);
        }
        catch (\Exception $e) {
          \Drupal::logger('ufc')->alert("Unable to save new event: " . $term_build['name'] . " (" . $e->getMessage() . ")");
        }
      }
    }
  }

  /**
   * Turn one schedule table row into the values for an event term.
   *
   * @param \DOMElement $event
   *   A table row from the schedule listing.
   * @param string $year
   *   The year of the listing; appended to the row's date text before parsing.
   *
   * @return array
   *   Term values (vid, field_event_date, name, field_event_url), or an empty
   *   array when the row should be ignored.
   */
  private function processEvent(\DOMElement $event, string $year): array {
    // Only rows with exactly four cells are treated as events.
    if ($event->childElementCount !== 4) {
      return [];
    }

    // Work on element children only: childNodes also contains whitespace
    // text nodes, which would shift the cell positions.
    [$date_cell, $name_cell] = $this->elementChildren($event);

    // The event URL is read from the first element inside the name cell.
    $link = $this->elementChildren($name_cell)[0] ?? NULL;
    if ($link === NULL) {
      return [];
    }

    try {
      $event_date = $this->convertDate($date_cell->textContent . " $year");
    }
    catch (\InvalidArgumentException $e) {
      \Drupal::logger('ufc')->warning($e->getMessage());
      return [];
    }

    return [
      'vid' => 'ufc_events',
      'field_event_date' => $event_date,
      'name' => $name_cell->textContent,
      'field_event_url' => $link->getAttribute('href'),
    ];
  }

  /**
   * Collect the element children of a node, ignoring text and comments.
   *
   * @param \DOMNode $node
   *   The parent node.
   *
   * @return \DOMElement[]
   *   The child elements in document order, indexed from zero.
   */
  private function elementChildren(\DOMNode $node): array {
    return array_values(array_filter(
      iterator_to_array($node->childNodes),
      static fn ($child): bool => $child instanceof \DOMElement
    ));
  }

  /**
   * If this event should be skipped.
   *
   * @param string $term_name
   *   The event name.
   *
   * @return bool
   *   TRUE when the name contains one of the excluded series names.
   */
  private function shouldSkipEvent(string $term_name): bool {
    $events_to_avoid = [
      "Contender Series",
      "The Ultimate Fighter",
      "NEF: Fight Night",
      "TUF Brazil",
    ];
    foreach ($events_to_avoid as $avoid) {
      if (str_contains($term_name, $avoid)) {
        return TRUE;
      }
    }
    return FALSE;
  }

  /**
   * Converts a string into a database storable string.
   *
   * Output format: YYYY-MM-DD.
   *
   * @param string $date_str
   *   Any date text understood by strtotime().
   *
   * @return string
   *   The formatted date.
   *
   * @throws \InvalidArgumentException
   *   When the text cannot be parsed as a date.
   */
  private function convertDate(string $date_str): string {
    $timestamp = strtotime($date_str);
    // strtotime() returns FALSE on failure; never hand that to the formatter.
    if ($timestamp === FALSE) {
      throw new \InvalidArgumentException("Unable to parse event date: $date_str");
    }
    return $this->dateFormatter->format($timestamp, 'custom', 'Y-m-d');
  }

  /**
   * Clear out all prior events in the vocab.
   */
  private function removeExistingEvents(): void {
    \Drupal::logger('ufc')->notice("Removing all former events.");
    $terms = $this->entityTypeManager->getStorage('taxonomy_term')->loadByProperties(['vid' => 'ufc_events']);
    foreach ($terms as $term) {
      $term_name = $term->name->value;
      \Drupal::logger('ufc')->notice("Removing $term_name.");
      try {
        // The storage already returned loaded terms; delete them directly.
        $term->delete();
        \Drupal::logger('ufc')->notice("Operation successful.");
      }
      catch (\Exception $e) {
        \Drupal::logger('ufc')->alert($e->getMessage());
      }
    }
  }

  /**
   * Create fights from events.
   *
   * Deletes all existing "fight" nodes, then fetches the page of every event
   * term and creates one fight node per result row that yields two fighters.
   */
  public function createFights(): void {
    // Clear out past fights.
    $existing_fights = $this->entityTypeManager->getStorage('node')->loadByProperties(['type' => 'fight']);
    foreach ($existing_fights as $existing_fight) {
      $title = $existing_fight->getTitle();
      try {
        // delete() has no return value, so success is "did not throw".
        $existing_fight->delete();
        \Drupal::logger('ufc')->notice("Removed " . $title);
      }
      catch (\Exception $e) {
        \Drupal::logger('ufc')->alert($e->getMessage());
      }
    }

    // Go get all events.
    $all_events = $this->entityTypeManager->getStorage('taxonomy_term')
      ->loadByProperties(['vid' => 'ufc_events']);
    foreach ($all_events as $event) {
      $event_page_html = $this->httpClient
        ->get(self::EVENT_BASE . $event->field_event_url->uri)
        ->getBody()->getContents();
      $crawler = new Crawler($event_page_html);
      $fight_result_rows = $crawler->filter(".MMAGamestrip");
      foreach ($fight_result_rows as $fight_result_row) {
        $result = $this->processFightResultRow($fight_result_row);
        if (empty($result)) {
          continue;
        }
        $result['event'] = $event->id();
        $this->createFightNodeFromResult($result);
      }
    }
  }

  /**
   * Create fight node from results of extracted fight.
   *
   * @param array $result
   *   Keys: fighter_1 and fighter_2 (names), winner (name, or an empty
   *   string when no winner was detected) and event (event term ID).
   */
  private function createFightNodeFromResult(array $result): void {
    $fighter_1_name = $result['fighter_1'];
    $fighter_2_name = $result['fighter_2'];
    $fight_winner = $result['winner'] ?? '';

    // Only look up a winner when one was actually extracted.
    $fight_winner_id = (is_string($fight_winner) && $fight_winner !== '')
      ? $this->getFighterIdByName($fight_winner)
      : 0;

    $fight = Node::create([
      'type' => 'fight',
      'title' => "$fighter_1_name vs. $fighter_2_name",
      'field_fighter_one' => $this->referenceTo($this->getFighterIdByName($fighter_1_name)),
      'field_fighter_two' => $this->referenceTo($this->getFighterIdByName($fighter_2_name)),
      'field_event' => [
        'target_id' => $result['event'],
      ],
      'field_result' => $this->referenceTo($fight_winner_id),
    ]);

    // Entity saves report failure by throwing; log it and carry on.
    try {
      $fight->save();
      \Drupal::logger('ufc')->notice("$fighter_1_name vs. $fighter_2_name Created");
    }
    catch (\Exception $e) {
      \Drupal::logger('ufc')->alert("$fighter_1_name vs. $fighter_2_name FAILED");
    }
  }

  /**
   * Build an entity reference field value for a node ID.
   *
   * @param int $id
   *   The node ID, or 0 when the lookup found nothing.
   *
   * @return array
   *   A target_id value, or an empty array (empty field) when there is no
   *   node to point at.
   */
  private function referenceTo(int $id): array {
    return $id > 0 ? ['target_id' => $id] : [];
  }

  /**
   * Retrieve a fighter ID by name.
   *
   * NOTE(review): the lookup matches any node by title and is not limited to
   * a fighter bundle — confirm whether that is intended.
   *
   * @param string $name
   *   The fighter name, compared against node titles.
   *
   * @return int
   *   The ID of the first matching node, or 0 when none matches.
   */
  private function getFighterIdByName(string $name): int {
    // reset() takes its argument by reference, so it needs a real variable.
    $matches = $this->entityTypeManager->getStorage('node')->loadByProperties(['title' => $name]);
    $existing_node = reset($matches);
    if ($existing_node) {
      return (int) $existing_node->id();
    }
    return 0;
  }

  /**
   * Iterate over fight result rows to extract results.
   *
   * Fighter names are read from h2 elements nested at a fixed depth inside
   * each ".MMACompetitor" element:
   * competitor > div > (any node) > h2.
   *
   * @param \DOMElement $row
   *   One fight result row.
   *
   * @return array
   *   Keys fighter_1, fighter_2 and winner, or an empty array when fewer
   *   than two fighters could be extracted from the row.
   */
  private function processFightResultRow(\DOMElement $row): array {
    $results = [
      'winner' => '',
    ];
    $comp_crawler = new Crawler($this->getInnerHtml($row));
    $competitors = $comp_crawler->filter(".MMACompetitor");
    $fighter_num = 1;

    // @todo - this is crazy. Maybe do another crawler instead.
    foreach ($competitors as $competitor) {
      foreach ($competitor->childNodes as $child) {
        // Text nodes have no tag name; only elements can be the wrapper div.
        if (!($child instanceof \DOMElement) || $child->tagName != 'div') {
          continue;
        }
        foreach ($child->childNodes as $grandchild) {
          foreach ($grandchild->childNodes as $gg_child) {
            if (!($gg_child instanceof \DOMElement) || $gg_child->tagName != 'h2') {
              continue;
            }
            $results["fighter_$fighter_num"] = $gg_child->textContent;
            // A competitor with exactly two child elements is taken to be
            // the winner of the fight.
            if ($competitor->childElementCount == 2) {
              $results['winner'] = $gg_child->textContent;
            }
            $fighter_num++;
          }
        }
      }
    }

    // A fight node needs both fighters; signal "nothing usable" otherwise.
    if (!isset($results['fighter_1'], $results['fighter_2'])) {
      return [];
    }

    return $results;
  }

  /**
   * Get the inner html from a DOMElement.
   *
   * @param \DOMElement $node
   *   The element whose children should be serialized.
   *
   * @return string
   *   The concatenated markup of all child nodes, without the element's own
   *   tag.
   */
  private function getInnerHtml(\DOMElement $node): string {
    $innerHTML = '';
    foreach ($node->childNodes as $child) {
      $innerHTML .= $child->ownerDocument->saveXML($child);
    }
    return $innerHTML;
  }

}
|
||||
241
web/modules/custom/ufc/src/Services/FighterImporter.php
Normal file
241
web/modules/custom/ufc/src/Services/FighterImporter.php
Normal file
@@ -0,0 +1,241 @@
|
||||
<?php
|
||||
|
||||
namespace Drupal\ufc\Services;
|
||||
|
||||
use Drupal\Core\Entity\EntityTypeManager;
|
||||
use Drupal\Core\Cache\CacheBackendInterface;
|
||||
use Drupal\Core\Cache\Cache;
|
||||
use Drupal\media\Media;
|
||||
use Drupal\ufc\Fighter;
|
||||
use GuzzleHttp\Client;
|
||||
use Symfony\Component\DomCrawler\Crawler;
|
||||
|
||||
/**
 * Imports fighters from the athlete listings on UFC.com.
 *
 * The listing of every configured weight class is paged through, the
 * athlete cards on each page are collected, and each fighter is then
 * created or updated through the Fighter helper class.
 */
class FighterImporter {

  /**
   * Guzzle http client service.
   *
   * @var \GuzzleHttp\Client
   */
  protected $httpClient;

  /**
   * Entity type manager service.
   *
   * @var \Drupal\Core\Entity\EntityTypeManager
   */
  protected $entityTypeManager;

  /**
   * All collected fighters, keyed by weight class, then by name sans spaces.
   *
   * @var array[]
   */
  protected $fighters = [];

  /**
   * The current weight class.
   *
   * @var string
   */
  protected $weightClass;

  /**
   * The UFC cache bin.
   *
   * @var \Drupal\Core\Cache\CacheBackendInterface
   */
  protected $cache;

  /**
   * The base url for fighter lists.
   *
   * Ends in an incomplete "%3" on purpose: every division code starts with
   * "A", so the concatenation yields "%3A" followed by the numeric key.
   *
   * @var string
   */
  const UFC_BASE = "https://www.ufc.com/athletes/all?filters%5B0%5D=weight_class%3";

  /**
   * All applicable divisions and their keys on UFC.com.
   *
   * @var string[]
   */
  protected $divisions = [
    'heavyweight' => "A11",
    'light_heavyweight' => "A13",
    'middleweight' => "A14",
    'welterweight' => "A15",
    'lightweight' => "A12",
    'featherweight' => "A9",
    'bantamweight' => "A8",
    'flyweight' => "A10",
  ];

  /**
   * Public constructor for Fighter Importer.
   *
   * @param \GuzzleHttp\Client $httpClient
   *   The guzzle http client.
   * @param \Drupal\Core\Entity\EntityTypeManager $entityTypeManager
   *   The entity type manager.
   * @param \Drupal\Core\Cache\CacheBackendInterface $cache
   *   The UFC cache bin.
   */
  public function __construct(
    Client $httpClient,
    EntityTypeManager $entityTypeManager,
    CacheBackendInterface $cache
  ) {
    $this->httpClient = $httpClient;
    $this->entityTypeManager = $entityTypeManager;
    $this->cache = $cache;
  }

  /**
   * Import the fighters of every division.
   *
   * Collects the fighter list from UFC.com and processes it one division at
   * a time.
   */
  public function importFighters(): void {
    $fighters_by_div = $this->getListOfCurrentFighters();
    // Process each fighter into system.
    foreach ($fighters_by_div as $division => $fighters) {
      $this->processDivision($division, $fighters);
    }
  }

  /**
   * Process a division.
   *
   * For each fighter the profile page is scraped, a media entity is created
   * from the image, and the node with a matching title is updated — or a new
   * one is created when no node matches.
   *
   * @param string $div
   *   The division (weight class) key.
   * @param array $fighters
   *   Fighter data arrays with the keys firstname, lastname, image and
   *   profile.
   */
  private function processDivision(string $div, array $fighters): void {
    foreach ($fighters as $fighter_data) {
      $fighter = new Fighter($this->httpClient);
      $fighter->first_name = $fighter_data['firstname'];
      $fighter->last_name = $fighter_data['lastname'];
      $fighter->image = $fighter_data['image'];
      $fighter->class = $div;

      // A failed scrape is logged but does not stop the fighter from being
      // created or updated with the data that is available.
      if (!$fighter->scrapeDataFromFighterPage($fighter_data['profile'])) {
        \Drupal::logger('ufc')->alert("FAILED: $fighter->first_name $fighter->last_name to " . $fighter_data['profile']);
      }

      $fighter->createMediaEntityFromImage();

      // Check if node exists, by title.
      $title = $fighter->first_name . " " . $fighter->last_name;
      // reset() takes its argument by reference, so it needs a real variable.
      $matches = $this->entityTypeManager->getStorage('node')->loadByProperties(['title' => $title]);
      $node_lookup = reset($matches);

      if (!empty($node_lookup)) {
        // Update instead of create.
        $fighter->updatePlayerNode($node_lookup->id());
        \Drupal::logger('ufc')->notice("$title updated successfully.");
      }
      else {
        \Drupal::logger('ufc')->warning("No existing player found for $title...creating");
        $fighter->createPlayerNode();
      }
    }
  }

  /**
   * Get list of current fighters.
   *
   * @return array
   *   All collected fighters, keyed by division.
   */
  public function getListOfCurrentFighters(): array {
    foreach ($this->divisions as $division => $div_base_url) {
      $division_url = self::UFC_BASE . $div_base_url;
      // extractFighters() files fighters under the current weight class.
      $this->weightClass = $division;
      echo "Starting import for " . $division . "\n";
      $this->loopThroughFighterPages($division_url);
    }
    return $this->fighters;
  }

  /**
   * There is a pager, loop through to get all fighters.
   *
   * Requests pages 0 to 100 of the listing and stops at the first page that
   * contains the "No Result Found For" marker.
   *
   * @param string $base_url
   *   The listing URL of one division, without the page parameter.
   */
  public function loopThroughFighterPages($base_url): void {
    // @todo Implement caching to store instead of needing fresh requests.
    for ($page = 0; $page <= 100; $page++) {
      $url = $base_url . "&page=$page";
      // NOTE(review): TLS certificate verification is disabled for this
      // request — confirm this is intentional.
      $request = $this->httpClient->request('GET', $url, ['verify' => false]);
      $content = $request->getBody()->getContents();

      // str_contains() also detects the marker at offset 0, which a
      // truthiness check on strpos() would miss.
      if (str_contains($content, "No Result Found For")) {
        break;
      }

      \Drupal::logger('ufc')->notice("Extracting fighters from page $page.");
      $this->extractFighters($content);
    }
  }

  /**
   * Extract fighters from an html string.
   *
   * Every athlete card found in the markup is added to the fighter list of
   * the current weight class. A fighter that is already listed keeps its
   * name and profile data; only its image is refreshed.
   *
   * @param string $input
   *   The HTML of one listing page.
   */
  public function extractFighters(string $input): void {
    // Read name, profile link and image sources of every card in one pass.
    $cards = (new Crawler($input))
      ->filter('.c-listing-athlete-flipcard')
      ->each(function (Crawler $card) {
        return [
          'name' => $card->filter('.c-listing-athlete__name')->text(),
          'profile' => $card->filter('.e-button--black')->attr('href'),
          'images' => $card->filter('img')->each(function (Crawler $img) {
            return $img->attr('src');
          }),
        ];
      });

    foreach ($cards as $card) {
      $name_no_spaces = str_replace(" ", "", $card['name']);

      if (!isset($this->fighters[$this->weightClass][$name_no_spaces])) {
        // Everything after the first space is treated as the last name.
        $split_name = explode(" ", $card['name'], 2);
        $this->fighters[$this->weightClass][$name_no_spaces]['firstname'] = $split_name[0] ?? " ";
        $this->fighters[$this->weightClass][$name_no_spaces]['lastname'] = $split_name[1] ?? " ";
        $this->fighters[$this->weightClass][$name_no_spaces]['profile'] = $card['profile'];
      }

      // Only cards with exactly two images yield one; the first is used.
      $this->fighters[$this->weightClass][$name_no_spaces]['image'] = count($card['images']) == 2
        ? $card['images'][0]
        : FALSE;
    }
  }

}
|
||||
Reference in New Issue
Block a user