httpClient = $httpClient;
    $this->entityTypeManager = $entityTypeManager;
    $this->dateFormatter = $dateFormatter;
  }

  /**
   * Import all events to taxonomy.
   *
   * Existing events are removed first via removeExistingEvents(). Then one
   * schedule page per year is fetched and every qualifying table row is saved
   * as a new term.
   *
   * @param int $first_year
   *   First schedule year to import (inclusive).
   * @param int $last_year
   *   Last schedule year to import (inclusive).
   */
  public function importEvents(int $first_year = 2000, int $last_year = 2024): void {
    $logger = \Drupal::logger('ufc');

    // First delete all events :-).
    $this->removeExistingEvents();

    for ($year = $first_year; $year <= $last_year; $year++) {
      $year_event_url = self::EVENTS_BASE . "{$year}/league/ufc";
      $event_listing = $this->httpClient
        ->get($year_event_url)
        ->getBody()
        ->getContents();
      $crawler = new Crawler($event_listing);
      $events = $crawler->filter('.Schedule__EventLeague--ufc tbody tr');

      foreach ($events as $event) {
        // processEvent() declares a string year; cast explicitly so the call
        // also works when strict types are enabled.
        $term_build = $this->processEvent($event, (string) $year);
        if (empty($term_build)) {
          continue;
        }

        if ($this->shouldSkipEvent($term_build['name'])) {
          $logger->warning("Skipping " . $term_build['name']);
          continue;
        }

        if (Term::create($term_build)->save()) {
          $logger->notice("Creating new term: " . $term_build['name']);
        }
        else {
          $logger->alert("Unable to save new event.");
        }
      }
    }
  }

  /**
   * Build the term values for one schedule table row.
   *
   * @param \DOMElement $event
   *   A table row from the schedule listing.
   * @param string $year
   *   Year appended to the row's date text before it is converted.
   *
   * @return array
   *   Values for Term::create() with the keys 'vid', 'field_event_date',
   *   'name' and 'field_event_url'. An empty array is returned when the row
   *   does not have exactly four element children or when its second cell
   *   contains no element to read the link from.
   */
  private function processEvent(\DOMElement $event, string $year): array {
    // childNodes also contains text nodes, so collect the element children
    // before addressing the cells by position.
    $cells = [];
    foreach ($event->childNodes as $child) {
      if ($child instanceof \DOMElement) {
        $cells[] = $child;
      }
    }
    if (count($cells) !== 4) {
      return [];
    }
    [$date_cell, $name_cell] = $cells;

    // The href is read from the first element inside the name cell. Without
    // one there is no event URL, so the row is skipped.
    $link = $name_cell->firstElementChild;
    if (!$link instanceof \DOMElement) {
      return [];
    }

    return [
      'vid' => 'ufc_events',
      'field_event_date' => $this->convertDate($date_cell->textContent . " $year"),
      'name' => $name_cell->textContent,
      'field_event_url' => $link->getAttribute('href'),
    ];
  }

  /**
   * If this event should be skipped.
   *
   * @param string $term_name
   *   Event name taken from the schedule listing.
   *
   * @return bool
   *   TRUE when the name contains one of the excluded series names.
   */
  private function shouldSkipEvent(string $term_name): bool {
    $events_to_avoid = [
      "Contender Series",
      "The Ultimate Fighter",
      "NEF: Fight Night",
      "TUF Brazil",
    ];
    foreach ($events_to_avoid as $avoid) {
      if (str_contains($term_name, $avoid)) {
        return TRUE;
      }
    }
    return FALSE;
  }

  /**
   * Converts a string into a database storable string.
   *
   * Output format: YYYY-MM-DD.
* */ private function convertDate(string $date_str): string { return $this->dateFormatter->format(strtotime($date_str), 'custom', 'Y-m-d'); } /** * Clear out all prior events in the vocab. */ private function removeExistingEvents(): void { \Drupal::logger('ufc')->notice("Removing all former events."); $terms = $this->entityTypeManager->getStorage('taxonomy_term')->loadByProperties(['vid' => 'ufc_events']); foreach ($terms as $term) { $loaded = Term::load($term->id()); $term_name = $term->name->value; \Drupal::logger('ufc')->notice("Removing $term_name."); try { $loaded->delete(); \Drupal::logger('ufc')->notice("Operation successful."); } catch (\Exception $e) { \Drupal::logger('ufc')->alert($e->getMessage()); } } } /** * Create fights from events. */ public function createFights(): void { // Clear out past fights. $existing_fights = $this->entityTypeManager->getStorage('node')->loadByProperties(['type' => 'fight']); foreach ($existing_fights as $existing_fight) { $delete = $existing_fight->delete(); if ($delete) { \Drupal::logger('ufc')->notice("Removed " . $existing_fight->getTitle()); } } // Go get all events. $all_events = $this->entityTypeManager->getStorage('taxonomy_term') ->loadByProperties(['vid' => 'ufc_events']); foreach ($all_events as $event) { $event_page_html = $this->httpClient ->get(self::EVENT_BASE . $event->field_event_url->uri) ->getBody()->getContents(); $crawler = new Crawler($event_page_html); $fight_result_rows = $crawler->filter(".MMAGamestrip"); foreach ($fight_result_rows as $fight_result_row) { $result = $this->processFightResultRow($fight_result_row); if (empty($result)) { continue; } $result['event'] = $event->id(); $this->createFightNodeFromResult($result); } } } /** * Create fight node from results of extracted fight. */ private function createFightNodeFromResult(array $result): void { // Result is fighter_1, fighter_2, winner -- all strings. // Need a way to look up fighters by name. 
$fighter_1_name = $result['fighter_1']; $fighter_2_name = $result['fighter_2']; $fight_winner = $result['winner']; $fighter_1_id = $this->getFighterIdByName($fighter_1_name); $fighter_2_id = $this->getFighterIdByName($fighter_2_name); $fight_winner_id = $this->getFighterIdByName($fight_winner); $fight = Node::create([ 'type' => 'fight', 'title' => "$fighter_1_name vs. $fighter_2_name", 'field_fighter_one' => [ 'target_id' => $fighter_1_id, ], 'field_fighter_two' => [ 'target_id' => $fighter_2_id, ], 'field_event' => [ 'target_id' => $result['event'], ], 'field_result' => [ 'target_id' => $fight_winner_id, ], ]); if ($fight->save()) { \Drupal::logger('ufc')->notice("$fighter_1_name vs. $fighter_2_name Created"); } else { \Drupal::logger('ufc')->alert("$fighter_1_name vs. $fighter_2_name FAILED"); } } /** * Retrieve a fighter ID by name. */ private function getFighterIdByName(string $name): int { $existing_node = reset($this->entityTypeManager->getStorage('node')->loadByProperties(['title' => $name])); if ($existing_node) { return $existing_node->id(); } return 0; } /** * Iterate over fight result rows to extract results. */ private function processFightResultRow(\DOMElement $row): array { $results = [ 'winner' => 0, ]; $comp_crawler = new Crawler($this->getInnerHtml($row)); $competitors = $comp_crawler->filter(".MMACompetitor"); $fighter_num = 1; // @todo - this is crazy. Maybe do another crawler instead. foreach ($competitors as $competitor) { foreach ($competitor->childNodes as $child) { if ($child->tagName == 'div') { foreach ($child->childNodes as $grandchild) { foreach ($grandchild->childNodes as $gg_child) { if ($gg_child->tagName == 'h2') { $results["fighter_$fighter_num"] = $gg_child->textContent; if ($competitor->childElementCount == 2) { $results['winner'] = $gg_child->textContent; } $fighter_num++; } } } } } } return $results; } /** * Get the inner html from a DOMElement. 
*/ private function getInnerHtml(\DOMElement $node) { $innerHTML= ''; $children = $node->childNodes; foreach ($children as $child) { $innerHTML .= $child->ownerDocument->saveXML( $child ); } return $innerHTML; } }