dchadwick/web/modules/custom/ufc/src/Services/FightImporter.php

281 lines
8.1 KiB
PHP
Raw Normal View History

2024-04-09 01:47:04 +00:00
<?php
namespace Drupal\ufc\Services;
use Drupal\Core\Datetime\DateFormatter;
use Drupal\Core\Datetime\DateFormatterInterface;
use Drupal\Core\Entity\EntityStorageException;
use Drupal\Core\Entity\EntityTypeManager;
use Drupal\Core\Entity\EntityTypeManagerInterface;
use Drupal\node\Entity\Node;
use Drupal\taxonomy\Entity\Term;
use GuzzleHttp\Client;
use GuzzleHttp\Exception\GuzzleException;
use Symfony\Component\DomCrawler\Crawler;
class FightImporter {
/**
 * Base URL of the ESPN MMA schedule.
 *
 * importEvents() appends a year and "/league/ufc" to build a listing URL.
 */
const EVENTS_BASE = "https://www.espn.com/mma/schedule/_/year/";
/**
 * Site root that createFights() prepends to the URL stored on an event term.
 */
const EVENT_BASE = "https://www.espn.com";
/**
 * The Guzzle HTTP client used to fetch schedule and event pages.
 *
 * @var \GuzzleHttp\Client
 */
protected $httpClient;
/**
 * The entity type manager, used to reach taxonomy term and node storage.
 *
 * @var \Drupal\Core\Entity\EntityTypeManagerInterface
 */
protected $entityTypeManager;
/**
 * The date formatter, used to render parsed event dates as Y-m-d.
 *
 * @var \Drupal\Core\Datetime\DateFormatterInterface
 */
protected $dateFormatter;
/**
 * Constructs a FightImporter.
 *
 * The entity type manager and date formatter are type-hinted by interface so
 * that decorated or substituted services are accepted as well as the core
 * implementations.
 *
 * @param \GuzzleHttp\Client $httpClient
 *   The Guzzle HTTP client.
 * @param \Drupal\Core\Entity\EntityTypeManagerInterface $entityTypeManager
 *   The entity type manager.
 * @param \Drupal\Core\Datetime\DateFormatterInterface $dateFormatter
 *   The date formatter service.
 */
public function __construct(
  Client $httpClient,
  EntityTypeManagerInterface $entityTypeManager,
  DateFormatterInterface $dateFormatter
) {
  $this->httpClient = $httpClient;
  $this->entityTypeManager = $entityTypeManager;
  $this->dateFormatter = $dateFormatter;
}
/**
 * Import all events to taxonomy.
 *
 * Removes every existing "ufc_events" term, then fetches the ESPN UFC
 * schedule page for each year in the range and creates one term per event
 * row that is not filtered out by shouldSkipEvent().
 *
 * @param int $start_year
 *   First schedule year to import (inclusive).
 * @param int $end_year
 *   Last schedule year to import (inclusive).
 */
public function importEvents(int $start_year = 2000, int $end_year = 2024): void {
  // First delete all events :-).
  $this->removeExistingEvents();
  $logger = \Drupal::logger('ufc');
  $term_storage = $this->entityTypeManager->getStorage('taxonomy_term');
  for ($year = $start_year; $year <= $end_year; $year++) {
    $year_event_url = self::EVENTS_BASE . "{$year}/league/ufc";
    try {
      $event_listing = $this->httpClient
        ->get($year_event_url)->getBody()->getContents();
    }
    catch (GuzzleException $e) {
      // A single unreachable schedule page must not abort the other years.
      $logger->alert("Unable to fetch events for $year: " . $e->getMessage());
      continue;
    }
    $crawler = new Crawler($event_listing);
    $events = $crawler->filter('.Schedule__EventLeague--ufc tbody tr');
    foreach ($events as $event) {
      $term_build = $this->processEvent($event, (string) $year);
      if (empty($term_build)) {
        continue;
      }
      if ($this->shouldSkipEvent($term_build['name'])) {
        $logger->warning("Skipping " . $term_build['name']);
        continue;
      }
      try {
        // save() reports failure by throwing, not by returning FALSE.
        $term_storage->create($term_build)->save();
        $logger->notice("Creating new term: " . $term_build['name']);
      }
      catch (EntityStorageException $e) {
        $logger->alert("Unable to save new event " . $term_build['name'] . ": " . $e->getMessage());
      }
    }
  }
}
/**
 * Build taxonomy term values from one row of the schedule table.
 *
 * @param \DOMElement $event
 *   A table row from the schedule listing.
 * @param string $year
 *   The schedule year, appended to the row's date text before parsing.
 *
 * @return array
 *   Values for a new "ufc_events" term: vid, field_event_date, name and, when
 *   the name cell contains a link with an href, field_event_url. An empty
 *   array is returned when the row does not have exactly four cells or when
 *   convertDate() throws an \InvalidArgumentException for its date text.
 */
private function processEvent(\DOMElement $event, string $year): array {
  if ($event->childElementCount !== 4) {
    return [];
  }
  // Index element children only: whitespace between cells appears in
  // childNodes as text nodes and would shift positional lookups.
  $cells = [];
  foreach ($event->childNodes as $child) {
    if ($child instanceof \DOMElement) {
      $cells[] = $child;
    }
  }
  [$date_cell, $name_cell] = $cells;
  try {
    $event_date = $this->convertDate(trim($date_cell->textContent) . " $year");
  }
  catch (\InvalidArgumentException $e) {
    \Drupal::logger('ufc')->warning("Skipping schedule row: " . $e->getMessage());
    return [];
  }
  $term_build = [
    'vid' => 'ufc_events',
    'field_event_date' => $event_date,
    'name' => trim($name_cell->textContent),
  ];
  // The cell may hold plain text with no anchor; only record a URL that
  // actually exists instead of dereferencing a missing first child.
  $link = $name_cell->getElementsByTagName('a')->item(0);
  if ($link instanceof \DOMElement && $link->hasAttribute('href')) {
    $term_build['field_event_url'] = $link->getAttribute('href');
  }
  return $term_build;
}
/**
 * Decide whether an event should be left out of the import.
 *
 * @param string $term_name
 *   The event name as scraped from the schedule.
 *
 * @return bool
 *   TRUE when the name contains any of the excluded series names (the match
 *   is case-sensitive), FALSE otherwise.
 */
private function shouldSkipEvent(string $term_name): bool {
  $excluded_series = [
    "Contender Series",
    "The Ultimate Fighter",
    "NEF: Fight Night",
    "TUF Brazil",
  ];
  $hits = array_filter(
    $excluded_series,
    static fn (string $series): bool => str_contains($term_name, $series)
  );
  return $hits !== [];
}
/**
 * Converts a string into a database storable string.
 *
 * Output format: YYYY-MM-DD.
 *
 * @param string $date_str
 *   Date text in any form strtotime() understands.
 *
 * @return string
 *   The date formatted as Y-m-d by the date formatter service.
 *
 * @throws \InvalidArgumentException
 *   When strtotime() cannot parse $date_str.
 */
private function convertDate(string $date_str): string {
  $timestamp = strtotime($date_str);
  if ($timestamp === FALSE) {
    // strtotime() signals failure with FALSE, which must never be handed to
    // the formatter as if it were a timestamp.
    throw new \InvalidArgumentException("Unable to parse date string '$date_str'.");
  }
  return $this->dateFormatter->format($timestamp, 'custom', 'Y-m-d');
}
/**
 * Clear out all prior events in the vocab.
 *
 * Every term of the "ufc_events" vocabulary is deleted one at a time so that
 * each removal, or its failure, is logged individually.
 */
private function removeExistingEvents(): void {
  $logger = \Drupal::logger('ufc');
  $logger->notice("Removing all former events.");
  $storage = $this->entityTypeManager->getStorage('taxonomy_term');
  foreach ($storage->loadByProperties(['vid' => 'ufc_events']) as $term) {
    // Re-read the term through the injected storage and skip it when it is
    // gone, instead of calling delete() on NULL.
    // NOTE(review): presumably a term can vanish after the bulk load (e.g.
    // removed together with another term) - confirm whether this can happen.
    $loaded = $storage->load($term->id());
    if ($loaded === NULL) {
      continue;
    }
    $term_name = $term->name->value;
    $logger->notice("Removing $term_name.");
    try {
      $loaded->delete();
      $logger->notice("Operation successful.");
    }
    catch (\Exception $e) {
      $logger->alert($e->getMessage());
    }
  }
}
/**
 * Create fights from events.
 *
 * Deletes every existing "fight" node, then fetches the page of each
 * "ufc_events" term and creates one fight node per ".MMAGamestrip" row that
 * yields two fighter names.
 */
public function createFights(): void {
  $logger = \Drupal::logger('ufc');
  // Clear out past fights.
  $existing_fights = $this->entityTypeManager->getStorage('node')->loadByProperties(['type' => 'fight']);
  foreach ($existing_fights as $existing_fight) {
    $fight_title = $existing_fight->getTitle();
    try {
      // delete() has no return value; failure is reported by exception.
      $existing_fight->delete();
      $logger->notice("Removed " . $fight_title);
    }
    catch (EntityStorageException $e) {
      $logger->alert("Unable to remove $fight_title: " . $e->getMessage());
    }
  }
  // Go get all events.
  $all_events = $this->entityTypeManager->getStorage('taxonomy_term')
    ->loadByProperties(['vid' => 'ufc_events']);
  foreach ($all_events as $event) {
    $event_path = $event->field_event_url->uri ?? '';
    if ($event_path === '') {
      // Without a stored path the request would hit the site root.
      $logger->warning("No event URL stored for " . $event->label());
      continue;
    }
    try {
      $event_page_html = $this->httpClient
        ->get(self::EVENT_BASE . $event_path)
        ->getBody()->getContents();
    }
    catch (GuzzleException $e) {
      // One unreachable event page must not abort the remaining events.
      $logger->alert("Unable to fetch " . $event->label() . ": " . $e->getMessage());
      continue;
    }
    $crawler = new Crawler($event_page_html);
    $fight_result_rows = $crawler->filter(".MMAGamestrip");
    foreach ($fight_result_rows as $fight_result_row) {
      $result = $this->processFightResultRow($fight_result_row);
      // A usable result names both fighters; a bare 'winner' key is not
      // enough to build a fight.
      if (!isset($result['fighter_1'], $result['fighter_2'])) {
        continue;
      }
      $result['event'] = $event->id();
      $this->createFightNodeFromResult($result);
    }
  }
}
/**
 * Create fight node from results of extracted fight.
 *
 * @param array $result
 *   Extracted fight data with the keys:
 *   - fighter_1: name of the first fighter.
 *   - fighter_2: name of the second fighter.
 *   - winner: name of the winning fighter, or the integer 0 when none was
 *     detected.
 *   - event: ID of the event term the fight belongs to.
 */
private function createFightNodeFromResult(array $result): void {
  $logger = \Drupal::logger('ufc');
  $fighter_1_name = $result['fighter_1'];
  $fighter_2_name = $result['fighter_2'];
  // 'winner' is the integer 0 when no winner was detected; only real names
  // are looked up.
  $fight_winner = $result['winner'] ?? 0;
  $references = [
    'field_fighter_one' => $this->getFighterIdByName($fighter_1_name),
    'field_fighter_two' => $this->getFighterIdByName($fighter_2_name),
    'field_result' => is_string($fight_winner) ? $this->getFighterIdByName($fight_winner) : 0,
  ];
  $values = [
    'type' => 'fight',
    'title' => "$fighter_1_name vs. $fighter_2_name",
    'field_event' => [
      'target_id' => $result['event'],
    ],
  ];
  foreach ($references as $field_name => $target_id) {
    // 0 means no matching node was found: leave the field empty rather than
    // storing a reference to the non-existent entity 0.
    if ($target_id > 0) {
      $values[$field_name] = ['target_id' => $target_id];
    }
  }
  try {
    // save() reports failure by throwing, not by returning FALSE.
    $this->entityTypeManager->getStorage('node')->create($values)->save();
    $logger->notice("$fighter_1_name vs. $fighter_2_name Created");
  }
  catch (EntityStorageException $e) {
    $logger->alert("$fighter_1_name vs. $fighter_2_name FAILED: " . $e->getMessage());
  }
}
/**
 * Retrieve a fighter ID by name.
 *
 * @param string $name
 *   The node title to look for.
 *
 * @return int
 *   The ID of the first node whose title equals $name, or 0 when there is
 *   none. The lookup is not restricted to a particular node type.
 */
private function getFighterIdByName(string $name): int {
  // reset() takes its argument by reference, so the lookup result has to be
  // held in a variable first.
  $matches = $this->entityTypeManager->getStorage('node')->loadByProperties(['title' => $name]);
  $existing_node = reset($matches);
  if ($existing_node) {
    // Cast explicitly so the declared int return type is honoured whatever
    // scalar type id() yields.
    return (int) $existing_node->id();
  }
  return 0;
}
/**
 * Iterate over fight result rows to extract results.
 *
 * @param \DOMElement $row
 *   One fight row (a ".MMAGamestrip" element) from an event page.
 *
 * @return array
 *   An empty array when fewer than two fighter names were found. Otherwise:
 *   - fighter_1, fighter_2: fighter names in document order.
 *   - winner: the name of the competitor detected as winner, or the integer
 *     0 when none was detected.
 */
private function processFightResultRow(\DOMElement $row): array {
  $results = [
    'winner' => 0,
  ];
  $fighter_num = 1;
  // Query the row node directly instead of serialising it and parsing the
  // markup a second time.
  $competitors = (new Crawler($row))->filter(".MMACompetitor");
  foreach ($competitors as $competitor) {
    foreach ($this->findFighterNameHeadings($competitor) as $heading) {
      $fighter_name = trim($heading->textContent);
      $results["fighter_$fighter_num"] = $fighter_name;
      // NOTE(review): a competitor with exactly two element children is
      // treated as the winner - presumably the second child is the winner
      // marker in ESPN's markup; confirm against a current page.
      if ($competitor->childElementCount === 2) {
        $results['winner'] = $fighter_name;
      }
      $fighter_num++;
    }
  }
  // Callers need both fighters; report an incomplete row as "no result".
  if (!isset($results['fighter_1'], $results['fighter_2'])) {
    return [];
  }
  return $results;
}

/**
 * Collect the <h2> elements located at competitor > div > element > h2.
 *
 * Non-element nodes (text, comments) are skipped at every level because they
 * carry no tag name and cannot contain a heading.
 *
 * @param \DOMNode $competitor
 *   A ".MMACompetitor" node.
 *
 * @return \DOMElement[]
 *   The matching headings in document order.
 */
private function findFighterNameHeadings(\DOMNode $competitor): array {
  $headings = [];
  foreach ($competitor->childNodes as $child) {
    if (!$child instanceof \DOMElement || $child->tagName !== 'div') {
      continue;
    }
    foreach ($child->childNodes as $grandchild) {
      if (!$grandchild instanceof \DOMElement) {
        continue;
      }
      foreach ($grandchild->childNodes as $gg_child) {
        if ($gg_child instanceof \DOMElement && $gg_child->tagName === 'h2') {
          $headings[] = $gg_child;
        }
      }
    }
  }
  return $headings;
}
/**
 * Get the inner html from a DOMElement.
 *
 * @param \DOMElement $node
 *   The element whose children should be serialised.
 *
 * @return string
 *   The concatenated HTML of every child node, or an empty string when the
 *   element has no children.
 */
private function getInnerHtml(\DOMElement $node): string {
  $inner_html = '';
  foreach ($node->childNodes as $child) {
    // Serialise as HTML, not XML: XML output collapses an empty element such
    // as <div></div> into <div/>, which an HTML parser reads as an unclosed
    // opening tag.
    $inner_html .= $child->ownerDocument->saveHTML($child);
  }
  return $inner_html;
}
}