Pushing overhaul.

This commit is contained in:
dan612
2025-12-06 16:34:43 -05:00
parent 09043d7884
commit 8cee39a6df
19 changed files with 34347 additions and 143 deletions

View File

@@ -24,8 +24,12 @@ class UfcCommands extends DrushCommands {
// First check for the item in cache.
$fighter_list = \Drupal::cache()->get($this->cacheId);
if (!$fighter_list) {
$fighter_list = $fighter_importer->getListOfCurrentFighters();
\Drupal::cache()->set($this->cacheId, $fighter_list, time() + 86400);
/* $fighter_list = $fighter_importer->getListOfCurrentFighters(); */
$api_list = \Drupal::httpClient()->get('https://www.ufc.com/jsonapi/node/athlete')->getBody()->getContents();
$fighter_list = json_decode($api_list)->data;
dump($fighter_list);
exit();
\Drupal::cache()->set($this->cacheId, $fighter_list, time() + 300);
}
else {
$fighter_list = $fighter_list->data;

View File

@@ -45,22 +45,7 @@ class FighterImporter {
* The base url for fighter lists.
* @var string
*/
const UFC_BASE = "https://www.ufc.com/athletes/all?filters%5B0%5D=weight_class%3";
/**
* All applicable divisions and their keys on UFC.com.
* @var array[]
*/
protected $divisions = [
'heavyweight' => "A11",
'light_heavyweight' => "A13",
'middleweight' => "A14",
'welterweight' => "A15",
'lightweight' => "A12",
'featherweight' => "A9",
'bantamweight' => "A8",
'flyweight' => "A10",
];
const UFC_BASE = "https://www.ufc.com/jsonapi/node/athlete";
/**
* Public constructor for Fighter Importer.
@@ -80,62 +65,14 @@ class FighterImporter {
}
public function importFighters(): void {
$test_run = FALSE;
if ($test_run) {
// Overriding to test!!!
$fighter_name_text_on_ufc = "
Georges St-Pierre
";
$fighter = new Fighter($this->httpClient);
$fighter->first_name = 'scott';
$fighter->last_name = 'adams';
$fighter->scrapeDataFromFighterPage();
$fighter_clone = clone ($fighter);
unset($fighter_clone->fighter_page);
unset($fighter_clone->crawler);
dump($fighter_clone);
}
else {
$fighters_by_div = self::getListOfCurrentFighters();
// Process each fighter into system.
foreach ($fighters_by_div as $division => $fighters) {
$this->processDivision($division, $fighters);
}
}
}
/**
* Process a division.
*
* @param mixed $div
* @param mixed $fighters
*/
public static function processDivision($div, $fighters): void {
\Drupal::logger('ufc')->notice("Starting to update $div");
foreach ($fighters as $fighter_data) {
$fighter = new Fighter(\Drupal::httpClient());
$fighter->first_name = $fighter_data['firstname'];
$fighter->last_name = $fighter_data['lastname'];
$fighter->image = $fighter_data['image'];
$fighter->class = $div;
if (!$fighter->scrapeDataFromFighterPage($fighter_data['profile'])) {
\Drupal::logger('ufc')->alert("FAILED: $fighter->first_name $fighter->last_name to " . $fighter_data['profile']);
}
// Check if node exists, by title.
$fighter->createMediaEntityFromImage();
$title = $fighter->first_name . " " . $fighter->last_name;
$node_lookup = reset(\Drupal::entityTypeManager()->getStorage('node')->loadByProperties(['title' => $title]));
if (!empty($node_lookup)) {
// Update instead of create.
$fighter->updatePlayerNode($node_lookup->id());
\Drupal::logger('ufc')->notice("$title updated successfully.");
}
else {
\Drupal::logger('ufc')->warning("No existing player found for $title...creating");
$fighter->createPlayerNode();
}
// Get JSON File from API.
$api_athletes = $this->cache->get('ufc_api_athletes');
if (!$api_athletes) {
echo "Go get api data!";
exit();
}
// This includes all fighters in data.
// Iterate over data to import all fighters.
}
/**
@@ -160,7 +97,7 @@ class FighterImporter {
*/
public function loopThroughFighterPages($base_url): void {
// Implement caching to store instead of needing fresh requests.
for ($i=0; $i<=100; $i++) {
for ($i=0; $i<=10; $i++) {
$ua = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4.1 Safari/605.1.15';
$headers = [
'referer' => true,
@@ -169,81 +106,23 @@ class FighterImporter {
'User-Agent' => $ua,
'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
'Accept-Encoding' => 'gzip, deflate, br',
'Cache-Control' => 'no-cache',
'Pragma' => 'no-cache',
'Referer' => 'https://www.ufc.com/athletes/all',
]
];
$url = $base_url . "&page=$i";
/* $cid = "ufc:" . $url; */
$request = $this->httpClient->request('GET', $url, $headers);
$content = $request->getBody()->getContents();
$url = $base_url . "&page=$i&_=" . microtime(true);
$response = $this->httpClient->request('GET', $url, $headers);
dump($url);
$content = $response->getBody()->getContents();
dump($response->getStatusCode());
$invalid_page = strpos($content, "No Result Found For");
if (!$invalid_page) {
\Drupal::logger('ufc')->notice("Extracting fighters from page $i.");
self::extractFighters($content);
continue;
}
break;
}
}
/**
 * Extract fighters from an html string.
 *
 * Reads every '.c-listing-athlete-flipcard' element in the markup and stores
 * the fighter in $this->fighters under the current $this->weightClass, keyed
 * by the fighter's display name with spaces removed.
 *
 * For a fighter that is not yet stored, the first name, last name (split on
 * the first space) and profile URL are recorded. The image is refreshed on
 * every call: the first <img> src is used when the card holds exactly two
 * images, otherwise the image is set to FALSE.
 *
 * @param string $input
 *   The HTML to extract athlete flipcards from.
 */
public function extractFighters(string $input): void {
  $crawler = new Crawler($input);

  // Collect name, profile link and images for each card in a single pass so
  // the three values can never get out of step with each other.
  $cards = $crawler->filter('.c-listing-athlete-flipcard')->each(function (Crawler $card): ?array {
    $name_node = $card->filter('.c-listing-athlete__name');
    $link_node = $card->filter('.e-button--black');
    // Crawler::text() and Crawler::attr() throw on an empty node list; flag
    // the incomplete card instead of aborting extraction for the whole page.
    if ($name_node->count() === 0 || $link_node->count() === 0) {
      return NULL;
    }
    return [
      'name' => $name_node->text(),
      'profile' => $link_node->attr('href'),
      'images' => $card->filter('img')->each(function (Crawler $img) {
        return $img->attr('src');
      }),
    ];
  });

  foreach ($cards as $fighter_data) {
    if ($fighter_data === NULL) {
      \Drupal::logger('ufc')->warning('Skipping an athlete card without a name or profile link.');
      continue;
    }

    $name_no_spaces = str_replace(" ", "", $fighter_data['name']);
    $fighter_exists = array_key_exists($this->weightClass, $this->fighters)
      && array_key_exists($name_no_spaces, $this->fighters[$this->weightClass]);

    // Name and profile are only written the first time a fighter is seen.
    if (!$fighter_exists) {
      $split_name = explode(" ", $fighter_data['name'], 2);
      $this->fighters[$this->weightClass][$name_no_spaces]['firstname'] = $split_name[0] ?? " ";
      $this->fighters[$this->weightClass][$name_no_spaces]['lastname'] = $split_name[1] ?? " ";
      $this->fighters[$this->weightClass][$name_no_spaces]['profile'] = $fighter_data['profile'];
    }

    // Only a card with exactly two images yields a usable image (the first).
    $this->fighters[$this->weightClass][$name_no_spaces]['image'] = count($fighter_data['images']) === 2
      ? $fighter_data['images'][0]
      : FALSE;
  }
}
}