2024-04-09 01:47:04 +00:00
|
|
|
<?php
|
|
|
|
|
|
|
|
|
|
namespace Drupal\ufc\Services;
|
|
|
|
|
|
|
|
|
|
use Drupal\Core\Entity\EntityTypeManager;
|
|
|
|
|
use Drupal\Core\Cache\CacheBackendInterface;
|
|
|
|
|
use Drupal\Core\Cache\Cache;
|
|
|
|
|
use Drupal\media\Media;
|
|
|
|
|
use Drupal\ufc\Fighter;
|
|
|
|
|
use GuzzleHttp\Client;
|
|
|
|
|
use Symfony\Component\DomCrawler\Crawler;
|
|
|
|
|
|
|
|
|
|
/**
 * Scrapes the UFC.com athlete listings and imports fighters as nodes.
 *
 * The listing pages for every configured weight class are fetched and parsed
 * into the $fighters array, then each fighter is handed to a
 * \Drupal\ufc\Fighter object which scrapes its profile page and creates or
 * updates the matching node.
 */
class FighterImporter {

  /**
   * Guzzle http client service.
   *
   * @var \GuzzleHttp\Client
   */
  protected $httpClient;

  /**
   * Entity type manager service.
   *
   * @var \Drupal\Core\Entity\EntityTypeManager
   */
  protected $entityTypeManager;

  /**
   * Array of all fighters.
   *
   * Keyed by weight class, then by the fighter name with spaces removed. Each
   * entry holds the keys 'firstname', 'lastname', 'profile' and 'image'.
   *
   * @var array[]
   */
  public $fighters = [];

  /**
   * The current weight class.
   *
   * Set by getListOfCurrentFighters() before each division is scraped and
   * read by extractFighters() to decide where parsed fighters are stored.
   *
   * @var string
   */
  protected $weightClass;

  /**
   * The UFC cache bin.
   *
   * @var \Drupal\Core\Cache\CacheBackendInterface
   */
  protected $cache;

  /**
   * The base url for fighter lists.
   *
   * The value deliberately ends in "%3": each entry of $divisions starts with
   * "A", so the concatenation yields "%3A" (an url-encoded colon) followed by
   * the division id.
   *
   * @var string
   */
  const UFC_BASE = "https://www.ufc.com/athletes/all?filters%5B0%5D=weight_class%3";

  /**
   * All applicable divisions and their keys on UFC.com.
   *
   * Keyed by division machine name; each value is the suffix appended to
   * self::UFC_BASE to build that division's listing url.
   *
   * @var string[]
   */
  protected $divisions = [
    'heavyweight' => "A11",
    'light_heavyweight' => "A13",
    'middleweight' => "A14",
    'welterweight' => "A15",
    'lightweight' => "A12",
    'featherweight' => "A9",
    'bantamweight' => "A8",
    'flyweight' => "A10",
  ];

  /**
   * Public constructor for Fighter Importer.
   *
   * @param \GuzzleHttp\Client $httpClient
   *   Http client used to fetch the listing pages.
   * @param \Drupal\Core\Entity\EntityTypeManager $entityTypeManager
   *   Entity type manager service.
   * @param \Drupal\Core\Cache\CacheBackendInterface $cache
   *   The UFC cache bin.
   */
  public function __construct(
    Client $httpClient,
    EntityTypeManager $entityTypeManager,
    CacheBackendInterface $cache
  ) {
    $this->httpClient = $httpClient;
    $this->entityTypeManager = $entityTypeManager;
    $this->cache = $cache;
  }

  /**
   * Scrape every division and import all fighters that were found.
   */
  public function importFighters(): void {
    $fighters_by_div = $this->getListOfCurrentFighters();

    // Process each fighter into system.
    foreach ($fighters_by_div as $division => $fighters) {
      static::processDivision($division, $fighters);
    }
  }

  /**
   * Process a division.
   *
   * For every fighter the profile page is scraped, a media entity is created
   * from the image, and a node whose title is "<first name> <last name>" is
   * updated when one exists or created otherwise.
   *
   * @param mixed $div
   *   The division name; stored on each fighter as its class.
   * @param mixed $fighters
   *   Iterable of fighter data arrays with the keys 'firstname', 'lastname',
   *   'image' and 'profile'.
   * @param array|\ArrayAccess|null $context
   *   (optional) Batch context, passed by reference. When supplied,
   *   $context['results']['processed'] is incremented once per call. Direct
   *   callers may omit it.
   */
  public static function processDivision($div, $fighters, &$context = NULL): void {
    $logger = \Drupal::logger('ufc');
    $logger->notice("Starting to update $div");
    $node_storage = \Drupal::entityTypeManager()->getStorage('node');

    foreach ($fighters as $fighter_data) {
      $fighter = new Fighter(\Drupal::httpClient());
      $fighter->first_name = $fighter_data['firstname'];
      $fighter->last_name = $fighter_data['lastname'];
      $fighter->image = $fighter_data['image'];
      $fighter->class = $div;

      // A failed scrape is only logged; the fighter is still saved below with
      // whatever data is available.
      if (!$fighter->scrapeDataFromFighterPage($fighter_data['profile'])) {
        $logger->alert("FAILED: $fighter->first_name $fighter->last_name to " . $fighter_data['profile']);
      }

      $fighter->createMediaEntityFromImage();

      // Check if node exists, by title. reset() takes its argument by
      // reference, so the lookup result must live in a variable first.
      $title = $fighter->first_name . " " . $fighter->last_name;
      $matching_nodes = $node_storage->loadByProperties(['title' => $title]);
      $node_lookup = reset($matching_nodes);

      if (!empty($node_lookup)) {
        // Update instead of create.
        $fighter->updatePlayerNode($node_lookup->id());
        $logger->notice("$title updated successfully.");
      }
      else {
        $logger->warning("No existing player found for $title...creating");
        $fighter->createPlayerNode();
      }
    }

    // Only count when a context was actually handed in; without one there is
    // nothing to report progress to.
    if ($context !== NULL) {
      $context['results']['processed'] = ($context['results']['processed'] ?? 0) + 1;
    }
  }

  /**
   * Get list of current fighters.
   *
   * Walks every configured division, scraping all of its listing pages.
   * Results accumulate in $this->fighters, which is not reset between calls.
   *
   * @return array
   *   The fighters collected so far, keyed by weight class.
   */
  public function getListOfCurrentFighters(): array {
    foreach ($this->divisions as $division => $div_base_url) {
      $division_url = self::UFC_BASE . $div_base_url;
      // extractFighters() files parsed fighters under this weight class.
      $this->weightClass = $division;
      echo "Starting import for " . $division . "\n";
      $this->loopThroughFighterPages($division_url);
    }

    return $this->fighters;
  }

  /**
   * There is a pager, loop through to get all fighters.
   *
   * Pages 0 through 100 are requested in order; the loop stops early at the
   * first page whose body contains "No Result Found For".
   *
   * @param string $base_url
   *   Listing url for one division, without the page parameter.
   */
  public function loopThroughFighterPages($base_url): void {
    // @todo Implement caching to store instead of needing fresh requests.
    $ua = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4.1 Safari/605.1.15';

    // The request options do not depend on the page, so build them once.
    $options = [
      // NOTE(review): Guzzle documents 'referer' as a sub-option of
      // 'allow_redirects'; at this level it is presumably ignored - confirm.
      'referer' => TRUE,
      // NOTE(review): this turns off TLS certificate verification - confirm
      // that is really required for ufc.com.
      'verify' => FALSE,
      'headers' => [
        'User-Agent' => $ua,
        'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Encoding' => 'gzip, deflate, br',
      ],
    ];

    for ($page = 0; $page <= 100; $page++) {
      $url = $base_url . "&page=$page";
      $response = $this->httpClient->request('GET', $url, $options);
      $content = $response->getBody()->getContents();

      // Strict comparison: a match at offset 0 is still a match.
      if (strpos($content, "No Result Found For") !== FALSE) {
        break;
      }

      \Drupal::logger('ufc')->notice("Extracting fighters from page $page.");
      $this->extractFighters($content);
    }
  }

  /**
   * Return the given string prefixed with "hello".
   *
   * @param mixed $str
   *   Value interpolated after the prefix.
   *
   * @return string
   *   The prefixed string.
   */
  public function quickTest($str) {
    return "hello$str";
  }

  /**
   * Extract fighters from an html string.
   *
   * Every ".c-listing-athlete-flipcard" element is read for its name, profile
   * link and image sources, and the result is stored in $this->fighters under
   * the current weight class.
   *
   * @param string $input
   *   Html of one listing page.
   */
  public function extractFighters(string $input): void {
    $crawler = new Crawler($input);

    // Read name, profile url and image sources of each card in a single pass.
    $cards = $crawler->filter('.c-listing-athlete-flipcard')->each(function (Crawler $card) {
      return [
        'name' => $card->filter('.c-listing-athlete__name')->text(),
        'profile' => $card->filter('.e-button--black')->attr('href'),
        'images' => $card->filter('img')->each(function (Crawler $img) {
          return $img->attr('src');
        }),
      ];
    });

    foreach ($cards as $card) {
      $name_no_spaces = str_replace(" ", "", $card['name']);

      // Name and profile are only written the first time a fighter is seen.
      if (!isset($this->fighters[$this->weightClass][$name_no_spaces])) {
        // Everything after the first space counts as the last name.
        $split_name = explode(" ", $card['name'], 2);
        $this->fighters[$this->weightClass][$name_no_spaces]['firstname'] = $split_name[0];
        $this->fighters[$this->weightClass][$name_no_spaces]['lastname'] = $split_name[1] ?? " ";
        $this->fighters[$this->weightClass][$name_no_spaces]['profile'] = $card['profile'];
      }

      // The image is refreshed on every sighting. Only cards with exactly two
      // images are used; anything else is recorded as FALSE.
      $this->fighters[$this->weightClass][$name_no_spaces]['image'] = count($card['images']) === 2
        ? $card['images'][0]
        : FALSE;
    }
  }

}
|