From 034dc92721cba02129f22e4415579951591cd2e1 Mon Sep 17 00:00:00 2001 From: dan612 Date: Sun, 7 Dec 2025 12:54:42 -0500 Subject: [PATCH] More updates to import from ufc.com json api. --- .../custom/ufc/src/Commands/UfcCommands.php | 30 +++---- .../Plugin/QueueWorker/FighterQueueWorker.php | 38 ++++---- .../ufc/src/Services/FighterImporter.php | 88 ++++++++++++++----- web/sites/development.services.yml | 32 +++---- 4 files changed, 108 insertions(+), 80 deletions(-) diff --git a/web/modules/custom/ufc/src/Commands/UfcCommands.php b/web/modules/custom/ufc/src/Commands/UfcCommands.php index f196d88..dd8ac75 100644 --- a/web/modules/custom/ufc/src/Commands/UfcCommands.php +++ b/web/modules/custom/ufc/src/Commands/UfcCommands.php @@ -5,6 +5,7 @@ use Drupal\ufc\Services\FighterImporter; use Drupal\ufc\Services\FightImporter; use Drupal\Core\Batch\BatchBuilder; +use Drupal\Core\Cache\CacheBackendInterface; use Drupal\node\Entity\Node; use Drush\Commands\DrushCommands; use Drush\Attributes as CLI; @@ -12,10 +13,13 @@ class UfcCommands extends DrushCommands { + /** + * The UFC fighter list cache ID. + */ protected $cacheId = 'ufc:fighter-list'; /** - * Import fighters from UFC.com. + * Import fighters from UFC.com JSON:API. */ #[CLI\Command(name: 'ufc:import-fighters', aliases: ['impft'])] public function importFighters(): void { @@ -24,30 +28,20 @@ public function importFighters(): void { // First check for the item in cache. 
$fighter_list = \Drupal::cache()->get($this->cacheId); if (!$fighter_list) { - /* $fighter_list = $fighter_importer->getListOfCurrentFighters(); */ - $api_list = \Drupal::httpClient()->get('https://www.ufc.com/jsonapi/node/athlete')->getBody()->getContents(); - $fighter_list = json_decode($api_list)->data; - dump($fighter_list); - exit(); - \Drupal::cache()->set($this->cacheId, $fighter_list, time() + 300); + dump("Getting fighters from UFC.com JSON:API"); + $fighter_list = $fighter_importer->getListOfCurrentFighters(); + \Drupal::cache()->set($this->cacheId, $fighter_list, CacheBackendInterface::CACHE_PERMANENT); } else { $fighter_list = $fighter_list->data; } $fighter_import_queue = \Drupal::queue('fighter_import'); - foreach ($fighter_list as $division => $fighters) { - foreach ($fighters as $fighter) { - $queue_item = new \stdClass(); - $queue_item->first_name = $fighter['firstname']; - $queue_item->last_name = $fighter['lastname']; - $queue_item->image = $fighter['image']; - $queue_item->division = $division; - $queue_item->profile = $fighter['profile']; - $fighter_import_queue->createItem($queue_item); - } + foreach ($fighter_list as $fighter) { + $queue_item = new \stdClass(); + $queue_item->id = $fighter['id']; + $fighter_import_queue->createItem($queue_item); } - } /** diff --git a/web/modules/custom/ufc/src/Plugin/QueueWorker/FighterQueueWorker.php b/web/modules/custom/ufc/src/Plugin/QueueWorker/FighterQueueWorker.php index f4b5f49..3a0d87d 100644 --- a/web/modules/custom/ufc/src/Plugin/QueueWorker/FighterQueueWorker.php +++ b/web/modules/custom/ufc/src/Plugin/QueueWorker/FighterQueueWorker.php @@ -95,28 +95,22 @@ public function processItem($data): void { * The item to process. 
*/ private function doProcess($item): bool { - $fighter = new Fighter(\Drupal::httpClient()); - $fighter->first_name = $item->first_name; - $fighter->last_name = $item->last_name; - $fighter->image = $item->image; - $fighter->class = $item->division; - if (!$fighter->scrapeDataFromFighterPage($item->profile)) { - \Drupal::logger('ufc')->alert("FAILED: $fighter->first_name $fighter->last_name to " . $item->profile); - } - // Check if node exists, by title. - $fighter->createMediaEntityFromImage(); - $title = $fighter->first_name . " " . $fighter->last_name; - $node_lookup = reset(\Drupal::entityTypeManager()->getStorage('node')->loadByProperties(['title' => $title])); - - if (!empty($node_lookup)) { - // Update instead of create. - $fighter->updatePlayerNode($node_lookup->id()); - \Drupal::logger('ufc')->notice("$title updated successfully."); - } - else { - \Drupal::logger('ufc')->warning("No existing player found for $title...creating"); - $fighter->createPlayerNode(); - } + // @todo: + // Item->id will be the ID of the fighter. + // Need to fetch fighter list from cache. + // Then get the correct fighter based on the id. + // Then update or create the fighter node, including any media. + // These values will need to be retrieved from the existing data. + /* $node_lookup = reset(\Drupal::entityTypeManager()->getStorage('node')->loadByProperties(['title' => $title])); */ + /* if (!empty($node_lookup)) { */ + /* // Update instead of create. 
*/ + /* $fighter->updatePlayerNode($node_lookup->id()); */ + /* \Drupal::logger('ufc')->notice("$title updated successfully."); */ + /* } */ + /* else { */ + /* \Drupal::logger('ufc')->warning("No existing player found for $title...creating"); */ + /* $fighter->createPlayerNode(); */ + /* } */ return TRUE; } diff --git a/web/modules/custom/ufc/src/Services/FighterImporter.php b/web/modules/custom/ufc/src/Services/FighterImporter.php index 95bc6cf..ad385e7 100644 --- a/web/modules/custom/ufc/src/Services/FighterImporter.php +++ b/web/modules/custom/ufc/src/Services/FighterImporter.php @@ -24,22 +24,18 @@ class FighterImporter { */ protected $entityTypeManager; + /** + * The UFC cache bin. + */ + protected $cache; + /** * Array of all fighters. * @var array[] */ public $fighters = []; - /** - * The current weight class. - * @var string - */ - protected $weightClass; - - /** - * The UFC cache bin. - */ - protected $cache; + public $cookieJar; /** * The base url for fighter lists. @@ -64,6 +60,64 @@ public function __construct( $this->cache = $cache; } + /** + * Get list of current fighters. + * + * @return array $fighters + */ + public function getListOfCurrentFighters(): array { + $this->addFightersFromUrl(self::UFC_BASE); + return $this->fighters; + } + + /** + * Add fighters from a URL. + * + * @param string $url + * The URL to scrape. + */ + public function addFightersFromUrl(string $url) { + sleep(10); + dump("Requesting: " . $url); + $proxied_url = 'http://192.168.0.13:8888/?url=' . 
urlencode($url); + $ua = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4.1 Safari/605.1.15'; + $headers = [ + 'referer' => true, + 'verify' => false, + 'headers' => [ + 'User-Agent' => $ua, + 'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8', + 'Accept-Encoding' => 'gzip, deflate, br', + 'Cache-Control' => 'no-cache', + 'Pragma' => 'no-cache', + 'Referer' => 'https://www.ufc.com/athletes/all', + ] + ]; + $response = $this->httpClient->request('GET', $proxied_url, $headers); + $content = $response->getBody()->getContents(); + $content_arr = json_decode($content, TRUE); + + if (!$content_arr) { + exit("something went wrong"); + } + + $athletes = $content_arr['data']; + $count = count($athletes); + dump("Found " . $count . " fighters."); + foreach ($athletes as $athlete) { + $this->fighters[] = $athlete; + } + $next_page = $content_arr['links']['next']['href'] ?? FALSE; + if ($next_page) { + $this->addFightersFromUrl($next_page); + } + } + + /** + * There is a pager, loop through to get all fighters. + * + * @param string $base_url + */ public function importFighters(): void { // Get JSON File from API. $api_athletes = $this->cache->get('ufc_api_athletes'); @@ -75,20 +129,6 @@ public function importFighters(): void { // Iterate over data to import all fighters. } - /** - * Get list of current fighters. - * - * @return array $fighters - */ - public function getListOfCurrentFighters(): array { - foreach ($this->divisions as $division => $div_base_url) { - $division_url = self::UFC_BASE . $div_base_url; - $this->weightClass = $division; - echo "Starting import for " . $division . "\n"; - self::loopThroughFighterPages($division_url); - } - return $this->fighters; - } /** * There is a pager, loop through to get all fighters. 
diff --git a/web/sites/development.services.yml b/web/sites/development.services.yml index 4208c58..f35a52e 100644 --- a/web/sites/development.services.yml +++ b/web/sites/development.services.yml @@ -12,19 +12,19 @@ # This means that if you want to override any value of a parameter, the # whole parameter array needs to be copied from # sites/default/default.services.yml or from core/core.services.yml file. -# parameters: -# http.response.debug_cacheability_headers: true -# services: -# cache.backend.null: -# class: Drupal\Core\Cache\NullBackendFactory -# logger.channel.config_schema: -# parent: logger.channel_base -# arguments: [ 'config_schema' ] -# config.schema_checker: -# class: Drupal\Core\Config\Development\LenientConfigSchemaChecker -# arguments: -# - '@config.typed' -# - '@messenger' -# - '@logger.channel.config_schema' -# tags: -# - { name: event_subscriber } +parameters: + http.response.debug_cacheability_headers: true +services: + cache.backend.null: + class: Drupal\Core\Cache\NullBackendFactory + logger.channel.config_schema: + parent: logger.channel_base + arguments: [ 'config_schema' ] + config.schema_checker: + class: Drupal\Core\Config\Development\LenientConfigSchemaChecker + arguments: + - '@config.typed' + - '@messenger' + - '@logger.channel.config_schema' + tags: + - { name: event_subscriber }