This commit is contained in:
calcu1on
2025-03-15 17:25:17 -04:00
parent 9dbfbeecef
commit d38d4aabc1
105 changed files with 2469 additions and 1484 deletions

View File

@@ -2,37 +2,166 @@
namespace Drupal\ufc\Commands;
use Drupal\ufc\Services\FighterImporter;
use Drupal\ufc\Services\FightImporter;
use Drupal\Core\Batch\BatchBuilder;
use Drupal\node\Entity\Node;
use Drush\Commands\DrushCommands;
use Drush\Attributes as CLI;
use Symfony\Component\DomCrawler\Crawler;
class UfcCommands extends DrushCommands {
protected $cacheId = 'ufc:fighter-list';
/**
 * Import fighters from UFC.com.
 *
 * Obtains the fighter list (from cache when available, otherwise from the
 * fighter importer service) and queues one batch operation per division.
 */
#[CLI\Command(name: 'ufc:import-fighters', aliases: ['impft'])]
public function importFighters(): void {
  $importer = \Drupal::service('ufc.import_fighters');
  $cache_bin = \Drupal::cache();

  // Prefer the cached list; on a miss, fetch it and keep it for 24 hours.
  $cached = $cache_bin->get($this->cacheId);
  if ($cached) {
    $roster = $cached->data;
  }
  else {
    $roster = $importer->getListOfCurrentFighters();
    $cache_bin->set($this->cacheId, $roster, time() + 86400);
  }

  // One batch operation per division.
  $builder = new BatchBuilder();
  $builder->setTitle("Starting Complete Fighter Import");
  $builder->setFinishCallback([self::class, 'importFinished']);
  $builder->setInitMessage("Importing...");
  foreach ($roster as $division_name => $division_fighters) {
    $builder->addOperation(
      [FighterImporter::class, 'processDivision'],
      [$division_name, $division_fighters]
    );
  }

  // Hand the batch to Drush for processing.
  batch_set($builder->toArray());
  drush_backend_batch_process();
}
/**
 * Import events based on ESPN.com.
 *
 * Scrapes ESPN's per-year UFC schedule pages from 2000 through the current
 * year and queues one batch operation per schedule row. Each row is queued as
 * an HTML string together with the year it was listed under.
 */
#[CLI\Command(name: 'ufc:import-events', aliases: ['impev'])]
public function importEvents(): void {
  $events_base = "https://www.espn.com/mma/schedule/_/year/";
  $event_list = [];

  // Use the current year as the upper bound so the range does not need a
  // manual bump every January.
  $current_year = (int) date('Y');
  for ($year = 2000; $year <= $current_year; $year++) {
    $year_event_url = $events_base . "{$year}/league/ufc";
    try {
      $event_listing = \Drupal::httpClient()
        ->get($year_event_url)->getBody()->getContents();
    }
    catch (\GuzzleHttp\Exception\GuzzleException $e) {
      // A single unreachable year should not abort the whole import.
      \Drupal::logger('ufc')->warning('Could not fetch the @year schedule: @message', [
        '@year' => $year,
        '@message' => $e->getMessage(),
      ]);
      continue;
    }

    $crawler = new Crawler($event_listing);
    foreach ($crawler->filter('.Schedule__EventLeague--ufc tbody tr') as $event) {
      // Store the row as an HTML string keyed by year.
      $event_list[$year][] = $event->ownerDocument->saveHTML($event);
    }
  }

  // Add each event row to a batch process.
  $batch = new BatchBuilder();
  $batch->setTitle("Starting Event Import")
    ->setFinishCallback([UfcCommands::class, 'eventImportFinished'])
    ->setInitMessage("Importing events...");
  foreach ($event_list as $year => $events) {
    foreach ($events as $event) {
      // NOTE(review): FightImporter::processEvent() is declared with a
      // \DOMElement first parameter; confirm it accepts the HTML string
      // queued here.
      $batch->addOperation([FightImporter::class, 'processEvent'], [$event, $year]);
    }
  }

  // Set and run the batch.
  batch_set($batch->toArray());
  drush_backend_batch_process();
}
/**
 * Update fights with their event date.
 *
 * Loads every node of type "fight" and queues one batch operation per node.
 * The date itself is written by the updateFight() batch callback, so nothing
 * is saved here.
 */
#[CLI\Command(name: 'ufc:update-fight-dates', aliases: ['ufd'])]
public function updateFightDates(): void {
  $fights = \Drupal::entityTypeManager()->getStorage('node')->loadByProperties(['type' => 'fight']);

  // Add each fight to a batch process.
  $batch = new BatchBuilder();
  $batch->setTitle("Starting Fight Update")
    ->setFinishCallback([UfcCommands::class, 'eventImportFinished'])
    ->setInitMessage("Updating fight dates...");
  foreach ($fights as $fight) {
    $batch->addOperation([UfcCommands::class, 'updateFight'], [$fight]);
  }

  // Set and run the batch.
  batch_set($batch->toArray());
  drush_backend_batch_process();
}
/**
 * Batch operation: store a fight's event date on the fight as a timestamp.
 *
 * Reads field_event_date from the entity referenced by the fight's
 * field_event, converts it with strtotime() and saves the result into
 * field_fight_date.
 *
 * @param \Drupal\node\Entity\Node $fight
 *   The fight node to update.
 * @param array|\ArrayAccess $context
 *   Batch context; the 'processed' and 'failed' counters under 'results' are
 *   incremented here.
 */
public static function updateFight($fight, &$context) {
  $title = $fight->title->value;

  // The referenced event may be missing, so guard the chain before parsing.
  $event_date_value = $fight->field_event->entity?->field_event_date->value;
  $timestamp = $event_date_value ? strtotime($event_date_value) : FALSE;

  if ($timestamp === FALSE) {
    $context['results']['failed'] = ($context['results']['failed'] ?? 0) + 1;
    \Drupal::messenger()->addMessage($title . " failed.");
    return;
  }

  $fight->field_fight_date = $timestamp;
  if ($fight->save()) {
    $context['results']['processed'] = ($context['results']['processed'] ?? 0) + 1;
    \Drupal::messenger()->addMessage($title . " updated successfully.");
  }
  else {
    $context['results']['failed'] = ($context['results']['failed'] ?? 0) + 1;
    \Drupal::messenger()->addMessage($title . " failed.");
  }
}
/**
 * Create Fights.
 *
 * Deletes every existing node of type "fight", then queues one batch
 * operation per term in the "ufc_events" vocabulary; the fights themselves
 * are created by FightImporter::createFightsByEvent().
 */
#[CLI\Command(name: 'ufc:create-fights', aliases: ['cf'])]
public function createUfcFights(): void {
  $existing_fights = \Drupal::entityTypeManager()->getStorage('node')->loadByProperties(['type' => 'fight']);
  foreach ($existing_fights as $existing_fight) {
    // delete() has no return value and throws on failure, so log
    // unconditionally once it has returned.
    $title = $existing_fight->getTitle();
    $existing_fight->delete();
    \Drupal::logger('ufc')->notice("Removed " . $title);
  }

  // Get all events - add each to queue, fights created per event.
  $events = \Drupal::entityTypeManager()->getStorage('taxonomy_term')->loadByProperties(['vid' => 'ufc_events']);

  // Operations are per event, so report with the event finish callback.
  $batch = new BatchBuilder();
  $batch->setTitle("Creating fights in batches....")
    ->setFinishCallback([UfcCommands::class, 'eventImportFinished'])
    ->setInitMessage("Processing");
  foreach ($events as $event) {
    // This one event URL is deliberately excluded from processing.
    if ($event->field_event_url->uri == '/mma/fightcenter/_/id/600037492/league/ufc') {
      continue;
    }
    $batch->addOperation([FightImporter::class, 'createFightsByEvent'], [$event]);
  }

  // Set and run the batch.
  batch_set($batch->toArray());
  drush_backend_batch_process();
}
private function getEventDateTimestampFromFight(Node $fight_node) {
/**
* Gets the event date as a timestamp.
*/
private function getEventDateTimestampFromFight(Node $fight_node): int|bool {
// Get field value.
$event_date_value = $fight_node->field_event->entity->field_event_date->value;
assert($event_date_value, !null);
@@ -40,4 +169,40 @@ class UfcCommands extends DrushCommands {
return strtotime($event_date_value);
}
/**
 * Handle batch completion.
 *
 * Reports how many divisions the batch operations counted as processed.
 *
 * @param bool $success
 *   TRUE if all batch API tasks were completed successfully.
 * @param array $results
 *   A results array from the batch processing operations.
 * @param array $operations
 *   A list of the operations that had not been completed.
 * @param string $elapsed
 *   Batch.inc kindly provides the elapsed processing time in seconds.
 */
public static function importFinished(bool $success, array $results, array $operations, string $elapsed): void {
  // The counter is absent when no operation ran, so default it to 0.
  $processed = $results['processed'] ?? 0;
  \Drupal::messenger()->addMessage("Processed " . $processed . " divisions.");
}
/**
 * Handle batch completion.
 *
 * Reports how many events the batch operations counted as processed and,
 * when any were recorded, how many failed.
 *
 * @param bool $success
 *   TRUE if all batch API tasks were completed successfully.
 * @param array $results
 *   A results array from the batch processing operations.
 * @param array $operations
 *   A list of the operations that had not been completed.
 * @param string $elapsed
 *   Batch.inc kindly provides the elapsed processing time in seconds.
 */
public static function eventImportFinished(bool $success, array $results, array $operations, string $elapsed): void {
  // The counters are absent when no operation set them, so default to 0.
  $processed = $results['processed'] ?? 0;
  $failed = $results['failed'] ?? 0;

  \Drupal::messenger()->addMessage("Processed " . $processed . " events.");
  if ($failed > 0) {
    \Drupal::messenger()->addWarning("Failed to process " . $failed . " events.");
  }
}
}

View File

@@ -7,6 +7,7 @@ use Drupal\Core\Entity\EntityTypeManager;
use Drupal\taxonomy\Entity\Term;
use Drupal\node\Entity\Node;
use GuzzleHttp\Client;
use GuzzleHttp\Exception\RequestException;
use Symfony\Component\DomCrawler\Crawler;
class FightImporter {
@@ -56,9 +57,9 @@ class FightImporter {
*/
public function importEvents(): void {
// First delete all events :-).
$this->removeExistingEvents();
/* $this->removeExistingEvents(); */
// Old fashioned for loop to target years.
for ($i = 2000; $i <= 2024; $i++) {
for ($i = 2011; $i <= 2025; $i++) {
$year_event_url = self::EVENTS_BASE . "{$i}/league/ufc";
$event_listing = $this->httpClient
->get($year_event_url)->getBody()->getContents();
@@ -83,7 +84,10 @@ class FightImporter {
}
}
private function processEvent(\DOMElement $event, string $year): array {
/**
* Process an event into the system.
*/
public static function processEvent(\DOMElement $event, string $year): array {
if ($event->childElementCount !== 4) {
return [];
}
@@ -92,7 +96,8 @@ class FightImporter {
];
for ($i = 0; $i < 4; $i++) {
if ($i === 0) {
$event_date = $this->convertDate($event->childNodes[0]->textContent . " $year");
$date_str = $event->childNodes[0]->textContent . " $year";
$event_date = \Drupal::service('date.formatter')->format(strtotime($date_str), 'custom', 'Y-m-d');
$term_build['field_event_date'] = $event_date;
}
if ($i === 1) {
@@ -156,32 +161,25 @@ class FightImporter {
/**
 * Create fights from a single event.
 *
 * Batch operation: downloads the event page, extracts every ".MMAGamestrip"
 * row and creates a fight node for each row that yields a result.
 *
 * @param object $event
 *   The event to process. Its field_event_url->uri is appended to
 *   self::EVENT_BASE and its id() is stored on every result. Presumably a
 *   "ufc_events" taxonomy term; confirm against the caller.
 * @param array|\ArrayAccess $context
 *   Batch context; the 'processed' or 'failed' counter under 'results' is
 *   incremented here.
 */
public static function createFightsByEvent($event, &$context): void {
  $importer = \Drupal::service('ufc.import_fights');

  try {
    $event_page_html = \Drupal::httpClient()
      ->get(self::EVENT_BASE . $event->field_event_url->uri)
      ->getBody()->getContents();

    $crawler = new Crawler($event_page_html);
    foreach ($crawler->filter(".MMAGamestrip") as $fight_result_row) {
      $result = $importer->processFightResultRow($fight_result_row);
      if (empty($result)) {
        continue;
      }
      $result['event'] = $event->id();
      $importer->createFightNodeFromResult($result);
    }

    // The counter may not exist yet on the first operation.
    $context['results']['processed'] = ($context['results']['processed'] ?? 0) + 1;
  }
  catch (\GuzzleHttp\Exception\GuzzleException $e) {
    // Covers connection failures as well as HTTP error responses. Record the
    // failure and let the remaining events continue.
    $context['results']['failed'] = ($context['results']['failed'] ?? 0) + 1;
    \Drupal::logger('ufc')->error('Could not import fights for event @id: @message', [
      '@id' => $event->id(),
      '@message' => $e->getMessage(),
    ]);
  }
}
@@ -236,7 +234,7 @@ class FightImporter {
/**
* Iterate over fight result rows to extract results.
*/
private function processFightResultRow(\DOMElement $row): array {
public function processFightResultRow(\DOMElement $row): array {
$results = [
'winner' => 0,
];

View File

@@ -98,9 +98,9 @@ class FighterImporter {
else {
$fighters_by_div = self::getListOfCurrentFighters();
// Process each fighter into system.
foreach ($fighters_by_div as $division => $fighters) {
$this->processDivision($division, $fighters);
}
/* foreach ($fighters_by_div as $division => $fighters) { */
/* $this->processDivision($division, $fighters); */
/* } */
}
}
/**
@@ -109,9 +109,11 @@ class FighterImporter {
* @param mixed $div
* @param mixed $fighters
*/
private function processDivision($div, $fighters): void {
public static function processDivision($div, $fighters, &$context): void {
\Drupal::logger('ufc')->notice("Starting to update $div");
foreach ($fighters as $fighter_data) {
$fighter = new Fighter($this->httpClient);
$fighter = new Fighter(\Drupal::httpClient());
$fighter->first_name = $fighter_data['firstname'];
$fighter->last_name = $fighter_data['lastname'];
$fighter->image = $fighter_data['image'];
@@ -122,7 +124,7 @@ class FighterImporter {
// Check if node exists, by title.
$fighter->createMediaEntityFromImage();
$title = $fighter->first_name . " " . $fighter->last_name;
$node_lookup = reset($this->entityTypeManager->getStorage('node')->loadByProperties(['title' => $title]));
$node_lookup = reset(\Drupal::entityTypeManager()->getStorage('node')->loadByProperties(['title' => $title]));
if (!empty($node_lookup)) {
// Update instead of create.
@@ -134,6 +136,7 @@ class FighterImporter {
$fighter->createPlayerNode();
}
}
$context['results']['processed']++;
}
/**
@@ -157,7 +160,6 @@ class FighterImporter {
* @param string $base_url
*/
public function loopThroughFighterPages($base_url): void {
// Here you are Dan.
// Implement caching to store instead of needing fresh requests.
for ($i=0; $i<=100; $i++) {
$ua = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4.1 Safari/605.1.15';
@@ -165,7 +167,7 @@ class FighterImporter {
'referer' => true,
'verify' => false,
'headers' => [
'User-Agent' => 'DC SCRAPER/v1.0',
'User-Agent' => $ua,
'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
'Accept-Encoding' => 'gzip, deflate, br',
]

View File

@@ -20,10 +20,12 @@ class WebScraperTest extends UnitTestCase {
}
/**
 * Tests that the importer returns a list of fighters grouped by division.
 *
 * Only the structure is asserted (an iterable of iterables), matching how
 * the import command consumes the list.
 */
public function testGetCurrentFighters() {
  $fighters = $this->fighterImporter->getListOfCurrentFighters();

  $this->assertIsIterable($fighters, "The fighter list should be iterable by division.");
  foreach ($fighters as $division => $division_fighters) {
    $this->assertIsIterable($division_fighters, "Fighters for division '$division' should be iterable.");
  }
}
}