http_client = $httpClient;
  }

  /**
   * Scrapes every supported statistic from a fighter profile page.
   *
   * Loads the page, stops early when it has no stats section, and otherwise
   * runs each individual scraper in turn.
   *
   * @param string $profile_uri
   *   Profile path; appended verbatim to "https://www.ufc.com".
   *
   * @return bool
   *   TRUE when the page had stats and the scrapers ran, FALSE otherwise.
   */
  public function scrapeDataFromFighterPage(string $profile_uri): bool {
    $this->setFighterPage("https://www.ufc.com$profile_uri");

    if (!$this->checkValidFighter()) {
      return FALSE;
    }

    $this->setAge();
    $this->scrapeBio();
    $this->setFighterRecord();
    $this->setAccuracy();
    $this->setAverages();
    $this->setWinsBreakdown();
    $this->setStrikesByPosition();

    return TRUE;
  }

  /**
   * Downloads the fighter page and builds the crawler used by the scrapers.
   *
   * @param string $url
   *   Absolute URL of the fighter page.
   *
   * @return void
   */
  public function setFighterPage(string $url): void {
    // Start from an empty document so a failed request can neither leave the
    // crawler unset nor leave a previously loaded page in place.
    $this->fighter_page = '';
    $this->crawler = new Crawler();

    try {
      // NOTE(review): 'verify' => FALSE turns off TLS certificate checks. It
      // is kept to preserve behaviour; confirm it is still required.
      $response = $this->http_client->request('GET', $url, ['verify' => FALSE]);
      $this->fighter_page = $response->getBody()->getContents();
      $this->crawler = new Crawler($this->fighter_page);
    }
    catch (\Exception $e) {
      echo 'Caught exception: ', $e->getMessage(), "\n";
    }
  }

  /**
   * Checks whether the loaded page has a stats section worth scraping.
   *
   * @return bool
   *   TRUE when at least one "h2.stats-records__title" element exists.
   */
  public function checkValidFighter(): bool {
    // isset() also covers the case where no page has been loaded yet.
    return isset($this->crawler)
      && $this->crawler->filter('h2.stats-records__title')->count() > 0;
  }

  /**
   * Reads the fighter age from the ".field--name-age" element.
   *
   * The age is stored as 0 when the element is missing or holds no number.
   *
   * @return void
   */
  public function setAge(): void {
    $age_node = $this->crawler->filter('.field--name-age');

    // innerText() throws on an empty node list, so check the count first.
    $age = $age_node->count() > 0 ? trim($age_node->innerText()) : '';

    // The integer cast yields 0 for empty or non-numeric text, which replaces
    // the former assert() on data this code does not control.
    $this->age = (int) $age;
  }

  /**
   * Gets data from the bio section of a fighter page.
   *
   * Collects the decimal numbers found in each ".c-bio__row--3col" row. The
   * first row is read as height and weight, the second as reach and leg
   * reach; a row is only used when it holds exactly two numbers.
   *
   * @return void
   */
  public function scrapeBio(): void {
    $rows = [];

    foreach ($this->crawler->filter('.c-bio__row--3col') as $row) {
      // Drop spaces and newlines so each value is one contiguous token.
      $compact = str_replace([" ", "\n"], "", (string) $row->nodeValue);
      preg_match_all('/[0-9]+\.[0-9]+/s', $compact, $matches);
      $rows[] = $matches[0];
    }

    $height_weight = $rows[0] ?? [];
    $arm_leg_reach = $rows[1] ?? [];

    if (count($height_weight) === 2) {
      $this->height = (float) $height_weight[0];
      $this->weight = (float) $height_weight[1];
    }

    if (count($arm_leg_reach) === 2) {
      $this->reach = (float) $arm_leg_reach[0];
      $this->leg_reach = (float) $arm_leg_reach[1];
    }
  }

  /**
   * Extracts the fighter record.
   *
   * Expects text such as "20-3-0 (W-L-D)" in ".hero-profile__division-body"
   * and stores the three numbers as wins, losses and ties. Nothing is stored
   * when the element is missing or empty.
   *
   * @return void
   */
  public function setFighterRecord(): void {
    $division_body = $this->crawler->filter(".hero-profile__division-body");

    // innerText() throws on an empty node list.
    if ($division_body->count() === 0) {
      return;
    }

    // "20-3-0 (W-L-D)" -> "20-3-0".
    $record_details = explode(" ", trim($division_body->innerText()))[0];
    if ($record_details === '') {
      return;
    }

    // "20-3-0" -> [20, 3, 0]; missing parts count as 0.
    $record_chunks = explode("-", $record_details);
    $this->wins = (int) ($record_chunks[0] ?? 0);
    $this->losses = (int) ($record_chunks[1] ?? 0);
    $this->ties = (int) ($record_chunks[2] ?? 0);
  }

  /**
   * Extracts the striking and takedown accuracy.
   *
   * For every ".e-chart-circle" element, the last child whose text contains
   * "accuracy" is used as the chart title and passed to
   * extractAccuracyFromString().
   *
   * @return void
   */
  public function setAccuracy(): void {
    foreach ($this->crawler->filter('.e-chart-circle') as $circle) {
      // Reset per circle so a title never carries over from an earlier one.
      $cir_title = NULL;

      foreach ($circle->childNodes as $child) {
        $text = strtolower($child->textContent);
        if (str_contains($text, "accuracy")) {
          $cir_title = $text;
        }
      }

      if ($cir_title === NULL) {
        continue;
      }

      if (str_contains($cir_title, "striking")) {
        $this->striking_accuracy = $this->extractAccuracyFromString($cir_title);
      }

      if (str_contains($cir_title, "takedown")) {
        $this->grappling_accuracy = $this->extractAccuracyFromString($cir_title);
      }
    }
  }

  /**
   * Extracts averages from the statistics section.
   *
   * Hands every child node of each ".c-stat-compare--no-bar" element to
   * extractAndSetAverage().
   *
   * @return void
   */
  public function setAverages(): void {
    // filter() always returns a Crawler and every DOM node has childNodes, so
    // the former is_null() and property_exists() guards are not needed.
    foreach ($this->crawler->filter(".c-stat-compare--no-bar") as $compare_wrapper) {
      foreach ($compare_wrapper->childNodes as $child) {
        $this->extractAndSetAverage($child);
      }
    }
  }

  /**
   * Extracts wins per type.
*
   * Reads the numbers in ".hero-profile__stat-numb". Two numbers are stored
   * as knockouts and first round finishes; three numbers are stored as
   * knockouts, submissions and first round finishes. Any other count stores
   * nothing. Decisions are not scraped and are always set to 0.
   *
   * @return void
   */
  public function setWinsBreakdown(): void {
    $stats = [];
    foreach ($this->crawler->filter('.hero-profile__stat-numb') as $stat) {
      $stats[] = (int) $stat->textContent;
    }

    if (count($stats) === 2) {
      [$this->knockouts, $this->first_round_finishes] = $stats;
    }
    elseif (count($stats) === 3) {
      [$this->knockouts, $this->submissions, $this->first_round_finishes] = $stats;
    }

    $this->decisions = 0;
  }

  /**
   * Extract strikes by position and by target.
   *
   * Walks every ".stats-records--three-column" section. In the
   * "sig. str. by position" section, the last element child of each div that
   * mentions "standing" is passed to setStrikes(). In the
   * "sig. str. by target" section, the head, body and leg values are read
   * from the chart's text elements; a missing value is stored as 0.
   *
   * @return void
   */
  public function setStrikesByPosition(): void {
    // innerText() throws on an empty node list, so fall back to 0 instead.
    $read_int = function (string $selector): int {
      $node = $this->crawler->filter($selector);
      return $node->count() > 0 ? (int) $node->innerText() : 0;
    };

    foreach ($this->crawler->filter('.stats-records--three-column') as $three_stat) {
      $text = strtolower($three_stat->textContent);

      if (str_contains($text, "sig. str. by position")) {
        foreach ($three_stat->childNodes as $child) {
          // Only div elements that have an element child carry a value.
          if ($child->nodeName !== 'div' || $child->lastElementChild === NULL) {
            continue;
          }

          foreach ($child->childNodes as $grandchild) {
            if (str_contains(strtolower($grandchild->textContent), "standing")) {
              $this->setStrikes($child->lastElementChild->nodeValue);
            }
          }
        }
      }

      if (str_contains($text, "sig. str. by target")) {
        $this->strikes_to_head = $read_int('text#e-stat-body_x5F__x5F_head_value');
        $this->strikes_to_body = $read_int('text#e-stat-body_x5F__x5F_body_value');
        $this->strikes_to_leg = $read_int('text#e-stat-body_x5F__x5F_leg_value');
      }
    }
  }

  /**
   * Helper to get number out of string.
   *
   * Strips tags, collects every run of digits and joins the runs with ".",
   * so "<b>3.45</b>" becomes "3.45". Returns "" when there are no digits.
   *
   * @param mixed $string
   *   Text to extract from; it is cast to string before use.
   *
   * @return string
   *   The digit runs joined by ".".
   */
  protected function extractNumber($string): string {
    preg_match_all('!\d+!', strip_tags((string) $string), $matches);

    return implode('.', $matches[0]);
  }

  /**
   * Creates a player node.
   *
   * Builds a new "fighter" node from the scraped properties and saves it. A
   * failed save is logged to the "ufc" channel instead of being thrown.
   *
   * @return void
   */
  public function createPlayerNode(): void {
    $division_name = $this->transformWeightClass();
    $title = $this->first_name . " " . $this->last_name;

    $node = Node::create([
      'type' => 'fighter',
      'title' => $title,
      'field_division' => $this->getTermByName($division_name),
      'field_first_name' => $this->first_name,
      'field_last_name' => $this->last_name,
      'field_wins' => $this->wins,
      'field_losses' => $this->losses,
      'field_ties' => $this->ties,
      'field_player_photo' => [
        'target_id' => $this->image_id,
      ],
      'field_striking_accuracy' => $this->striking_accuracy,
      'field_grappling_accuracy' => $this->grappling_accuracy,
      'field_strikes_per_minute' => $this->strikes_per_min,
      'field_absorbed_per_min' => $this->absorbed_per_min,
      'field_takedowns_per_15' => $this->takedowns_per_15,
      'field_knockdown_ratio' => $this->knockdown_ratio,
      'field_knockouts' => $this->knockouts,
      'field_decisions' => $this->decisions,
      'field_submissions' => $this->submissions,
      'field_age' => $this->age,
      'field_reach' => $this->reach,
      'field_leg_reach' => $this->leg_reach,
      'field_height' => $this->height,
      'field_weight' => $this->weight,
      'field_submission_avg_per_15' => $this->submission_avg_per_15,
      'field_takedown_defense' => $this->takedown_defense,
      'field_sig_strike_defense' => $this->sig_strike_defense,
      'field_average_fight_time' => $this->average_fight_time,
      'field_standing_strikes' => $this->standing_strikes,
      'field_clinch_strikes' => $this->clinch_strikes,
      'field_ground_strikes' => $this->ground_strikes,
      'field_strikes_to_head' => $this->strikes_to_head,
      'field_strikes_to_body' => $this->strikes_to_body,
      'field_strikes_to_leg' => $this->strikes_to_leg,
      'field_first_round_finishes' => $this->first_round_finishes,
    ]);
    $node->status = 1;
    $node->enforceIsNew();

    // Save exactly once: a second save after the try/catch would repeat the
    // write and re-throw the failure that was just handled.
    try {
      $node->save();
      \Drupal::logger('ufc')->notice('@title created successfully.', ['@title' => $title]);
    }
    catch (\Exception $e) {
      \Drupal::logger('ufc')->error('Unable to create new player node for @title: @message', [
        '@title' => $title,
        '@message' => $e->getMessage(),
      ]);
    }
  }

  /**
   * Updates a player node.
*
   * Loads the node, overwrites its stat fields with the scraped properties
   * and saves it. When no node can be loaded, an error is logged to the
   * "ufc" channel and nothing is saved.
   *
   * @param mixed $nid
   *   Node ID handed to the node storage's load().
   *
   * @return void
   */
  public function updatePlayerNode($nid): void {
    $node = \Drupal::entityTypeManager()->getStorage('node')->load($nid);

    // load() returns NULL for an unknown ID; assigning fields on NULL would
    // be a fatal error.
    if ($node === NULL) {
      \Drupal::logger('ufc')->error('Unable to load player node @nid for update.', ['@nid' => $nid]);
      return;
    }

    $field_values = [
      'field_wins' => $this->wins,
      'field_losses' => $this->losses,
      'field_ties' => $this->ties,
      'field_striking_accuracy' => $this->striking_accuracy,
      'field_grappling_accuracy' => $this->grappling_accuracy,
      'field_strikes_per_minute' => $this->strikes_per_min,
      'field_absorbed_per_min' => $this->absorbed_per_min,
      'field_takedowns_per_15' => $this->takedowns_per_15,
      'field_knockdown_ratio' => $this->knockdown_ratio,
      'field_knockouts' => $this->knockouts,
      'field_decisions' => $this->decisions,
      'field_submissions' => $this->submissions,
      'field_age' => $this->age,
      'field_reach' => $this->reach,
      'field_leg_reach' => $this->leg_reach,
      'field_height' => $this->height,
      'field_weight' => $this->weight,
      'field_submission_avg_per_15' => $this->submission_avg_per_15,
      'field_takedown_defense' => $this->takedown_defense,
      'field_sig_strike_defense' => $this->sig_strike_defense,
      'field_average_fight_time' => $this->average_fight_time,
      'field_standing_strikes' => $this->standing_strikes,
      'field_clinch_strikes' => $this->clinch_strikes,
      'field_ground_strikes' => $this->ground_strikes,
      'field_strikes_to_head' => $this->strikes_to_head,
      'field_strikes_to_body' => $this->strikes_to_body,
      'field_strikes_to_leg' => $this->strikes_to_leg,
      'field_first_round_finishes' => $this->first_round_finishes,
    ];

    // Same property assignment as before, driven by the map above.
    foreach ($field_values as $field_name => $value) {
      $node->{$field_name} = $value;
    }

    $node->save();
  }

  /**
   * Helper function to transform the weight class.
   *
   * Replaces underscores with spaces and capitalises each word, so
   * "light_heavyweight" becomes "Light Heavyweight".
   *
   * @return string
   *   The transformed weight class.
   */
  public function transformWeightClass(): string {
    return ucwords(str_replace("_", " ", (string) $this->class));
  }

  /**
   * Helper function to retrieve a taxonomy term by name.
*
   * Searches the "ufc_divisions" vocabulary and creates the term when no
   * existing term has the given name.
   *
   * @param string $term_name
   *   Name of the term to look up.
   *
   * @return int
   *   ID of the existing or newly created term.
   */
  public function getTermByName($term_name): int {
    $term_storage = \Drupal::service('entity_type.manager')->getStorage('taxonomy_term');

    $terms = $term_storage->loadByProperties([
      'name' => $term_name,
      'vid' => 'ufc_divisions',
    ]);
    $term = reset($terms);

    if (!$term) {
      // save() returns a SAVED_* status code rather than the entity, so keep
      // the entity in a variable and read its ID after saving.
      $term = Term::create([
        'name' => $term_name,
        'vid' => 'ufc_divisions',
      ]);
      $term->save();
    }

    return (int) $term->id();
  }

  /**
   * Create media for the player headshot.
   *
   * Writes the headshot into "public://player-headshots/", wraps the file in
   * an "image" media entity and stores the media ID in $this->image_id. The
   * default headshot is used when the image is empty or cannot be read.
   *
   * @return void
   */
  public function createMediaEntityFromImage() {
    $file_name = $this->extractFilename();
    if (empty($file_name)) {
      // No usable name in the image URL: generate a random one.
      $rand = rand(0,100000000) . '.png';
      $file_name = "default-headshot_$rand";
    }

    $file_data = NULL;
    if (!empty($this->image)) {
      $file_data = file_get_contents($this->image);
    }

    // Covers a missing image as well as a failed or empty download.
    if (empty($file_data)) {
      $file_data = file_get_contents("public://player-headshots/default-headshot.jpeg");
    }

    $file = \Drupal::service('file.repository')->writeData(
      $file_data,
      "public://player-headshots/$file_name",
      FileSystemInterface::EXISTS_RENAME
    );

    $media_image = Media::create([
      'bundle' => 'image',
      'name' => $file_name,
      'field_media_image' => [
        'target_id' => $file->id(),
      ],
    ]);
    $media_image->save();

    $this->image_id = $media_image->id();
  }

  /**
   * Helper to extract file name for images.
   *
   * Returns the first run of uppercase letters, digits and underscores that
   * is followed by ".png" in $this->image.
   *
   * @return string|false
   *   The matched file name, or FALSE when the image is empty or nothing
   *   matches.
   */
  public function extractFilename() {
    if (empty($this->image)) {
      return FALSE;
    }

    // The dot is escaped so only a literal ".png" extension matches.
    preg_match('/[A-Z_0-9]*\.png/', $this->image, $matches);

    // reset() returns FALSE for an empty match list.
    return reset($matches);
  }

}