492 lines
14 KiB
PHP
Raw Normal View History

2024-03-13 15:48:45 +00:00
<?php
namespace Drupal\ufc;
2024-04-09 01:47:04 +00:00
use Drupal\Core\File\FileSystemInterface;
2024-03-13 15:48:45 +00:00
use Drupal\media\Entity\Media;
use Drupal\node\Entity\Node;
use Drupal\taxonomy\Entity\Term;
2024-04-09 01:47:04 +00:00
use Drupal\ufc\Traits\NameConversionTrait;
use Drupal\ufc\Traits\ScraperHelperTrait;
use GuzzleHttp\Client;
use Symfony\Component\DomCrawler\Crawler;
2024-03-13 15:48:45 +00:00
class Fighter {
2024-04-09 01:47:04 +00:00
use NameConversionTrait;
use ScraperHelperTrait;

// Scraping infrastructure: $http_client is assigned in the constructor and
// $crawler is assigned by setFighterPage().
public $crawler;
public $http_client;

// Identity. createPlayerNode() reads $first_name and $last_name; nothing in
// this class body assigns these three.
// NOTE(review): presumably populated by the caller or a trait - confirm.
public $name;
public $first_name;
public $last_name;

// Bio measurements: $height, $weight, $reach and $leg_reach are written by
// scrapeBio(); $age is written by setAge().
public $height;
public $weight;
public $age;
public $reach;
public $leg_reach;

// Headshot: $image is read by createMediaEntityFromImage() and
// extractFilename(); $image_id is written by createMediaEntityFromImage().
public $image;
public $image_id;

// Weight class key, read by transformWeightClass().
public $class;

// Record, written by setFighterRecord().
public $wins;
public $losses;
public $ties;

// Raw page markup, written by setFighterPage().
public $fighter_page;

// Accuracy values, written by setAccuracy().
public $striking_accuracy;
public $grappling_accuracy;

// Per-time averages.
// NOTE(review): not assigned in this class body; presumably written by
// extractAndSetAverage() from a trait - confirm.
public $strikes_per_min;
public $absorbed_per_min;
public $takedowns_per_15;
public $knockdown_ratio;

// Win breakdown, written by setWinsBreakdown().
public $knockouts;
public $decisions;
public $submissions;

// NOTE(review): not assigned in this class body; presumably written by
// extractAndSetAverage() from a trait - confirm.
public $submission_avg_per_15;
public $takedown_defense;
public $sig_strike_defense;
public $average_fight_time;

// Strikes by position.
// NOTE(review): presumably written by setStrikes() from a trait - confirm.
public $standing_strikes;
public $clinch_strikes;
public $ground_strikes;

// Strikes by target, written by setStrikesByPosition().
public $strikes_to_head;
public $strikes_to_body;
public $strikes_to_leg;

// Written by setWinsBreakdown().
public $first_round_finishes;
2024-03-13 15:48:45 +00:00
/**
 * Builds a fighter scraper around the given HTTP client.
 *
 * @param \GuzzleHttp\Client $httpClient
 *   Client stored on the instance; setFighterPage() uses it to request
 *   fighter pages.
 */
public function __construct(Client $httpClient) {
  $this->http_client = $httpClient;
}
/**
 * Parent method to scrape all data from a fighter profile.
 *
 * Fetches the profile page and, when it contains a stats section, runs every
 * individual scraper in turn.
 *
 * @param string $profile_uri
 *   Profile path, appended verbatim to "https://www.ufc.com".
 *
 * @return bool
 *   TRUE when the page was fetched and scraped; FALSE when the page could not
 *   be loaded or checkValidFighter() rejected it.
 */
public function scrapeDataFromFighterPage(string $profile_uri): bool {
  // Drop any crawler left over from a previously scraped profile so that a
  // failed request cannot make us scrape the wrong fighter's page.
  $this->crawler = NULL;

  $url = "https://www.ufc.com$profile_uri";
  $this->setFighterPage($url);

  // setFighterPage() swallows request errors, so the crawler may still be
  // missing here; calling filter() on it would be a fatal error.
  if (!$this->crawler instanceof Crawler || !$this->checkValidFighter()) {
    return FALSE;
  }

  $this->setAge();
  $this->scrapeBio();
  $this->setFighterRecord();
  $this->setAccuracy();
  $this->setAverages();
  $this->setWinsBreakdown();
  $this->setStrikesByPosition();
  return TRUE;
}
2024-04-09 01:47:04 +00:00
2024-03-13 15:48:45 +00:00
/**
 * Fetches the fighter page and prepares a crawler over its markup.
 *
 * On success $this->fighter_page holds the response body and $this->crawler
 * wraps it. On failure the error is logged to the 'ufc' channel and both are
 * left in an empty state, so later filter() calls simply match nothing.
 *
 * @param string $url
 *   Absolute URL of the fighter page.
 *
 * @return void
 */
public function setFighterPage(string $url): void {
  // Reset first: a failed request must not leave the previous fighter's page
  // and crawler in place.
  $this->fighter_page = '';
  $this->crawler = new Crawler();

  try {
    // NOTE(review): 'verify' => FALSE disables TLS certificate verification
    // for this request. Kept as-is to preserve behavior; confirm whether it
    // is still required.
    $response = $this->http_client->request('GET', $url, ['verify' => FALSE]);
    $this->fighter_page = $response->getBody()->getContents();
    $this->crawler = new Crawler($this->fighter_page);
  }
  catch (\Exception $e) {
    \Drupal::logger('ufc')->error('Unable to fetch fighter page @url: @message', [
      '@url' => $url,
      '@message' => $e->getMessage(),
    ]);
  }
}
2024-03-13 15:48:45 +00:00
2024-04-09 01:47:04 +00:00
/**
 * Checks whether the loaded fighter page has stats to pull.
 *
 * @return bool
 *   TRUE when a crawler is available and the page contains at least one
 *   "h2.stats-records__title" element, FALSE otherwise.
 */
public function checkValidFighter(): bool {
  // No page was loaded (for example the request failed): nothing to scrape.
  if (!$this->crawler instanceof Crawler) {
    return FALSE;
  }
  return $this->crawler->filter('h2.stats-records__title')->count() > 0;
}
/**
 * Reads the fighter age from the ".field--name-age" element.
 *
 * Stores 0 in $this->age when the element is missing, empty, or does not
 * contain a plain non-negative integer.
 *
 * @return void
 */
public function setAge(): void {
  $this->age = 0;

  // innerText() must not be called on an empty node list.
  $age_field = $this->crawler->filter('.field--name-age');
  if ($age_field->count() === 0) {
    return;
  }

  // Validate explicitly instead of relying on assert(), which may be
  // disabled in production.
  $age = trim($age_field->innerText());
  if (ctype_digit($age)) {
    $this->age = (int) $age;
  }
}
2024-04-09 01:47:04 +00:00
/**
 * Gets height, weight, reach and leg reach from the bio section.
 *
 * Each ".c-bio__row--3col" row is stripped of spaces and newlines and scanned
 * for decimal numbers. The first row is expected to hold height then weight,
 * the second row reach then leg reach; a row is only used when it yields
 * exactly two numbers.
 *
 * @return void
 */
public function scrapeBio(): void {
  $three_cols = $this->crawler->filter('.c-bio__row--3col');
  // A Crawler object is always truthy, so test the number of matched rows.
  if ($three_cols->count() === 0) {
    return;
  }

  $rows = [];
  foreach ($three_cols as $three_col) {
    // Extract the decimal numbers from the row's text.
    $cleaned = str_replace([" ", "\n"], "", $three_col->nodeValue);
    preg_match_all('/[0-9]+\.[0-9]+/s', $cleaned, $matches);
    $rows[] = $matches[0];
  }

  $height_weight = $rows[0] ?? [];
  $arm_leg_reach = $rows[1] ?? [];

  if (count($height_weight) == 2) {
    $this->height = (float) $height_weight[0];
    $this->weight = (float) $height_weight[1];
  }
  if (count($arm_leg_reach) == 2) {
    $this->reach = (float) $arm_leg_reach[0];
    $this->leg_reach = (float) $arm_leg_reach[1];
  }
}
/**
 * Extracts the fighter record (wins, losses, ties).
 *
 * Reads the text of ".hero-profile__division-body", expected to look like
 * "20-3-0 (W-L-D)". Nothing is changed when the element is missing or the
 * text does not start with a "W-L" or "W-L-D" pattern; a missing third number
 * is stored as 0 ties.
 *
 * @return void
 */
public function setFighterRecord(): void {
  $division_body = $this->crawler->filter(".hero-profile__division-body");
  // innerText() must not be called on an empty node list.
  if ($division_body->count() === 0) {
    return;
  }

  // Match the leading "20-3-0" instead of exploding and indexing blindly,
  // which raised undefined-offset warnings on unexpected text.
  $record = $division_body->innerText();
  if (preg_match('/^\s*(\d+)-(\d+)(?:-(\d+))?/', $record, $record_chunks) !== 1) {
    return;
  }

  $this->wins = (int) $record_chunks[1];
  $this->losses = (int) $record_chunks[2];
  $this->ties = (int) ($record_chunks[3] ?? 0);
}
2024-04-09 01:47:04 +00:00
2024-03-13 15:48:45 +00:00
/**
 * Extracts striking and takedown accuracy from the circle charts.
 *
 * For every ".e-chart-circle" element, the last child whose lower-cased text
 * contains "accuracy" is taken as the chart title. A title containing
 * "striking" sets $this->striking_accuracy and one containing "takedown" sets
 * $this->grappling_accuracy, both via extractAccuracyFromString().
 *
 * @return void
 */
public function setAccuracy(): void {
  foreach ($this->crawler->filter('.e-chart-circle') as $circle) {
    // Reset per chart: otherwise a chart without an "accuracy" child would
    // silently reuse the title of the previous chart.
    $cir_title = NULL;
    foreach ($circle->childNodes as $child) {
      $text = strtolower($child->textContent);
      if (str_contains($text, "accuracy")) {
        $cir_title = $text;
      }
    }
    if ($cir_title === NULL) {
      continue;
    }

    if (str_contains($cir_title, "striking")) {
      $this->striking_accuracy = $this->extractAccuracyFromString($cir_title);
    }
    if (str_contains($cir_title, "takedown")) {
      $this->grappling_accuracy = $this->extractAccuracyFromString($cir_title);
    }
  }
}
/**
 * Extracts averages from the statistics section.
 *
 * Passes every child node of each ".c-stat-compare--no-bar" element to
 * extractAndSetAverage().
 *
 * @return void
 */
public function setAverages(): void {
  $compare_wrappers = $this->crawler->filter(".c-stat-compare--no-bar");
  foreach ($compare_wrappers as $compare_wrapper) {
    // Only DOM nodes expose childNodes. The previous property_exists() guard
    // let a falsy item fall through to the childNodes access below.
    if (!$compare_wrapper instanceof \DOMNode) {
      continue;
    }
    foreach ($compare_wrapper->childNodes as $child) {
      $this->extractAndSetAverage($child);
    }
  }
}
/**
 * Extracts the win breakdown from the hero profile stats.
 *
 * The integer values of all ".hero-profile__stat-numb" elements are read in
 * document order. Two values are stored as knockouts and first round
 * finishes; three values as knockouts, submissions and first round finishes.
 * Any other count leaves those properties alone. Decisions is always set
 * to 0.
 *
 * @return void
 */
public function setWinsBreakdown(): void {
  $numbers = [];
  foreach ($this->crawler->filter('.hero-profile__stat-numb') as $node) {
    $numbers[] = (int) $node->textContent;
  }

  switch (count($numbers)) {
    case 2:
      [$this->knockouts, $this->first_round_finishes] = $numbers;
      break;

    case 3:
      [$this->knockouts, $this->submissions, $this->first_round_finishes] = $numbers;
      break;
  }

  $this->decisions = 0;
}
/**
 * Extracts significant strikes by position and by target.
 *
 * Walks every ".stats-records--three-column" section. In the section whose
 * text contains "sig. str. by position", each <div> child that mentions
 * "standing" has the value of its last element child handed to setStrikes().
 * In the section containing "sig. str. by target", the head, body and leg
 * values are read from their SVG text elements; a missing element stores
 * NULL.
 *
 * @return void
 */
public function setStrikesByPosition(): void {
  // Reads one target value as int; NULL when the element is absent, because
  // innerText() must not be called on an empty node list.
  $read_target = function (string $selector): ?int {
    $node = $this->crawler->filter($selector);
    return $node->count() > 0 ? (int) $node->innerText() : NULL;
  };

  $three_stats = $this->crawler->filter('.stats-records--three-column');
  foreach ($three_stats as $three_stat) {
    $text = strtolower($three_stat->textContent);

    if (str_contains($text, "sig. str. by position")) {
      foreach ($three_stat->childNodes as $child) {
        // Skip non-div nodes and divs without any element child to read.
        if ($child->nodeName != 'div' || $child->lastElementChild === NULL) {
          continue;
        }
        foreach ($child->childNodes as $grandchild) {
          $grand_text = strtolower($grandchild->textContent);
          if (str_contains($grand_text, "standing")) {
            $this->setStrikes($child->lastElementChild->nodeValue);
            // The argument depends only on $child, so further matches in
            // this div would just repeat the same call.
            break;
          }
        }
      }
    }

    if (str_contains($text, "sig. str. by target")) {
      $this->strikes_to_head = $read_target('text#e-stat-body_x5F__x5F_head_value');
      $this->strikes_to_body = $read_target('text#e-stat-body_x5F__x5F_body_value');
      $this->strikes_to_leg = $read_target('text#e-stat-body_x5F__x5F_leg_value');
    }
  }
}
/**
 * Helper to get a number out of a string.
 *
 * Strips HTML tags, collects every run of digits and joins the runs with a
 * dot, so "<b>3</b>.45" becomes "3.45". Returns an empty string when the
 * input contains no digits.
 *
 * @param string $string
 *   Text, possibly containing markup, to pull digits from.
 *
 * @return string
 *   The digit runs joined by ".".
 */
protected function extractNumber($string): string {
  preg_match_all('!\d+!', strip_tags($string), $digit_groups);
  return implode('.', $digit_groups[0]);
}
2024-04-09 01:47:04 +00:00
/**
 * Creates a fighter node from the scraped data.
 *
 * The node title is "<first_name> <last_name>" and the division term is
 * resolved through transformWeightClass() and getTermByName(). The outcome is
 * logged to the 'ufc' channel; a failed save is logged rather than rethrown.
 *
 * @return void
 */
public function createPlayerNode(): void {
  $division_id = $this->transformWeightClass();
  $title = $this->first_name . " " . $this->last_name;
  $node = Node::create([
    'type' => 'fighter',
    'title' => $title,
    'field_division' => $this->getTermByName($division_id),
    'field_first_name' => $this->first_name,
    'field_last_name' => $this->last_name,
    'field_wins' => $this->wins,
    'field_losses' => $this->losses,
    'field_ties' => $this->ties,
    'field_player_photo' => [
      'target_id' => $this->image_id,
    ],
    'field_striking_accuracy' => $this->striking_accuracy,
    'field_grappling_accuracy' => $this->grappling_accuracy,
    'field_strikes_per_minute' => $this->strikes_per_min,
    'field_absorbed_per_min' => $this->absorbed_per_min,
    'field_takedowns_per_15' => $this->takedowns_per_15,
    'field_knockdown_ratio' => $this->knockdown_ratio,
    'field_knockouts' => $this->knockouts,
    'field_decisions' => $this->decisions,
    'field_submissions' => $this->submissions,
    'field_age' => $this->age,
    'field_reach' => $this->reach,
    'field_leg_reach' => $this->leg_reach,
    'field_height' => $this->height,
    'field_weight' => $this->weight,
    'field_submission_avg_per_15' => $this->submission_avg_per_15,
    'field_takedown_defense' => $this->takedown_defense,
    'field_sig_strike_defense' => $this->sig_strike_defense,
    'field_average_fight_time' => $this->average_fight_time,
    'field_standing_strikes' => $this->standing_strikes,
    'field_clinch_strikes' => $this->clinch_strikes,
    'field_ground_strikes' => $this->ground_strikes,
    'field_strikes_to_head' => $this->strikes_to_head,
    'field_strikes_to_body' => $this->strikes_to_body,
    'field_strikes_to_leg' => $this->strikes_to_leg,
    'field_first_round_finishes' => $this->first_round_finishes,
  ]);
  $node->status = 1;
  $node->enforceIsNew();

  // Save exactly once. The previous "catch (e)" named a nonexistent class and
  // bound no variable, so it caught nothing, and the node was saved a second
  // time after the try block.
  try {
    $node->save();
    \Drupal::logger('ufc')->notice("$title created successfully.");
  }
  catch (\Exception $e) {
    \Drupal::logger('ufc')->error('Unable to create new player node for @title: @message', [
      '@title' => $title,
      '@message' => $e->getMessage(),
    ]);
  }
}
/**
 * Updates an existing fighter node with the scraped data.
 *
 * @param int|string $nid
 *   ID of the node to update. When no node with this ID can be loaded, an
 *   error is logged to the 'ufc' channel and nothing is saved.
 *
 * @return void
 */
public function updatePlayerNode($nid): void {
  $node = \Drupal::entityTypeManager()->getStorage('node')->load($nid);
  // load() returns NULL for an unknown ID; assigning fields on NULL would be
  // a fatal error.
  if ($node === NULL) {
    \Drupal::logger('ufc')->error('Unable to update fighter: node @nid could not be loaded.', [
      '@nid' => $nid,
    ]);
    return;
  }

  // Node field name => scraped value, in the same order as before.
  $field_values = [
    'field_wins' => $this->wins,
    'field_losses' => $this->losses,
    'field_ties' => $this->ties,
    'field_striking_accuracy' => $this->striking_accuracy,
    'field_grappling_accuracy' => $this->grappling_accuracy,
    'field_strikes_per_minute' => $this->strikes_per_min,
    'field_absorbed_per_min' => $this->absorbed_per_min,
    'field_takedowns_per_15' => $this->takedowns_per_15,
    'field_knockdown_ratio' => $this->knockdown_ratio,
    'field_knockouts' => $this->knockouts,
    'field_decisions' => $this->decisions,
    'field_submissions' => $this->submissions,
    'field_age' => $this->age,
    'field_reach' => $this->reach,
    'field_leg_reach' => $this->leg_reach,
    'field_height' => $this->height,
    'field_weight' => $this->weight,
    'field_submission_avg_per_15' => $this->submission_avg_per_15,
    'field_takedown_defense' => $this->takedown_defense,
    'field_sig_strike_defense' => $this->sig_strike_defense,
    'field_average_fight_time' => $this->average_fight_time,
    'field_standing_strikes' => $this->standing_strikes,
    'field_clinch_strikes' => $this->clinch_strikes,
    'field_ground_strikes' => $this->ground_strikes,
    'field_strikes_to_head' => $this->strikes_to_head,
    'field_strikes_to_body' => $this->strikes_to_body,
    'field_strikes_to_leg' => $this->strikes_to_leg,
    'field_first_round_finishes' => $this->first_round_finishes,
  ];
  foreach ($field_values as $field_name => $value) {
    // Dynamic property assignment goes through the same magic setter as the
    // explicit "$node->field_x = ..." statements did.
    $node->{$field_name} = $value;
  }

  $node->save();
}
/**
 * Helper function to transform the weight class into a display label.
 *
 * Replaces underscores in $this->class with spaces and upper-cases the first
 * letter of each word, e.g. "light_heavyweight" becomes "Light Heavyweight".
 *
 * @return string
 *   The transformed weight class.
 */
public function transformWeightClass(): string {
  return ucwords(str_replace("_", " ", $this->class));
}
/**
 * Helper function to retrieve a 'ufc_divisions' term ID by name.
 *
 * Creates and saves the term when no term with that name exists in the
 * vocabulary.
 *
 * @param string $term_name
 *   Name of the taxonomy term.
 *
 * @return int
 *   ID of the existing or newly created term.
 */
public function getTermByName($term_name): int {
  $properties = [
    'name' => $term_name,
    'vid' => 'ufc_divisions',
  ];

  // Load taxonomy term by properties.
  $taxonomyStorage = \Drupal::service('entity_type.manager')->getStorage('taxonomy_term');
  $terms = $taxonomyStorage->loadByProperties($properties);
  $term = reset($terms);

  if (!$term) {
    // save() returns a SAVED_NEW / SAVED_UPDATED status code, not the ID, so
    // keep the entity and read the ID from it after saving.
    $term = Term::create($properties);
    $term->save();
  }

  return (int) $term->id();
}
/**
 * Creates a media entity for the player headshot.
 *
 * Downloads $this->image, falling back to the default headshot in
 * "public://player-headshots" when the image is unset or cannot be read.
 * The data is written to "public://player-headshots/<file name>", wrapped in
 * an 'image' media entity, and the media ID is stored in $this->image_id.
 * When no image data is available or the file or media entity cannot be
 * saved, an error is logged to the 'ufc' channel and $this->image_id is left
 * unchanged.
 *
 * @return void
 */
public function createMediaEntityFromImage() {
  $file_name = $this->extractFilename();
  if (empty($file_name)) {
    // NOTE(review): the generated name ends in ".png" even when the default
    // JPEG data is written below - confirm whether that matters.
    $rand = rand(0,100000000) . '.png';
    $file_name = "default-headshot_$rand";
  }

  $file_data = FALSE;
  if (!empty($this->image)) {
    $file_data = file_get_contents($this->image);
  }
  if (empty($file_data)) {
    $file_data = file_get_contents("public://player-headshots/default-headshot.jpeg");
  }
  // file_get_contents() returns FALSE on failure; never hand that to
  // writeData().
  if (empty($file_data)) {
    \Drupal::logger('ufc')->error('No headshot data available for @name.', [
      '@name' => $file_name,
    ]);
    return;
  }

  try {
    $file = \Drupal::service('file.repository')->writeData($file_data, "public://player-headshots/$file_name", FileSystemInterface::EXISTS_RENAME);
    $media_image = Media::create([
      'bundle' => 'image',
      'name' => $file_name,
      'field_media_image' => [
        'target_id' => $file->id(),
      ],
    ]);
    $media_image->save();
    $this->image_id = $media_image->id();
  }
  catch (\Exception $e) {
    \Drupal::logger('ufc')->error('Unable to create headshot media for @name: @message', [
      '@name' => $file_name,
      '@message' => $e->getMessage(),
    ]);
  }
}
/**
 * Helper to extract the file name from the image URL.
 *
 * Looks in $this->image for the first run of upper-case letters, digits and
 * underscores directly followed by ".png".
 *
 * @return string|false
 *   The matched file name, or FALSE when $this->image is empty or contains no
 *   such name.
 */
public function extractFilename() {
  if (empty($this->image)) {
    return FALSE;
  }
  // The dot is escaped and at least one name character is required, so a
  // bare ".png" or "<any char>png" no longer counts as a file name.
  if (preg_match('/[A-Z_0-9]+\.png/', $this->image, $matches) !== 1) {
    return FALSE;
  }
  return $matches[0];
}
2024-04-09 01:47:04 +00:00
}