This commit is contained in:
Dan Chadwick
2024-04-09 01:47:04 +00:00
parent 3bcbe3b783
commit 3cfd95ee81
219 changed files with 47894 additions and 3767 deletions

View File

@@ -2,14 +2,23 @@
namespace Drupal\ufc;
use Drupal\Core\File\FileSystemInterface;
use Drupal\media\Entity\Media;
use Drupal\node\Entity\Node;
use Drupal\Component\Utility\Html;
use GuzzleHttp\Client;
use Drupal\taxonomy\Entity\Term;
use Drupal\ufc\Traits\NameConversionTrait;
use Drupal\ufc\Traits\ScraperHelperTrait;
use GuzzleHttp\Client;
use Symfony\Component\DomCrawler\Crawler;
class Fighter {
use NameConversionTrait;
use ScraperHelperTrait;
public $crawler;
public $http_client;
public $name;
public $first_name;
public $last_name;
@@ -24,7 +33,6 @@ class Fighter {
public $wins;
public $losses;
public $ties;
public $http_client;
public $fighter_page;
public $striking_accuracy;
public $grappling_accuracy;
@@ -35,6 +43,18 @@ class Fighter {
public $knockouts;
public $decisions;
public $submissions;
public $submission_avg_per_15;
public $takedown_defense;
public $sig_strike_defense;
public $average_fight_time;
public $standing_strikes;
public $clinch_strikes;
public $ground_strikes;
public $strikes_to_head;
public $strikes_to_body;
public $strikes_to_leg;
public $first_round_finishes;
/**
* Public constructor.
@@ -46,209 +66,96 @@ class Fighter {
}
/**
* Get fighters url from name.
* Parent method to scrape all data from fighter profile.
*
* @return string
* @return bool
*   TRUE when the profile has stats and was scraped, FALSE otherwise.
*/
public function getFighterUrl() {
// Manual overrides for name problems go here.
if ($this->first_name == 'Khaos') {
$this->first_name = 'Kalinn';
public function scrapeDataFromFighterPage(string $profile_uri): bool {
$url = "https://www.ufc.com$profile_uri";
$this->setFighterPage($url);
if (!$this->checkValidFighter()) {
return FALSE;
}
if ($this->first_name == 'J') {
$this->first_name = 'JP';
$this->last_name = 'Buys';
}
if ($this->last_name == 'Mc Kee') {
$this->last_name = 'McKee';
}
if ($this->last_name == 'Mc Gee') {
$this->last_name = 'McGee';
}
if ($this->last_name == 'Mc Gregor') {
$this->last_name = 'mcgregor';
}
if ($this->last_name == "O' Malley") {
$this->last_name = 'Omalley';
}
if ($this->first_name == "Don'") {
$this->first_name = 'dontale';
$this->last_name = 'mayes';
}
if ($this->first_name == "Marc-") {
$this->first_name = 'Marc';
}
if ($this->first_name == "A") {
$this->first_name = 'AJ';
$this->last_name = 'Dobson';
}
if ($this->first_name == "C") {
$this->first_name = 'CB';
$this->last_name = 'Dollaway';
}
if ($this->last_name == "Della Maddalena") {
$this->last_name = 'Della';
}
if ($this->first_name == "Elizeudos") {
$this->first_name = 'elizeu';
$this->last_name = 'dos-santos';
}
if ($this->last_name == "La Flare") {
$this->last_name = 'laflare';
}
if ($this->first_name == "JoelÁlvarez") {
$this->first_name = 'Joel';
}
if ($this->last_name == "J Brown") {
$this->first_name = 'TJ';
$this->last_name = 'Brown';
}
if ($this->first_name == "Alexda") {
$this->first_name = "alex-da";
}
if ($this->last_name == "Mc Kinney") {
$this->last_name = "mckinney";
}
if ($this->last_name == "Van Camp") {
$this->last_name = "vancamp";
}
if ($this->last_name == "J Laramie") {
$this->first_name = "TJ";
$this->last_name = "Laramie";
}
if ($this->last_name == "Al- Qaisi") {
$this->last_name = "alqaisi";
}
if ($this->first_name == "Alatengheili") {
$this->first_name = "heili";
$this->last_name = "alateng";
}
if ($this->last_name == "J Dillashaw") {
$this->first_name = "TJ";
$this->last_name = "Dillashaw";
}
if ($this->first_name == "Andersondos") {
$this->first_name = "Anderson-dos";
}
if ($this->last_name == "Silvade Andrade") {
$this->last_name = "Silva-de-andrade";
}
if ($this->first_name == "Ode'") {
$this->first_name = "ode";
}
if ($this->first_name == "Sumudaerji") {
$this->first_name = "su";
$this->last_name = "mudaerji";
}
$hyphens = str_replace(" ", "-", $this->last_name);
$suffix = $this->first_name . "-" . $hyphens;
if ($this->first_name == "Aoriqileng") {
$suffix = $this->first_name;
}
$url = "https://www.ufc.com/athlete/$suffix";
$trim = rtrim($url);
return $url;
$this->setAge();
$this->scrapeBio();
$this->setFighterRecord();
$this->setAccuracy();
$this->setAverages();
$this->setWinsBreakdown();
$this->setStrikesByPosition();
return TRUE;
}
/**
* Get contents of the fighter page.
*
* @return void
*/
public function getFighterPage() {
public function setFighterPage(string $url): void {
  // Fetches $url and stores the raw HTML in $this->fighter_page and a
  // Crawler built from it in $this->crawler.
  //
  // Start from an empty page: if the request fails, the scrape methods that
  // read $this->crawler afterwards must see "no matches" rather than a NULL
  // crawler or the markup of a previously loaded fighter.
  $this->fighter_page = '';
  $this->crawler = new Crawler();
  try {
    // NOTE(review): 'verify' => FALSE turns off TLS certificate verification;
    // confirm this is still required.
    $response = $this->http_client->request('GET', $url, ['verify' => FALSE]);
    $this->fighter_page = $response->getBody()->getContents();
    $this->crawler = new Crawler($this->fighter_page);
  }
  catch (\Exception $e) {
    // Best effort: report the failure and leave the empty page in place.
    echo 'Caught exception: ', $e->getMessage(), "\n";
  }
}
/*
USE THIS FOR TESTING:
$player_url = "https://www.ufc.com/athlete/anthony-hamilton";
try {
$request = $this->http_client->request('GET', $player_url);
}
catch (\Exception $e) {
/**
 * Checks whether the loaded fighter page has stats to pull.
 *
 * @return bool
 *   TRUE when at least one 'h2.stats-records__title' element is present on
 *   the page held by $this->crawler, FALSE otherwise.
 */
public function checkValidFighter(): bool {
  $stats_titles = $this->crawler->filter('h2.stats-records__title');
  return $stats_titles->count() != 0;
}
/**
* Get fighter age.
*
* @return void
* @return bool
*/
public function getAge() {
$pattern = '/<div class="field field--name-age(.*)<\/div>/';
preg_match($pattern, $this->fighter_page, $matches);
preg_match_all('!\d+!', $matches[0], $age);
$fighter_age = reset($age[0]);
$this->age = (float) $fighter_age;
public function setAge(): void {
  // Reads the fighter's age from the '.field--name-age' element and stores
  // it on $this->age as an integer; 0 means no age was found.
  $age_field = $this->crawler->filter('.field--name-age');
  // innerText() expects at least one matched node, so check for the element
  // before reading it. Trim so surrounding whitespace does not trip the
  // digit check below.
  $age = $age_field->count() > 0 ? trim($age_field->innerText()) : '';
  if ($age === '') {
    $this->age = 0;
    return;
  }
  assert(ctype_digit($age));
  $this->age = (int) $age;
}
public function getBio() {
$pattern = '/<div class="c-bio__text">[0-9]+\.[0-9]+/s';
preg_match_all($pattern, $this->fighter_page, $matches);
$matches = reset($matches);
// Get height.
$pattern = '/[0-9]+\.[0-9]+/';
preg_match($pattern, $matches[0], $height);
$this->height = reset($height);
// Get weight.
$pattern = '/[0-9]+\.[0-9]+/';
preg_match($pattern, $matches[1], $weight);
$this->weight = reset($weight);
// Get reach.
$pattern = '/[0-9]+\.[0-9]+/';
preg_match($pattern, $matches[2], $reach);
$this->reach = reset($reach);
// Get leg reach.
$pattern = '/[0-9]+\.[0-9]+/';
preg_match($pattern, $matches[3], $leg_reach);
$this->leg_reach = reset($leg_reach);
/**
 * Scrapes height, weight, reach and leg reach from the bio section.
 *
 * Decimal numbers are collected from each '.c-bio__row--3col' row. The first
 * row supplies height then weight, the second supplies reach then leg reach.
 * A row only updates its two properties when it holds exactly two decimals;
 * otherwise those properties are left untouched.
 */
public function scrapeBio(): void {
  $rows = $this->crawler->filter('.c-bio__row--3col');
  $decimals_per_row = [];
  foreach ($rows as $row) {
    // Strip spaces and newlines so every value is one contiguous token.
    $cleaned = str_replace([" ", "\n"], "", $row->nodeValue);
    preg_match_all('/[0-9]+\.[0-9]+/', $cleaned, $matches);
    // Index 0 holds the list of full matches for this row.
    $decimals_per_row[] = $matches[0];
  }

  $height_weight = $decimals_per_row[0] ?? [];
  if (count($height_weight) === 2) {
    $this->height = (float) $height_weight[0];
    $this->weight = (float) $height_weight[1];
  }

  $arm_leg_reach = $decimals_per_row[1] ?? [];
  if (count($arm_leg_reach) === 2) {
    $this->reach = (float) $arm_leg_reach[0];
    $this->leg_reach = (float) $arm_leg_reach[1];
  }
}
/**
@@ -256,37 +163,46 @@ class Fighter {
*
* @return void
*/
public function getFighterRecord() {
$pattern = "/[0-9]+[-][0-9]+[-][0-9]+.\(W-L-D\)/";
preg_match($pattern, $this->fighter_page, $matches);
$r = reset($matches);
$r = str_replace(" (W-L-D)", "", $r);
$record_chunks = explode("-", $r);
$this->wins = $record_chunks[0];
$this->losses = $record_chunks[1];
$this->ties = $record_chunks[2];
public function setFighterRecord(): void {
  // Parses the record shown in '.hero-profile__division-body', e.g.
  // "20-3-0 (W-L-D)", and stores wins, losses and ties as integers. The
  // properties are left untouched when no such record is found.
  $division_body = $this->crawler->filter(".hero-profile__division-body");
  // innerText() expects at least one matched node.
  if ($division_body->count() === 0) {
    return;
  }
  // Match the three dash-separated counts anywhere in the text so leading
  // whitespace or an unexpected format cannot yield undefined array offsets.
  if (preg_match('/(\d+)-(\d+)-(\d+)/', $division_body->innerText(), $record) !== 1) {
    return;
  }
  $this->wins = (int) $record[1];
  $this->losses = (int) $record[2];
  $this->ties = (int) $record[3];
}
/**
* Extracts the striking accuracy.
*
* @return void
*/
public function getStrikingAccuracy() {
$pattern = '/<title>Striking accuracy.(.*)<\/title>/';
preg_match($pattern, $this->fighter_page, $matches);
$this->striking_accuracy = str_replace('%', '', $matches[1]);
}
/**
* Extracts the grappling accuracy.
*
* @return void
*/
public function getGrapplingAccuracy() {
$pattern = '/<title>Grappling accuracy.(.*)<\/title>/';
preg_match($pattern, $this->fighter_page, $matches);
$this->grappling_accuracy = str_replace('%', '', $matches[1]);
public function setAccuracy(): void {
  // Reads striking and takedown accuracy from the '.e-chart-circle' charts.
  // Each chart is identified by a child node whose text mentions "accuracy";
  // that text is then passed to extractAccuracyFromString().
  $circles = $this->crawler->filter('.e-chart-circle');
  foreach ($circles as $circle) {
    // Reset for every chart so a chart without an accuracy label cannot
    // silently reuse the label found in an earlier chart.
    $circle_title = NULL;
    foreach ($circle->childNodes as $child) {
      $text = strtolower($child->textContent);
      if (str_contains($text, "accuracy")) {
        $circle_title = $text;
      }
    }
    if ($circle_title === NULL) {
      continue;
    }
    if (str_contains($circle_title, "striking")) {
      $this->striking_accuracy = $this->extractAccuracyFromString($circle_title);
    }
    // Takedown accuracy is stored in the grappling_accuracy property.
    if (str_contains($circle_title, "takedown")) {
      $this->grappling_accuracy = $this->extractAccuracyFromString($circle_title);
    }
  }
}
/**
@@ -294,15 +210,19 @@ class Fighter {
*
* @return void
*/
public function getAverages() {
$pattern = '/<div class="c-stat-compare__group-1(.*)(<\/div>)/sU';
preg_match_all($pattern, $this->fighter_page, $matches);
$this->strikes_per_min = $this->extractNumber($matches[1][0]);
$this->takedowns_per_15 = $this->extractNumber($matches[1][1]);
$this->knockdown_ratio = $this->extractNumber($matches[1][3]);
$pattern = '/<div class="c-stat-compare__group-2(.*)(<\/div>)/sU';
preg_match_all($pattern, $this->fighter_page, $matches);
$this->absorbed_per_min = $this->extractNumber($matches[1][0]);
public function setAverages(): void {
  // Passes every child node of each '.c-stat-compare--no-bar' wrapper to
  // extractAndSetAverage(), which is responsible for storing the value.
  $wrappers = $this->crawler->filter(".c-stat-compare--no-bar");
  if ($wrappers === NULL) {
    return;
  }
  foreach ($wrappers as $wrapper) {
    // Skip only a wrapper that is truthy yet has no childNodes property.
    if (!$wrapper || property_exists($wrapper, "childNodes")) {
      foreach ($wrapper->childNodes as $child) {
        $this->extractAndSetAverage($child);
      }
    }
  }
}
/**
@@ -310,12 +230,50 @@ class Fighter {
*
* @return void
*/
public function getWinsBreakdown() {
$pattern = '/<div class="c-stat-3bar__value">(.*)<\/div>/sU';
preg_match_all($pattern, $this->fighter_page, $matches);
$this->knockouts = $this->extractWinByType($matches[0][3]);
$this->decisions = $this->extractWinByType($matches[0][4]);
$this->submissions = $this->extractWinByType($matches[0][5]);
public function setWinsBreakdown(): void {
  // Collects the integer value of every '.hero-profile__stat-numb' element
  // and maps them by position:
  // - two values:   knockouts, first round finishes;
  // - three values: knockouts, submissions, first round finishes.
  // Any other count leaves those properties untouched. Decisions are always
  // set to 0.
  $numbers = [];
  foreach ($this->crawler->filter('.hero-profile__stat-numb') as $stat_node) {
    $numbers[] = (int) $stat_node->textContent;
  }

  switch (count($numbers)) {
    case 2:
      [$this->knockouts, $this->first_round_finishes] = $numbers;
      break;

    case 3:
      [$this->knockouts, $this->submissions, $this->first_round_finishes] = $numbers;
      break;
  }

  $this->decisions = 0;
}
/**
 * Extracts significant strikes by position and by target.
 *
 * Walks every '.stats-records--three-column' section. In the section whose
 * text mentions "sig. str. by position", each div child that contains a node
 * mentioning "standing" has the text of its last element child passed to
 * setStrikes(). In the section mentioning "sig. str. by target", the head,
 * body and leg values are read from the page as integers.
 */
public function setStrikesByPosition(): void {
  $page = $this->crawler;
  foreach ($page->filter('.stats-records--three-column') as $section) {
    $section_text = strtolower($section->textContent);

    if (str_contains($section_text, "sig. str. by position")) {
      foreach ($section->childNodes as $child) {
        if ($child->nodeName != 'div') {
          continue;
        }
        foreach ($child->childNodes as $grandchild) {
          if (!str_contains(strtolower($grandchild->textContent), "standing")) {
            continue;
          }
          $this->setStrikes($child->lastElementChild->nodeValue);
        }
      }
    }

    if (str_contains($section_text, "sig. str. by target")) {
      // These lookups run against the whole page, not just this section.
      $head = $page->filter('text#e-stat-body_x5F__x5F_head_value');
      $this->strikes_to_head = (int) $head->innerText();
      $body = $page->filter('text#e-stat-body_x5F__x5F_body_value');
      $this->strikes_to_body = (int) $body->innerText();
      $leg = $page->filter('text#e-stat-body_x5F__x5F_leg_value');
      $this->strikes_to_leg = (int) $leg->innerText();
    }
  }
}
/**
@@ -324,31 +282,20 @@ class Fighter {
* @param [type] $string
* @return void
*/
protected function extractNumber($string) {
protected function extractNumber($string): string {
  // Strips markup, then joins every run of digits with "." in order of
  // appearance, e.g. "<b>3.45</b>" becomes "3.45". Returns "" when the text
  // has no digits.
  preg_match_all('!\d+!', strip_tags($string), $digit_runs);
  return implode('.', $digit_runs[0]);
}
/**
* Helper to extract number from number and percentage.
*
* @param [type] $string
* @return void
*/
protected function extractWinByType($string) {
$no_tags = strip_tags($string);
preg_match_all('!\d+!', $no_tags, $matches);
return $matches[0][0];
}
/**
/**
* Creates a player node.
*
* @return void
*/
public function createPlayerNode() {
public function createPlayerNode(): void {
$division_id = self::transformWeightClass();
$title = $this->first_name . " " . $this->last_name;
$node = Node::create([
@@ -376,12 +323,31 @@ class Fighter {
'field_reach' => $this->reach,
'field_leg_reach' => $this->leg_reach,
'field_height' => $this->height,
'field_weight' => $this->weight
'field_weight' => $this->weight,
'field_submission_avg_per_15' => $this->submission_avg_per_15,
'field_takedown_defense' => $this->takedown_defense,
'field_sig_strike_defense' => $this->sig_strike_defense,
'field_average_fight_time' => $this->average_fight_time,
'field_standing_strikes' => $this->standing_strikes,
'field_clinch_strikes' => $this->clinch_strikes,
'field_ground_strikes' => $this->ground_strikes,
'field_strikes_to_head' => $this->strikes_to_head,
'field_strikes_to_body' => $this->strikes_to_body,
'field_strikes_to_leg' => $this->strikes_to_leg,
'field_first_round_finishes' => $this->first_round_finishes,
]);
$node->status = 1;
$node->enforceIsNew();
// Save once, inside the try, so a failed save is logged instead of
// escaping. The catch must name a real exception class and bind a variable:
// a bare `catch (e)` refers to a class called "e" and never matches.
try {
  $node->save();
  \Drupal::logger('ufc')->notice("$title created successfully.");
}
catch (\Exception $e) {
  \Drupal::logger('ufc')->error("Unable to create new player node for $title: " . $e->getMessage());
}
}
/**
@@ -390,7 +356,7 @@ class Fighter {
* @param $nid
* @return void
*/
public function updatePlayerNode($nid) {
public function updatePlayerNode($nid): void {
$node_storage = \Drupal::entityTypeManager()->getStorage('node');
$node = $node_storage->load($nid);
$node->field_wins = $this->wins;
@@ -410,9 +376,19 @@ class Fighter {
$node->field_leg_reach = $this->leg_reach;
$node->field_height = $this->height;
$node->field_weight = $this->weight;
$node->field_submission_avg_per_15 = $this->submission_avg_per_15;
$node->field_takedown_defense = $this->takedown_defense;
$node->field_sig_strike_defense = $this->sig_strike_defense;
$node->field_average_fight_time = $this->average_fight_time;
$node->field_standing_strikes = $this->standing_strikes;
$node->field_clinch_strikes = $this->clinch_strikes;
$node->field_ground_strikes = $this->ground_strikes;
$node->field_strikes_to_head = $this->strikes_to_head;
$node->field_strikes_to_body = $this->strikes_to_body;
$node->field_strikes_to_leg = $this->strikes_to_leg;
$node->field_first_round_finishes = $this->first_round_finishes;
$node->save();
}
/**
@@ -420,24 +396,26 @@ class Fighter {
*
* @return void
*/
public function transformWeightClass() {
public function transformWeightClass(): string {
  // Turns the underscore-separated weight class in $this->class (e.g.
  // "light_heavyweight") into a space-separated label with each word
  // capitalised ("Light Heavyweight").
  return ucwords(str_replace("_", " ", $this->class));
}
/**
* Helper function to retrieve taxo term by name.
*
* @param [type] $term_name
* @param string $term_name
* @return int
*/
public function getTermByName($term_name) {
public function getTermByName($term_name): int {
// Get taxonomy term storage.
$taxonomyStorage = \Drupal::service('entity.manager')->getStorage('taxonomy_term');
$taxonomyStorage = \Drupal::service('entity_type.manager')->getStorage('taxonomy_term');
// Set name properties.
$properties = [];
$properties['name'] = $term_name;
$properties['vid'] = 'ufc_divisions';
@@ -453,7 +431,6 @@ class Fighter {
return $new_term;
};
return $term->id();
}
@@ -475,13 +452,10 @@ class Fighter {
}
if (empty($file_data)) {
$file_data = file_get_contents("public://player-headshots/headshot-default.png");
$file_data = file_get_contents("public://player-headshots/default-headshot.jpeg");
}
$file = file_save_data(
$file_data,
"public://player-headshots/$file_name", FILE_EXISTS_RENAME);
$file = \Drupal::service('file.repository')->writeData($file_data, "public://player-headshots/$file_name", FileSystemInterface::EXISTS_RENAME);
$media_image = Media::create([
'bundle' => 'image',
'name' => $file_name,
@@ -508,4 +482,4 @@ class Fighter {
return FALSE;
}
}
}