492 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
			
		
		
	
	
			492 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
<?php
 | 
						|
 | 
						|
namespace Drupal\ufc;
 | 
						|
 | 
						|
use Drupal\Core\File\FileSystemInterface;
 | 
						|
use Drupal\media\Entity\Media;
 | 
						|
use Drupal\node\Entity\Node;
 | 
						|
use Drupal\taxonomy\Entity\Term;
 | 
						|
use Drupal\ufc\Traits\NameConversionTrait;
 | 
						|
use Drupal\ufc\Traits\ScraperHelperTrait;
 | 
						|
use GuzzleHttp\Client;
 | 
						|
use Symfony\Component\DomCrawler\Crawler;
 | 
						|
 | 
						|
/**
 * Scrapes one fighter profile page from ufc.com and persists it in Drupal.
 *
 * Typical flow, as far as this class shows: call scrapeDataFromFighterPage()
 * to populate the public stat properties, createMediaEntityFromImage() to
 * store the headshot, then createPlayerNode() or updatePlayerNode().
 *
 * Several helpers used below (extractAccuracyFromString(),
 * extractAndSetAverage(), setStrikes()) are not defined in this class; they
 * are presumably provided by the imported traits.
 */
class Fighter {

  use NameConversionTrait;
  use ScraperHelperTrait;

  /**
   * Crawler over the most recently fetched page, or NULL if none is loaded.
   *
   * @var \Symfony\Component\DomCrawler\Crawler|null
   */
  public $crawler;

  /**
   * HTTP client used to download profile pages.
   *
   * @var \GuzzleHttp\Client
   */
  public $http_client;

  // Identity data. Nothing in this class assigns these, so they are
  // presumably populated by the caller (or a trait) before the node and
  // media methods run. $class holds the weight class in snake_case form
  // (see transformWeightClass()); $image is the headshot location.
  public $name;
  public $first_name;
  public $last_name;

  // Bio values filled by scrapeBio() / setAge(). Units are whatever the page
  // prints; this class does not convert them.
  public $height;
  public $weight;
  public $age;
  public $reach;
  public $leg_reach;

  public $image;

  /**
   * ID of the media entity created by createMediaEntityFromImage().
   *
   * @var int|string|null
   */
  public $image_id;

  public $class;

  // Record, filled by setFighterRecord().
  public $wins;
  public $losses;
  public $ties;

  /**
   * Raw HTML of the most recently fetched page, or NULL if none is loaded.
   *
   * @var string|null
   */
  public $fighter_page;

  // Accuracy values, filled by setAccuracy().
  public $striking_accuracy;
  public $grappling_accuracy;

  // Averages and strike breakdowns. Not assigned directly in this class;
  // presumably set by the trait helpers called from setAverages() and
  // setStrikesByPosition().
  public $strikes_per_min;
  public $absorbed_per_min;
  public $takedowns_per_15;
  public $knockdown_ratio;

  // Win breakdown, filled by setWinsBreakdown().
  public $knockouts;
  public $decisions;
  public $submissions;

  public $submission_avg_per_15;
  public $takedown_defense;
  public $sig_strike_defense;
  public $average_fight_time;
  public $standing_strikes;
  public $clinch_strikes;
  public $ground_strikes;

  // Strikes by target, filled by setStrikesByPosition().
  public $strikes_to_head;
  public $strikes_to_body;
  public $strikes_to_leg;

  public $first_round_finishes;

  /**
   * Public constructor.
   *
   * @param \GuzzleHttp\Client $httpClient
   *   Client used for all page downloads.
   */
  public function __construct(Client $httpClient) {
    $this->http_client = $httpClient;
  }

  /**
   * Fetches a fighter profile and runs every scraper over it.
   *
   * @param string $profile_uri
   *   Path of the profile, appended verbatim to "https://www.ufc.com".
   *
   * @return bool
   *   TRUE when the page was fetched and contains a stats section; FALSE
   *   when the download failed or the page has no stats to scrape.
   */
  public function scrapeDataFromFighterPage(string $profile_uri): bool {
    $this->setFighterPage("https://www.ufc.com$profile_uri");
    if (!$this->checkValidFighter()) {
      return FALSE;
    }

    $this->setAge();
    $this->scrapeBio();
    $this->setFighterRecord();
    $this->setAccuracy();
    $this->setAverages();
    $this->setWinsBreakdown();
    $this->setStrikesByPosition();

    return TRUE;
  }

  /**
   * Downloads a page and makes it the current crawler target.
   *
   * On failure the error is logged and both $fighter_page and $crawler are
   * left NULL, so a previously loaded page can never be scraped by mistake.
   *
   * @param string $url
   *   Absolute URL of the fighter page.
   */
  public function setFighterPage(string $url): void {
    // Drop any earlier page first; otherwise a failed request would leave
    // the previous fighter's DOM in place.
    $this->fighter_page = NULL;
    $this->crawler = NULL;

    try {
      // NOTE(review): 'verify' => FALSE disables TLS certificate checks.
      // Kept because the original relied on it; confirm it is still needed.
      $response = $this->http_client->request('GET', $url, ['verify' => FALSE]);
      $this->fighter_page = $response->getBody()->getContents();
      $this->crawler = new Crawler($this->fighter_page);
    } catch (\Exception $e) {
      \Drupal::logger('ufc')->error('Unable to fetch fighter page @url: @message', [
        '@url' => $url,
        '@message' => $e->getMessage(),
      ]);
    }
  }

  /**
   * Checks whether the loaded page has stats to pull before proceeding.
   *
   * @return bool
   *   TRUE if a page is loaded and it contains at least one
   *   "h2.stats-records__title" element.
   */
  public function checkValidFighter(): bool {
    return $this->filterPage('h2.stats-records__title')->count() > 0;
  }

  /**
   * Reads the fighter age from the page.
   *
   * Sets $age to the scraped integer, or to 0 when the age element is
   * missing, empty, or not made up purely of digits.
   */
  public function setAge(): void {
    $age = $this->innerTextOrEmpty('.field--name-age');
    $this->age = ctype_digit($age) ? (int) $age : 0;
  }

  /**
   * Gets height/weight and reach/leg reach from the bio section.
   *
   * Only values written with a decimal point are picked up. The first
   * three-column row must yield exactly two such values (height, weight)
   * and the second exactly two (reach, leg reach); otherwise the respective
   * properties are left untouched.
   */
  public function scrapeBio(): void {
    $rows = [];
    foreach ($this->filterPage('.c-bio__row--3col') as $row) {
      $compact = str_replace([" ", "\n"], "", (string) $row->nodeValue);
      // Decimal-only on purpose: integers in the same row (presumably the
      // age or debut columns) must not be counted as measurements.
      preg_match_all('/[0-9]+\.[0-9]+/s', $compact, $matches);
      $rows[] = $matches[0];
    }

    $height_weight = $rows[0] ?? [];
    $arm_leg_reach = $rows[1] ?? [];

    if (count($height_weight) === 2) {
      $this->height = (float) $height_weight[0];
      $this->weight = (float) $height_weight[1];
    }
    if (count($arm_leg_reach) === 2) {
      $this->reach = (float) $arm_leg_reach[0];
      $this->leg_reach = (float) $arm_leg_reach[1];
    }
  }

  /**
   * Extracts the win-loss-draw record, e.g. "20-3-0 (W-L-D)".
   *
   * Leaves $wins, $losses and $ties untouched when the element is missing
   * or does not contain three dash-separated numbers.
   */
  public function setFighterRecord(): void {
    $record = $this->innerTextOrEmpty('.hero-profile__division-body');
    if (!preg_match('/(\d+)-(\d+)-(\d+)/', $record, $parts)) {
      return;
    }

    $this->wins = (int) $parts[1];
    $this->losses = (int) $parts[2];
    $this->ties = (int) $parts[3];
  }

  /**
   * Extracts the striking and takedown accuracy from the circle charts.
   *
   * For each ".e-chart-circle" element, the last child whose text mentions
   * "accuracy" is used as the chart title. The title is handed to
   * extractAccuracyFromString() and stored in $striking_accuracy or
   * $grappling_accuracy depending on whether it mentions "striking" or
   * "takedown".
   */
  public function setAccuracy(): void {
    foreach ($this->filterPage('.e-chart-circle') as $circle) {
      // Reset per chart so a chart without a title cannot inherit the
      // previous chart's text.
      $title = NULL;
      foreach ($circle->childNodes as $child) {
        $text = strtolower($child->textContent);
        if (str_contains($text, "accuracy")) {
          $title = $text;
        }
      }
      if ($title === NULL) {
        continue;
      }

      if (str_contains($title, "striking")) {
        $this->striking_accuracy = $this->extractAccuracyFromString($title);
      }
      if (str_contains($title, "takedown")) {
        $this->grappling_accuracy = $this->extractAccuracyFromString($title);
      }
    }
  }

  /**
   * Extracts averages from the statistics section.
   *
   * Every direct child of each ".c-stat-compare--no-bar" element is passed
   * to extractAndSetAverage(), which decides what (if anything) to store.
   */
  public function setAverages(): void {
    foreach ($this->filterPage('.c-stat-compare--no-bar') as $wrapper) {
      foreach ($wrapper->childNodes as $child) {
        $this->extractAndSetAverage($child);
      }
    }
  }

  /**
   * Extracts wins per type from the hero stat numbers.
   *
   * With two numbers on the page they are read as knockouts and first-round
   * finishes; with three, as knockouts, submissions and first-round
   * finishes. Any other count leaves those properties untouched.
   * $decisions is always set to 0 because this method does not read it.
   */
  public function setWinsBreakdown(): void {
    $stats = [];
    foreach ($this->filterPage('.hero-profile__stat-numb') as $stat) {
      $stats[] = (int) $stat->textContent;
    }

    if (count($stats) === 2) {
      [$this->knockouts, $this->first_round_finishes] = $stats;
    }
    elseif (count($stats) === 3) {
      [$this->knockouts, $this->submissions, $this->first_round_finishes] = $stats;
    }

    $this->decisions = 0;
  }

  /**
   * Extracts significant strikes by position and by target.
   *
   * Position data is delegated to setStrikes(); target data (head, body,
   * leg) is read from the SVG text elements and stored as integers, with 0
   * used when an element is missing.
   */
  public function setStrikesByPosition(): void {
    foreach ($this->filterPage('.stats-records--three-column') as $section) {
      $text = strtolower($section->textContent);

      if (str_contains($text, "sig. str. by position")) {
        foreach ($section->childNodes as $child) {
          if ($child->nodeName !== 'div') {
            continue;
          }
          foreach ($child->childNodes as $grandchild) {
            if (!str_contains(strtolower($grandchild->textContent), "standing")) {
              continue;
            }
            // The div's last element holds the value block handed to
            // setStrikes(); skip divs without element children.
            $value = $child->lastElementChild?->nodeValue;
            if ($value !== NULL) {
              $this->setStrikes($value);
            }
          }
        }
      }

      if (str_contains($text, "sig. str. by target")) {
        $this->strikes_to_head = (int) $this->innerTextOrEmpty('text#e-stat-body_x5F__x5F_head_value');
        $this->strikes_to_body = (int) $this->innerTextOrEmpty('text#e-stat-body_x5F__x5F_body_value');
        $this->strikes_to_leg = (int) $this->innerTextOrEmpty('text#e-stat-body_x5F__x5F_leg_value');
      }
    }
  }

  /**
   * Helper to get a number out of a string.
   *
   * Strips tags, collects every run of digits and joins the runs with ".",
   * so "12.5%" becomes "12.5" and "<b>7</b>" becomes "7".
   *
   * @param mixed $string
   *   Text (possibly containing markup) to extract digits from.
   *
   * @return string
   *   The digit runs joined by "."; an empty string when there are none.
   */
  protected function extractNumber($string): string {
    preg_match_all('!\d+!', strip_tags((string) $string), $matches);

    return implode('.', $matches[0]);
  }

  /**
   * Creates a published "fighter" node from the current property values.
   *
   * A failed save is logged and does not propagate to the caller.
   */
  public function createPlayerNode(): void {
    $title = $this->first_name . " " . $this->last_name;
    $values = [
      'type' => 'fighter',
      'title' => $title,
      'field_division' => $this->getTermByName($this->transformWeightClass()),
      'field_first_name' => $this->first_name,
      'field_last_name' => $this->last_name,
      'field_player_photo' => [
        'target_id' => $this->image_id,
      ],
    ] + $this->statFieldValues();

    $node = Node::create($values);
    $node->status = 1;
    $node->enforceIsNew();

    try {
      $node->save();
      \Drupal::logger('ufc')->notice("$title created successfully.");
    } catch (\Exception $e) {
      \Drupal::logger('ufc')->error('Unable to create new player node for @title: @message', [
        '@title' => $title,
        '@message' => $e->getMessage(),
      ]);
    }
  }

  /**
   * Updates the stat fields of an existing player node.
   *
   * Title, names, division and photo are left as they are. If the node
   * cannot be loaded, a warning is logged and nothing is saved.
   *
   * @param int|string $nid
   *   ID of the node to update.
   */
  public function updatePlayerNode($nid): void {
    $node = \Drupal::entityTypeManager()->getStorage('node')->load($nid);
    if (!$node) {
      \Drupal::logger('ufc')->warning('Unable to update player node @nid: node not found.', [
        '@nid' => $nid,
      ]);
      return;
    }

    foreach ($this->statFieldValues() as $field_name => $value) {
      // Property-style assignment, exactly as the original field-by-field
      // code did.
      $node->{$field_name} = $value;
    }

    $node->save();
  }

  /**
   * Helper function to transform the weight class into a term name.
   *
   * @return string
   *   $class with underscores replaced by spaces and each word capitalised,
   *   e.g. "light_heavyweight" becomes "Light Heavyweight".
   */
  public function transformWeightClass(): string {
    return ucwords(str_replace("_", " ", (string) $this->class));
  }

  /**
   * Retrieves a "ufc_divisions" taxonomy term by name, creating it if absent.
   *
   * @param string $term_name
   *   Name of the division term.
   *
   * @return int
   *   ID of the existing or newly created term.
   */
  public function getTermByName($term_name): int {
    $storage = \Drupal::service('entity_type.manager')->getStorage('taxonomy_term');
    $terms = $storage->loadByProperties([
      'name' => $term_name,
      'vid' => 'ufc_divisions',
    ]);
    $term = reset($terms);

    if (!$term) {
      // save() returns a status constant, not the ID, so keep the entity
      // and read its ID after saving.
      $term = Term::create([
        'name' => $term_name,
        'vid' => 'ufc_divisions',
      ]);
      $term->save();
    }

    return (int) $term->id();
  }

  /**
   * Creates a media entity for the player headshot and stores its ID.
   *
   * Image bytes are read from $image; when that is empty or unreadable, the
   * default headshot under public://player-headshots is used. If no image
   * data is available at all, an error is logged and $image_id is left
   * unchanged.
   *
   * @return void
   */
  public function createMediaEntityFromImage() {
    $file_name = $this->extractFilename();
    if (empty($file_name)) {
      $file_name = "default-headshot_" . rand(0, 100000000) . '.png';
    }

    $file_data = FALSE;
    if (!empty($this->image)) {
      $file_data = file_get_contents($this->image);
    }
    if (empty($file_data)) {
      $file_data = file_get_contents("public://player-headshots/default-headshot.jpeg");
    }
    if (empty($file_data)) {
      // Without this guard FALSE would be handed to writeData().
      \Drupal::logger('ufc')->error('No headshot data available for @file.', [
        '@file' => $file_name,
      ]);
      return;
    }

    $file = \Drupal::service('file.repository')->writeData($file_data, "public://player-headshots/$file_name", FileSystemInterface::EXISTS_RENAME);
    $media_image = Media::create([
      'bundle' => 'image',
      'name' => $file_name,
      'field_media_image' => [
        'target_id' => $file->id(),
      ],
    ]);
    $media_image->save();

    $this->image_id = $media_image->id();
  }

  /**
   * Helper to extract the file name for images.
   *
   * @return string|false
   *   The first run of uppercase letters, digits and underscores followed
   *   by ".png" found in $image; FALSE when $image is empty or has no such
   *   run.
   */
  public function extractFilename() {
    if (empty($this->image)) {
      return FALSE;
    }

    // The dot is escaped so only a literal ".png" suffix is accepted.
    if (!preg_match('/[A-Z_0-9]*\.png/', $this->image, $matches)) {
      return FALSE;
    }

    return $matches[0];
  }

  /**
   * Runs a CSS selector against the loaded page.
   *
   * @param string $selector
   *   CSS selector.
   *
   * @return \Symfony\Component\DomCrawler\Crawler
   *   The matching nodes, or an empty crawler when no page is loaded.
   */
  private function filterPage(string $selector): Crawler {
    return $this->crawler instanceof Crawler
      ? $this->crawler->filter($selector)
      : new Crawler();
  }

  /**
   * Returns the trimmed inner text of the first node matching a selector.
   *
   * @param string $selector
   *   CSS selector.
   *
   * @return string
   *   The text, or an empty string when nothing matches.
   */
  private function innerTextOrEmpty(string $selector): string {
    $nodes = $this->filterPage($selector);

    return $nodes->count() > 0 ? trim($nodes->innerText()) : '';
  }

  /**
   * Maps node field names to the scraped stat values.
   *
   * Shared by createPlayerNode() and updatePlayerNode() so both always write
   * the same set of fields.
   *
   * @return array
   *   Field values keyed by field machine name.
   */
  private function statFieldValues(): array {
    return [
      'field_wins' => $this->wins,
      'field_losses' => $this->losses,
      'field_ties' => $this->ties,
      'field_striking_accuracy' => $this->striking_accuracy,
      'field_grappling_accuracy' => $this->grappling_accuracy,
      'field_strikes_per_minute' => $this->strikes_per_min,
      'field_absorbed_per_min' => $this->absorbed_per_min,
      'field_takedowns_per_15' => $this->takedowns_per_15,
      'field_knockdown_ratio' => $this->knockdown_ratio,
      'field_knockouts' => $this->knockouts,
      'field_decisions' => $this->decisions,
      'field_submissions' => $this->submissions,
      'field_age' => $this->age,
      'field_reach' => $this->reach,
      'field_leg_reach' => $this->leg_reach,
      'field_height' => $this->height,
      'field_weight' => $this->weight,
      'field_submission_avg_per_15' => $this->submission_avg_per_15,
      'field_takedown_defense' => $this->takedown_defense,
      'field_sig_strike_defense' => $this->sig_strike_defense,
      'field_average_fight_time' => $this->average_fight_time,
      'field_standing_strikes' => $this->standing_strikes,
      'field_clinch_strikes' => $this->clinch_strikes,
      'field_ground_strikes' => $this->ground_strikes,
      'field_strikes_to_head' => $this->strikes_to_head,
      'field_strikes_to_body' => $this->strikes_to_body,
      'field_strikes_to_leg' => $this->strikes_to_leg,
      'field_first_round_finishes' => $this->first_round_finishes,
    ];
  }

}
 |