This commit is contained in:
Dan Chadwick
2024-04-09 01:47:04 +00:00
parent 3bcbe3b783
commit 3cfd95ee81
219 changed files with 47894 additions and 3767 deletions

View File

@@ -0,0 +1,211 @@
<?php
namespace Drupal\ufc\Controller;
use Drupal\Core\Controller\ControllerBase;
use Drupal\Core\Entity\EntityTypeManager;
use Drupal\Core\StreamWrapper\StreamWrapperManager;
use Drupal\Core\Render\Renderer;
use Drupal\node\NodeInterface;
use Drupal\node\Entity\Node;
use Drupal\media\Entity\Media;
use Drupal\file\Entity\File;
use Drupal\Core\Cache\CacheableResponse;
use Symfony\Component\DependencyInjection\ContainerInterface;
// use Symfony\Component\HttpFoundation\Response;
// use Symfony\Component\HttpKernel\Exception\NotFoundHttpException;
use Symfony\Component\HttpFoundation\Request;
/**
 * Serves pre-rendered fighter markup for the DCJS front end.
 *
 * Each public callback builds a render array for one of the module's themes,
 * renders it with the renderer service, and returns the resulting markup in a
 * CacheableResponse.
 */
class DcjsRouteController extends ControllerBase {

  /**
   * The entity type manager service.
   *
   * @var \Drupal\Core\Entity\EntityTypeManager
   */
  protected $entityTypeManager;

  /**
   * The renderer service.
   *
   * @var \Drupal\Core\Render\Renderer
   */
  protected $renderer;

  /**
   * The stream wrapper manager service.
   *
   * @var \Drupal\Core\StreamWrapper\StreamWrapperManager
   */
  protected $streamWrapperManager;

  /**
   * Public constructor.
   *
   * @param \Drupal\Core\Entity\EntityTypeManager $entityTypeManager
   *   The entity type manager service.
   * @param \Drupal\Core\Render\Renderer $renderer
   *   The renderer service.
   * @param \Drupal\Core\StreamWrapper\StreamWrapperManager $streamWrapperManager
   *   The stream wrapper manager service.
   */
  public function __construct(
    EntityTypeManager $entityTypeManager,
    Renderer $renderer,
    StreamWrapperManager $streamWrapperManager
  ) {
    $this->entityTypeManager = $entityTypeManager;
    $this->renderer = $renderer;
    $this->streamWrapperManager = $streamWrapperManager;
  }

  /**
   * {@inheritdoc}
   */
  public static function create(ContainerInterface $container) {
    // Pull the services this controller needs out of the container.
    return new static(
      $container->get('entity_type.manager'),
      $container->get('renderer'),
      $container->get('stream_wrapper_manager'),
    );
  }

  /**
   * Checks whether a request identifies itself as the DCJS fetcher.
   *
   * Nothing in this class calls this helper yet.
   *
   * @param \Symfony\Component\HttpFoundation\Request $request
   *   The incoming request.
   *
   * @return bool
   *   TRUE only when the User-Agent header is exactly "DCJS Fetcher".
   */
  private function validateDcjsUserAgent(Request $request): bool {
    return $request->headers->get('user-agent') === "DCJS Fetcher";
  }

  /**
   * Get all fighters to serve via DCJS.
   *
   * Loads up to 200 fighter nodes sorted by title and renders them through
   * the 'fighters_dcjs_list' theme.
   *
   * @return \Drupal\Core\Cache\CacheableResponse
   *   The rendered fighter list.
   */
  public function allFightersForDcjs(): CacheableResponse {
    $node_storage = $this->entityTypeManager->getStorage('node');
    $nids = $node_storage->getQuery()
      ->accessCheck(TRUE)
      ->condition('type', 'fighter')
      ->sort('title', 'ASC')
      ->range(0, 200)
      ->execute();

    $fighter_data = [];
    foreach ($node_storage->loadMultiple($nids) as $fighter) {
      $photo_mid = $fighter->field_player_photo->target_id;
      $fighter_id = $fighter->id();
      $fighter_data[] = [
        'link' => "/node/$fighter_id",
        'name' => $fighter->title->value,
        // A fighter without a photo reference gets an empty URL instead of
        // passing NULL into retrievePhotoUri(), which only accepts an int.
        'picUrl' => $photo_mid ? $this->retrievePhotoUri((int) $photo_mid) : '',
      ];
    }

    $build = [
      '#theme' => 'fighters_dcjs_list',
      '#fighters' => $fighter_data,
    ];
    return new CacheableResponse($this->renderer->renderPlain($build));
  }

  /**
   * Retrieves an individual fighter.
   *
   * @param \Drupal\node\NodeInterface $fighter_node
   *   The fighter node to render.
   *
   * @return \Drupal\Core\Cache\CacheableResponse
   *   The fighter rendered through the 'fighter_for_dcjs' theme.
   */
  public function fighterForDcjs(NodeInterface $fighter_node): CacheableResponse {
    // Field names are listed without their "field_" prefix.
    $personal_fields = [
      'age',
      'division',
      'height',
      'weight',
      'reach',
      'leg_reach',
    ];
    $stats_fields = [
      'knockouts',
      'striking_accuracy',
      'strikes_per_minute',
      'sig_strike_defense',
      'absorbed_per_min',
      'standing_strikes',
      'clinch_strikes',
      'ground_strikes',
      'grappling_accuracy',
      'strikes_to_head',
      'strikes_to_body',
      'strikes_to_leg',
      'knockdown_ratio',
      'takedowns_per_15',
      'takedown_defense',
      'average_fight_time',
      'first_round_finishes',
    ];

    $fighter_build = [
      '#theme' => 'fighter_for_dcjs',
      '#node' => $fighter_node,
      '#personal_info' => $this->getFieldValuesFromNode($fighter_node, $personal_fields),
      '#stats' => $this->getFieldValuesFromNode($fighter_node, $stats_fields),
    ];
    return new CacheableResponse($this->renderer->renderPlain($fighter_build));
  }

  /**
   * Extracts target field values from a node.
   *
   * @param \Drupal\node\NodeInterface $node
   *   The node.
   * @param string[] $retrieve
   *   The fields to retrieve, without the "field_" prefix.
   *
   * @return array
   *   Field values keyed by the un-prefixed field name. 'division' holds the
   *   referenced term's name, or NULL when no term can be loaded.
   */
  private function getFieldValuesFromNode(NodeInterface $node, array $retrieve): array {
    $return_data = [];
    foreach ($retrieve as $field_name) {
      $field_name_with_prefix = "field_$field_name";

      if ($field_name !== 'division') {
        $return_data[$field_name] = $node->{$field_name_with_prefix}->value;
        continue;
      }

      // Division is a term reference: resolve it to the term name, tolerating
      // an empty reference or a term that no longer exists.
      $div_id = $node->get($field_name_with_prefix)->target_id;
      $term = $div_id
        ? $this->entityTypeManager->getStorage('taxonomy_term')->load($div_id)
        : NULL;
      $return_data['division'] = $term ? $term->getName() : NULL;
    }
    return $return_data;
  }

  /**
   * Retrieve URI for a player photo.
   *
   * @param int $media_id
   *   ID of the media entity whose field_media_image holds the photo.
   *
   * @return string
   *   The external URL of the image file, or an empty string when the media
   *   entity, its file, or a stream wrapper for the file URI is unavailable.
   */
  private function retrievePhotoUri(int $media_id): string {
    $media = $this->entityTypeManager->getStorage('media')->load($media_id);
    if (!$media) {
      return '';
    }

    $player_fid = $media->field_media_image->target_id;
    if (!$player_fid) {
      return '';
    }

    $file = $this->entityTypeManager->getStorage('file')->load($player_fid);
    if (!$file) {
      return '';
    }

    // The declared string return type requires a value on every path, so an
    // unresolved wrapper also yields an empty string.
    $wrapper = $this->streamWrapperManager->getViaUri($file->getFileUri());
    return $wrapper ? $wrapper->getExternalUrl() : '';
  }

}

View File

@@ -0,0 +1,220 @@
<?php
namespace Drupal\ufc\Controller;
use Drupal\Core\Controller\ControllerBase;
use Drupal\Core\Entity\EntityTypeManager;
use Drupal\node\NodeInterface;
use Drupal\node\Entity\Node;
use Drupal\media\Entity\Media;
use Drupal\file\Entity\File;
use Drupal\Core\Cache\CacheableJsonResponse;
use Symfony\Component\DependencyInjection\ContainerInterface;
use Symfony\Component\HttpKernel\Exception\NotFoundHttpException;
use Symfony\Component\HttpFoundation\Request;
/**
 * Exposes fighter statistics as JSON for neural network training/prediction.
 */
class FightTrainingController extends ControllerBase {

  /**
   * Fields holding fighter data, listed without their "field_" prefix.
   *
   * @var string[]
   */
  protected $fields = [
    'age',
    'height',
    'reach',
    'leg_reach',
    'knockouts',
    'striking_accuracy',
    'strikes_per_minute',
    'sig_strike_defense',
    'absorbed_per_min',
    'standing_strikes',
    'clinch_strikes',
    'ground_strikes',
    'grappling_accuracy',
    'strikes_to_head',
    'strikes_to_body',
    'strikes_to_leg',
    'knockdown_ratio',
    'takedowns_per_15',
    'takedown_defense',
    'average_fight_time',
    'first_round_finishes',
  ];

  /**
   * The entity type manager service.
   *
   * @var \Drupal\Core\Entity\EntityTypeManager
   */
  protected $entityTypeManager;

  /**
   * Public constructor.
   *
   * @param \Drupal\Core\Entity\EntityTypeManager $entityTypeManager
   *   The entity type manager service.
   */
  public function __construct(
    EntityTypeManager $entityTypeManager
  ) {
    $this->entityTypeManager = $entityTypeManager;
  }

  /**
   * {@inheritdoc}
   */
  public static function create(ContainerInterface $container) {
    // Pull the service this controller needs out of the container.
    return new static(
      $container->get('entity_type.manager'),
    );
  }

  /**
   * Generates all fighting data for training NN.
   *
   * Every fight node that has two fighters and a recorded winner matching one
   * of them yields one sample: 'input' is the merged, normalized stats of
   * both fighters and 'output' one-hot encodes the winner.
   *
   * @return \Drupal\Core\Cache\CacheableJsonResponse
   *   A list of ['input' => [...], 'output' => [...]] samples.
   */
  public function generateTrainingData(): CacheableJsonResponse {
    $all_fights = $this->entityTypeManager->getStorage('node')->loadByProperties(['type' => 'fight']);

    $training_data = [];
    foreach ($all_fights as $fight) {
      $winner_id = $fight->field_result->target_id;
      $fighter_one_id = $fight->field_fighter_one->target_id;
      $fighter_two_id = $fight->field_fighter_two->target_id;

      // A sample needs a winner and both fighters.
      if (!$winner_id || !$fighter_one_id || !$fighter_two_id) {
        continue;
      }

      // The winner has to be one of the two fighters. IDs are compared
      // loosely because entity reference target IDs may be numeric strings.
      $fighter_one_won = $winner_id == $fighter_one_id;
      if (!$fighter_one_won && $winner_id != $fighter_two_id) {
        continue;
      }

      $fighter_one_data = $this->getFighterData((int) $fighter_one_id);
      $fighter_two_data = $this->getFighterData((int) $fighter_two_id, FALSE);
      // Empty data means the referenced fighter node could not be loaded.
      if (empty($fighter_one_data) || empty($fighter_two_data)) {
        continue;
      }

      $training_data[] = [
        'input' => array_merge($fighter_one_data, $fighter_two_data),
        'output' => [
          'fighter_one' => $fighter_one_won ? 1 : 0,
          'fighter_two' => $fighter_one_won ? 0 : 1,
        ],
      ];
    }
    return new CacheableJsonResponse($training_data);
  }

  /**
   * Retrieves data about a specific fight for predictions.
   *
   * @param \Drupal\node\NodeInterface $fight
   *   The fight node.
   *
   * @return \Drupal\Core\Cache\CacheableJsonResponse
   *   The merged, normalized stats of both fighters.
   *
   * @throws \Symfony\Component\HttpKernel\Exception\NotFoundHttpException
   *   When the fight lacks a fighter reference or a referenced fighter cannot
   *   be loaded.
   */
  public function getFightData(NodeInterface $fight): CacheableJsonResponse {
    $fighter_1_id = $fight->field_fighter_one->target_id;
    $fighter_2_id = $fight->field_fighter_two->target_id;
    if (!$fighter_1_id || !$fighter_2_id) {
      throw new NotFoundHttpException('The fight does not reference two fighters.');
    }

    $fighter_1_data = $this->getFighterData((int) $fighter_1_id);
    $fighter_2_data = $this->getFighterData((int) $fighter_2_id, FALSE);
    if (empty($fighter_1_data) || empty($fighter_2_data)) {
      throw new NotFoundHttpException('A fighter referenced by the fight could not be loaded.');
    }

    return new CacheableJsonResponse(array_merge($fighter_1_data, $fighter_2_data));
  }

  /**
   * Gets the fighter data.
   *
   * @param int $id
   *   The fighter node ID.
   * @param bool $is_f1
   *   TRUE to key the values with "fighter_one_", FALSE for "fighter_two_".
   *
   * @return array<string,mixed>
   *   Normalized field values keyed by prefixed field name; empty when the
   *   fighter node does not exist.
   */
  private function getFighterData(int $id, bool $is_f1 = TRUE): array {
    $prefix = $is_f1 ? 'fighter_one_' : 'fighter_two_';
    $extracted_values = $this->extractValuesFromFields($id, $this->fields, $prefix);
    return $this->normalizeData($extracted_values);
  }

  /**
   * Extracts the ->value of the given fields (cannot be entity references).
   *
   * @param int $id
   *   The fighter node ID.
   * @param string[] $field_names
   *   Field names without the "field_" prefix.
   * @param string $prefix
   *   Prefix prepended to every key of the result.
   *
   * @return array<string,mixed>
   *   Field values keyed by prefix + field name, with 0 standing in for any
   *   unset value. Empty when the node cannot be loaded, so callers can tell
   *   a missing fighter apart from a fighter whose stats are all zero.
   */
  private function extractValuesFromFields(int $id, array $field_names, string $prefix): array {
    $fighter = Node::load($id);
    if (!$fighter) {
      return [];
    }

    $values = [];
    foreach ($field_names as $field) {
      $field_machine_name = 'field_' . $field;
      $values[$prefix . $field] = $fighter->{$field_machine_name}->value ?? 0;
    }
    return $values;
  }

  /**
   * Min-max normalizes the values of an array.
   *
   * Each value is rescaled to (value - min) / (max - min), i.e. into the
   * range 0-1, where min and max are taken across the whole array.
   *
   * @param array $data
   *   Numeric values keyed by name.
   *
   * @return array
   *   The rescaled values under their original keys. All values are 0 when
   *   every input is equal; an empty input yields an empty array.
   */
  private function normalizeData(array $data): array {
    // min()/max() throw a ValueError on an empty array in PHP 8.
    if ($data === []) {
      return [];
    }

    $min = min($data);
    $range = max($data) - $min;

    $normalized = [];
    foreach ($data as $key => $value) {
      // Loose comparison on purpose: the range may be int 0 or float 0.0.
      $normalized[$key] = $range == 0 ? 0 : ($value - $min) / $range;
    }
    return $normalized;
  }

  /**
   * Gets the trained neural network.
   *
   * @return \Drupal\Core\Cache\CacheableJsonResponse
   *   ['data' => ...] holding the base64-decoded 'neuralNetwork' state value,
   *   or ['ERROR' => ...] when no network has been stored.
   */
  public function getNeuralNetwork(): CacheableJsonResponse {
    $cur_network = \Drupal::state()->get('neuralNetwork') ?? FALSE;

    $build = $cur_network
      ? ['data' => base64_decode($cur_network)]
      : ['ERROR' => "There is no spoon."];
    return new CacheableJsonResponse($build);
  }

}

View File

@@ -1,313 +0,0 @@
<?php
namespace Drupal\ufc;
use Symfony\Component\DependencyInjection\ContainerInterface;
use Drupal\ufc\Fighter;
use Drupal\node\Entity\Node;
use Drupal\Core\Entity\EntityTypeManager;
use Drupal\Core\Config\ConfigFactory;
/**
 * Scores two fighters against each other using weighted stat comparisons.
 *
 * The per-category weights live under 'fight_weights' in the 'ufc.weights'
 * configuration object and can be adjusted or reset through this class.
 */
class FightPredictor {

  /**
   * First Fighter.
   *
   * @var \Drupal\node\Entity\Node
   */
  protected $fighterOne;

  /**
   * Second Fighter.
   *
   * @var \Drupal\node\Entity\Node
   */
  protected $fighterTwo;

  /**
   * Entity Type Manager.
   *
   * @var \Drupal\Core\Entity\EntityTypeManager
   */
  protected $entityTypeManager;

  /**
   * The editable 'ufc.weights' config object.
   *
   * @var object
   */
  protected $config;

  /**
   * The 'fight_weights' value as it was when this object was constructed.
   *
   * Not refreshed by adjustWeight() or resetWeights(); scoring always reads
   * the current weights from $config.
   *
   * @var mixed
   */
  protected $weights;

  /**
   * The results of the fight.
   *
   * @var array
   */
  protected $results = [
    'fighter_one' => [
      'name' => NULL,
      'points' => 0,
      'advantages' => [],
      'disadvantages' => [],
    ],
    'fighter_two' => [
      'name' => NULL,
      'points' => 0,
      'advantages' => [],
      'disadvantages' => [],
    ],
    'skips' => 0,
  ];

  /**
   * The amount of categories skipped.
   *
   * @var int
   */
  protected $skipCount;

  /**
   * Public constructor.
   *
   * @param \Drupal\Core\Entity\EntityTypeManager $entityTypeManager
   *   The entity type manager service.
   * @param \Drupal\Core\Config\ConfigFactory $config
   *   The config factory, used to load the editable 'ufc.weights' config.
   */
  public function __construct(EntityTypeManager $entityTypeManager, ConfigFactory $config) {
    $this->entityTypeManager = $entityTypeManager;
    $this->config = $config->getEditable('ufc.weights');
    $this->weights = $this->config->get('fight_weights');
  }

  /**
   * Calculate the fight results.
   *
   * @param \Drupal\node\Entity\Node $fighter_one
   *   The first fighter.
   * @param \Drupal\node\Entity\Node $fighter_two
   *   The second fighter.
   *
   * @return array
   *   Per-fighter 'name', 'points', 'advantages' and 'disadvantages', plus the
   *   number of skipped categories under 'skips'.
   */
  public function calculate(Node $fighter_one, Node $fighter_two) {
    // Start from a clean slate so that points and advantages from an earlier
    // fight do not leak into this one when the same instance is reused.
    $this->skipCount = 0;
    $this->results = $this->blankResults();

    $this->fighterOne = $fighter_one;
    $this->fighterTwo = $fighter_two;

    $this->runFight();

    $this->results['fighter_one']['name'] = $fighter_one->label();
    $this->results['fighter_two']['name'] = $fighter_two->label();
    $this->results['skips'] = $this->skipCount;

    return $this->results;
  }

  /**
   * Populate fight results.
   *
   * @return void
   */
  public function runFight() {
    $this->calcComparativePoints();
  }

  /**
   * Calculate comparative points.
   *
   * Runs calculateDiff() once per compared category, in a fixed order.
   *
   * @return void
   */
  public function calcComparativePoints() {
    // Category => whether the LOWER value wins that category.
    $categories = [
      'height' => FALSE,
      'weight' => FALSE,
      'age' => FALSE,
      'reach' => FALSE,
      'leg_reach' => FALSE,
      'wins' => FALSE,
      'losses' => TRUE,
      'ties' => FALSE,
      'decisions' => FALSE,
      'knockouts' => FALSE,
      'submissions' => FALSE,
      'grappling_accuracy' => FALSE,
      'striking_accuracy' => FALSE,
      'strikes_per_minute' => FALSE,
      'absorbed_per_min' => TRUE,
      'takedowns_per_15' => FALSE,
      'knockdown_ratio' => FALSE,
    ];
    foreach ($categories as $category => $lower_wins) {
      $this->calculateDiff('field_' . $category, $category, $lower_wins);
    }
  }

  /**
   * Calculate the weighted diff between two fighters.
   *
   * The fighter with the better value receives the category's weight as
   * points and the field is recorded as an advantage for them and a
   * disadvantage for the opponent.
   *
   * @param string $field
   *   Machine name of the field to compare.
   * @param string $weight_label
   *   Key of the category inside the 'fight_weights' config value.
   * @param bool $inverse
   *   TRUE when the lower value is the better one.
   *
   * @return void
   */
  public function calculateDiff($field, $weight_label, $inverse = FALSE) {
    $f1 = $this->getFieldValue($field, $this->fighterOne);
    $f2 = $this->getFieldValue($field, $this->fighterTwo);

    // getFieldValue() reports missing data as 0, and equal values give
    // neither fighter an edge: both cases count as a skipped category.
    if (empty($f1) || empty($f2) || $f1 === $f2) {
      $this->skipCount += 1;
      return;
    }

    $weights = $this->config->get('fight_weights');
    $weighted_points = 1 * ($weights[$weight_label] ?? 0);

    $fighter_one_wins = $inverse ? $f1 < $f2 : $f1 > $f2;
    $winner = $fighter_one_wins ? 'fighter_one' : 'fighter_two';
    $loser = $fighter_one_wins ? 'fighter_two' : 'fighter_one';

    $this->results[$winner]['points'] += $weighted_points;
    $this->results[$winner]['advantages'][] = $field;
    $this->results[$loser]['disadvantages'][] = $field;
  }

  /**
   * Get value from a field.
   *
   * @param string $field
   *   Machine name of the field.
   * @param \Drupal\node\Entity\Node $fighter
   *   The fighter to read from.
   *
   * @return float|int
   *   The first item's 'value' as a float, or 0 when it is empty.
   */
  public function getFieldValue($field, Node $fighter) {
    // reset() takes its argument by reference, so it needs a real variable.
    $items = $fighter->get($field)->getValue();
    $first_item = reset($items);
    if (empty($first_item['value'])) {
      return 0;
    }
    return (float) $first_item['value'];
  }

  /**
   * Calculate the knockout percentage for a fighter.
   *
   * @param \Drupal\node\Entity\Node $fighter
   *   The fighter.
   *
   * @return float
   *   Knockouts divided by wins + losses + ties, or 0.0 when the fighter has
   *   no recorded fights.
   */
  public function getKnockoutPercentage(Node $fighter) {
    $total_fights =
      $this->getFieldValue('field_wins', $fighter) +
      $this->getFieldValue('field_losses', $fighter) +
      $this->getFieldValue('field_ties', $fighter);

    // Dividing by zero throws a DivisionByZeroError on PHP 8.
    if ($total_fights == 0) {
      return 0.0;
    }
    return $this->getFieldValue('field_knockouts', $fighter) / $total_fights;
  }

  /**
   * Update predictions for all fights in batches.
   *
   * @param array $fights
   *   The fight nodes to re-save in this batch operation.
   * @param array $context
   *   The batch context; IDs of the saved fights are appended to 'results'.
   */
  public function updatePredictionsBatched($fights, &$context) {
    $results = [];
    foreach ($fights as $fight) {
      \Drupal::logger('ufc')->notice("Starting " . $fight->label() . "");
      $fight->save();
      $results[] = $fight->id();
    }
    // Append rather than overwrite so that IDs gathered by earlier operations
    // of the same batch are still there for the finished callback.
    $context['results'] = array_merge($context['results'] ?? [], $results);
  }

  /**
   * Update predictions for all fights.
   *
   * Re-saves every fight node in one go.
   */
  public function updatePredictions() {
    $fights = $this->entityTypeManager->getStorage('node')->loadByProperties(['type' => 'fight']);
    foreach ($fights as $fight) {
      \Drupal::logger('ufc')->notice("Starting " . $fight->label() . "");
      $fight->save();
    }
  }

  /**
   * Raises the weights of advantage fields and lowers disadvantage ones.
   *
   * @param string[] $adv
   *   Field machine names whose category weight is increased by 2.
   * @param string[] $dis_adv
   *   Field machine names whose category weight is decreased by 1.
   */
  public function adjustWeights($adv, $dis_adv) {
    foreach ($adv as $cat_to_increase) {
      $this->adjustWeight(str_replace('field_', '', $cat_to_increase), 2);
    }
    foreach ($dis_adv as $cat_to_decrease) {
      $this->adjustWeight(str_replace('field_', '', $cat_to_decrease), -1);
    }
  }

  /**
   * Helper function to change the weight of a category.
   *
   * @param string $key
   *   The category key inside 'fight_weights'.
   * @param int|float $increment
   *   Amount added to the current weight (may be negative).
   */
  public function adjustWeight($key, $increment) {
    $weights = $this->config->get('fight_weights');
    // An unknown category starts from 0.
    $current_val = $weights[$key] ?? 0;
    $this->config->set('fight_weights.' . $key, $current_val + $increment)->save();
  }

  /**
   * Helper function to reset all weighting.
   *
   * Sets every category under 'fight_weights' to 1 and saves the config once.
   *
   * @return void
   */
  public function resetWeights() {
    $weights = $this->config->get('fight_weights') ?? [];
    if (!$weights) {
      return;
    }
    foreach (array_keys($weights) as $cat) {
      $this->config->set('fight_weights.' . $cat, 1);
    }
    $this->config->save();
  }

  /**
   * Batch 'finished' callback: reports how many fights were processed.
   *
   * @param bool $success
   *   TRUE when no fatal PHP errors were detected during the batch.
   * @param array $results
   *   The values collected in $context['results'] by the batch operations.
   * @param array $operations
   *   Operations that remained unprocessed.
   */
  public function fightUpdatedCallback($success, $results, $operations) {
    // The 'success' parameter means no fatal PHP errors were detected. All
    // other error management should be handled using 'results'.
    if ($success) {
      $message = \Drupal::translation()->formatPlural(
        count($results),
        'One fight processed.', '@count fights processed.'
      );
    }
    else {
      $message = t('Finished with an error.');
    }
    // drupal_set_message() is gone from current Drupal core; the messenger
    // service is its replacement.
    \Drupal::messenger()->addMessage($message);
  }

  /**
   * Builds an empty results structure for a new fight.
   *
   * @return array
   *   The same shape as the default value of $results.
   */
  private function blankResults(): array {
    $blank_fighter = [
      'name' => NULL,
      'points' => 0,
      'advantages' => [],
      'disadvantages' => [],
    ];
    return [
      'fighter_one' => $blank_fighter,
      'fighter_two' => $blank_fighter,
      'skips' => 0,
    ];
  }

}

View File

@@ -2,14 +2,23 @@
namespace Drupal\ufc;
use Drupal\Core\File\FileSystemInterface;
use Drupal\media\Entity\Media;
use Drupal\node\Entity\Node;
use Drupal\Component\Utility\Html;
use GuzzleHttp\Client;
use Drupal\taxonomy\Entity\Term;
use Drupal\ufc\Traits\NameConversionTrait;
use Drupal\ufc\Traits\ScraperHelperTrait;
use GuzzleHttp\Client;
use Symfony\Component\DomCrawler\Crawler;
class Fighter {
use NameConversionTrait;
use ScraperHelperTrait;
public $crawler;
public $http_client;
public $name;
public $first_name;
public $last_name;
@@ -24,7 +33,6 @@ class Fighter {
public $wins;
public $losses;
public $ties;
public $http_client;
public $fighter_page;
public $striking_accuracy;
public $grappling_accuracy;
@@ -35,6 +43,18 @@ class Fighter {
public $knockouts;
public $decisions;
public $submissions;
public $submission_avg_per_15;
public $takedown_defense;
public $sig_strike_defense;
public $average_fight_time;
public $standing_strikes;
public $clinch_strikes;
public $ground_strikes;
public $strikes_to_head;
public $strikes_to_body;
public $strikes_to_leg;
public $first_round_finishes;
/**
* Public constructor.
@@ -46,209 +66,96 @@ class Fighter {
}
/**
* Get fighters url from name.
* Parent method to scrape all data from fighter profile.
*
* @return string
* @return void
*/
public function getFighterUrl() {
// Manual overrides for name problems go here.
if ($this->first_name == 'Khaos') {
$this->first_name = 'Kalinn';
public function scrapeDataFromFighterPage(string $profile_uri): bool {
$url = "https://www.ufc.com$profile_uri";
$this->setFighterPage($url);
if (!$this->checkValidFighter()) {
return FALSE;
}
if ($this->first_name == 'J') {
$this->first_name = 'JP';
$this->last_name = 'Buys';
}
if ($this->last_name == 'Mc Kee') {
$this->last_name = 'McKee';
}
if ($this->last_name == 'Mc Gee') {
$this->last_name = 'McGee';
}
if ($this->last_name == 'Mc Gregor') {
$this->last_name = 'mcgregor';
}
if ($this->last_name == "O&#039; Malley") {
$this->last_name = 'Omalley';
}
if ($this->first_name == "Don&#039;") {
$this->first_name = 'dontale';
$this->last_name = 'mayes';
}
if ($this->first_name == "Marc-") {
$this->first_name = 'Marc';
}
if ($this->first_name == "A") {
$this->first_name = 'AJ';
$this->last_name = 'Dobson';
}
if ($this->first_name == "C") {
$this->first_name = 'CB';
$this->last_name = 'Dollaway';
}
if ($this->last_name == "Della Maddalena") {
$this->last_name = 'Della';
}
if ($this->first_name == "Elizeudos") {
$this->first_name = 'elizeu';
$this->last_name = 'dos-santos';
}
if ($this->last_name == "La Flare") {
$this->last_name = 'laflare';
}
if ($this->first_name == "JoelÁlvarez") {
$this->first_name = 'Joel';
}
if ($this->last_name == "J Brown") {
$this->first_name = 'TJ';
$this->last_name = 'Brown';
}
if ($this->first_name == "Alexda") {
$this->first_name = "alex-da";
}
if ($this->last_name == "Mc Kinney") {
$this->last_name = "mckinney";
}
if ($this->last_name == "Van Camp") {
$this->last_name = "vancamp";
}
if ($this->last_name == "J Laramie") {
$this->first_name = "TJ";
$this->last_name = "Laramie";
}
if ($this->last_name == "Al- Qaisi") {
$this->last_name = "alqaisi";
}
if ($this->first_name == "Alatengheili") {
$this->first_name = "heili";
$this->last_name = "alateng";
}
if ($this->last_name == "J Dillashaw") {
$this->first_name = "TJ";
$this->last_name = "Dillashaw";
}
if ($this->first_name == "Andersondos") {
$this->first_name = "Anderson-dos";
}
if ($this->last_name == "Silvade Andrade") {
$this->last_name = "Silva-de-andrade";
}
if ($this->first_name == "Ode&#039;") {
$this->first_name = "ode";
}
if ($this->first_name == "Sumudaerji") {
$this->first_name = "su";
$this->last_name = "mudaerji";
}
$hyphens = str_replace(" ", "-", $this->last_name);
$suffix = $this->first_name . "-" . $hyphens;
if ($this->first_name == "Aoriqileng") {
$suffix = $this->first_name;
}
$url = "https://www.ufc.com/athlete/$suffix";
$trim = rtrim($url);
return $url;
$this->setAge();
$this->scrapeBio();
$this->setFighterRecord();
$this->setAccuracy();
$this->setAverages();
$this->setWinsBreakdown();
$this->setStrikesByPosition();
return TRUE;
}
/**
* Get contents of the fighter page.
*
* @return void
*/
public function getFighterPage() {
public function setFighterPage(string $url): void {
try {
$request = $this->http_client->request('GET', $this->getFighterUrl(), ['verify' => FALSE]);
$request = $this->http_client->request('GET', $url, ['verify' => FALSE]);
$this->fighter_page = $request->getBody()->getContents();
$this->crawler = new Crawler($this->fighter_page);
} catch (\Exception $e) {
echo 'Caught exception: ', $e->getMessage(), "\n";
// exit();
}
}
/*
USE THIS FOR TESTING:
$player_url = "https://www.ufc.com/athlete/anthony-hamilton";
try {
$request = $this->http_client->request('GET', $player_url);
}
catch (\Exception $e) {
/**
* Checks is the fighters has stats to pull before proceeding.
*/
public function checkValidFighter(): bool {
$athlete_stats = $this->crawler->filter('h2.stats-records__title')->count();
if ($athlete_stats == 0) {
return FALSE;
}
$content = $request->getBody()->getContents();
*/
return TRUE;
}
/**
* Get fighter age.
*
* @return void
* @return bool
*/
public function getAge() {
$pattern = '/<div class="field field--name-age(.*)<\/div>/';
preg_match($pattern, $this->fighter_page, $matches);
preg_match_all('!\d+!', $matches[0], $age);
$fighter_age = reset($age[0]);
$this->age = (float) $fighter_age;
public function setAge(): void {
$age = $this->crawler->filter('.field--name-age')->innerText();
if (strlen($age) < 1) {
$this->age = 0;
return;
} else {
assert(ctype_digit($age));
$this->age = (int) $age;
}
}
public function getBio() {
$pattern = '/<div class="c-bio__text">[0-9]+\.[0-9]+/s';
preg_match_all($pattern, $this->fighter_page, $matches);
$matches = reset($matches);
// Get height.
$pattern = '/[0-9]+\.[0-9]+/';
preg_match($pattern, $matches[0], $height);
$this->height = reset($height);
// Get weight.
$pattern = '/[0-9]+\.[0-9]+/';
preg_match($pattern, $matches[1], $weight);
$this->weight = reset($weight);
// Get reach.
$pattern = '/[0-9]+\.[0-9]+/';
preg_match($pattern, $matches[2], $reach);
$this->reach = reset($reach);
// Get leg reach.
$pattern = '/[0-9]+\.[0-9]+/';
preg_match($pattern, $matches[3], $leg_reach);
$this->leg_reach = reset($leg_reach);
/**
* Gets data from the bio section of a fighter page.
*/
public function scrapeBio(): void {
$three_cols = $this->crawler->filter('.c-bio__row--3col');
$data = [];
if (!$three_cols) {
return;
}
foreach ($three_cols as $three_col) {
// Extract data from nodevalue.
$cleaned = str_replace([" ", "\n"], "", $three_col->nodeValue);
$pattern = '/[0-9]+\.[0-9]+/s';
preg_match_all($pattern, $cleaned, $matches);
$data[] = $matches;
}
$height_weight = $data[0][0] ?? FALSE;
$arm_leg_reach = $data[1][0] ?? FALSE;
if ($height_weight && count($height_weight) == 2) {
$this->height = (float) $height_weight[0];
$this->weight = (float) $height_weight[1];
}
if ($arm_leg_reach && count($arm_leg_reach) == 2) {
$this->reach = (float) $arm_leg_reach[0];
$this->leg_reach = (float) $arm_leg_reach[1];
}
}
/**
@@ -256,37 +163,46 @@ class Fighter {
*
* @return void
*/
public function getFighterRecord() {
$pattern = "/[0-9]+[-][0-9]+[-][0-9]+.\(W-L-D\)/";
preg_match($pattern, $this->fighter_page, $matches);
$r = reset($matches);
$r = str_replace(" (W-L-D)", "", $r);
$record_chunks = explode("-", $r);
$this->wins = $record_chunks[0];
$this->losses = $record_chunks[1];
$this->ties = $record_chunks[2];
public function setFighterRecord(): void {
$record = $this->crawler->filter(".hero-profile__division-body")->innerText();
// 20-3-0 (W-L-D)
$parts = explode(" ", $record);
$record_details = $parts[0] ?? FALSE;
if (!$record_details) {
return;
}
// 20-3-0
$record_chunks = explode("-", $record_details);
$this->wins = (int) $record_chunks[0];
$this->losses = (int) $record_chunks[1];
$this->ties = (int) $record_chunks[2];
}
/**
* Extracts the striking accuracy.
*
* @return void
*/
public function getStrikingAccuracy() {
$pattern = '/<title>Striking accuracy.(.*)<\/title>/';
preg_match($pattern, $this->fighter_page, $matches);
$this->striking_accuracy = str_replace('%', '', $matches[1]);
}
/**
* Extracts the grappling accuracy.
*
* @return void
*/
public function getGrapplingAccuracy() {
$pattern = '/<title>Grappling accuracy.(.*)<\/title>/';
preg_match($pattern, $this->fighter_page, $matches);
$this->grappling_accuracy = str_replace('%', '', $matches[1]);
public function setAccuracy(): void {
$circles = $this->crawler->filter('.e-chart-circle');
foreach ($circles as $circle) {
foreach ($circle->childNodes as $child) {
$text = strtolower($child->textContent);
if (str_contains($text, "accuracy")) {
$cir_title = $text;
}
}
if (!isset($cir_title)) {
continue;
}
if (str_contains($cir_title, "striking")) {
$this->striking_accuracy = $this->extractAccuracyFromString($cir_title);
}
if (str_contains($cir_title, "takedown")) {
$this->grappling_accuracy = $this->extractAccuracyFromString($cir_title);
}
}
}
/**
@@ -294,15 +210,19 @@ class Fighter {
*
* @return void
*/
public function getAverages() {
$pattern = '/<div class="c-stat-compare__group-1(.*)(<\/div>)/sU';
preg_match_all($pattern, $this->fighter_page, $matches);
$this->strikes_per_min = $this->extractNumber($matches[1][0]);
$this->takedowns_per_15 = $this->extractNumber($matches[1][1]);
$this->knockdown_ratio = $this->extractNumber($matches[1][3]);
$pattern = '/<div class="c-stat-compare__group-2(.*)(<\/div>)/sU';
preg_match_all($pattern, $this->fighter_page, $matches);
$this->absorbed_per_min = $this->extractNumber($matches[1][0]);
public function setAverages(): void {
$compare_wrappers = $this->crawler->filter(".c-stat-compare--no-bar");
if (is_null($compare_wrappers)) {
return;
}
foreach ($compare_wrappers as $compare_wrapper) {
if ($compare_wrapper && !property_exists($compare_wrapper, "childNodes")) {
continue;
}
foreach ($compare_wrapper->childNodes as $child) {
$this->extractAndSetAverage($child);
}
}
}
/**
@@ -310,12 +230,50 @@ class Fighter {
*
* @return void
*/
public function getWinsBreakdown() {
$pattern = '/<div class="c-stat-3bar__value">(.*)<\/div>/sU';
preg_match_all($pattern, $this->fighter_page, $matches);
$this->knockouts = $this->extractWinByType($matches[0][3]);
$this->decisions = $this->extractWinByType($matches[0][4]);
$this->submissions = $this->extractWinByType($matches[0][5]);
public function setWinsBreakdown(): void {
$athlete_stats = $this->crawler->filter('.hero-profile__stat-numb');
$stats = [];
foreach ($athlete_stats as $stat) {
$stats[] = (int) $stat->textContent;
}
if (count($stats) == 2) {
$this->knockouts = $stats[0] ?? 0;
$this->first_round_finishes = $stats[1] ?? 0;
}
if (count($stats) == 3) {
$this->knockouts = $stats[0] ?? 0;
$this->submissions = $stats[1] ?? 0;
$this->first_round_finishes = $stats[2] ?? 0;
}
$this->decisions = 0;
}
/**
* Extract strikes by position.
*/
public function setStrikesByPosition(): void {
$three_stats = $this->crawler->filter('.stats-records--three-column');
foreach ($three_stats as $three_stat) {
$text = strtolower($three_stat->textContent);
if (str_contains($text, "sig. str. by position")) {
foreach ($three_stat->childNodes as $child) {
if ($child->nodeName == 'div') {
foreach ($child->childNodes as $grandchild) {
$grand_text = strtolower($grandchild->textContent);
if (str_contains($grand_text, "standing")) {
$this->setStrikes($child->lastElementChild->nodeValue);
}
}
}
}
}
if (str_contains($text, "sig. str. by target")) {
$this->strikes_to_head = (int) $this->crawler->filter('text#e-stat-body_x5F__x5F_head_value')->innerText();
$this->strikes_to_body = (int) $this->crawler->filter('text#e-stat-body_x5F__x5F_body_value')->innerText();
$this->strikes_to_leg = (int) $this->crawler->filter('text#e-stat-body_x5F__x5F_leg_value')->innerText();
}
}
}
/**
@@ -324,31 +282,20 @@ class Fighter {
* @param [type] $string
* @return void
*/
protected function extractNumber($string) {
protected function extractNumber($string): string {
$no_tags = strip_tags($string);
preg_match_all('!\d+!', $no_tags, $matches);
$number = implode('.', $matches[0]);
return $number;
}
/**
* Helper to extract number from number and percentage.
*
* @param [type] $string
* @return void
*/
protected function extractWinByType($string) {
$no_tags = strip_tags($string);
preg_match_all('!\d+!', $no_tags, $matches);
return $matches[0][0];
}
/**
/**
* Creates a player node.
*
* @return void
*/
public function createPlayerNode() {
public function createPlayerNode(): void {
$division_id = self::transformWeightClass();
$title = $this->first_name . " " . $this->last_name;
$node = Node::create([
@@ -376,12 +323,31 @@ class Fighter {
'field_reach' => $this->reach,
'field_leg_reach' => $this->leg_reach,
'field_height' => $this->height,
'field_weight' => $this->weight
'field_weight' => $this->weight,
'field_submission_avg_per_15' => $this->submission_avg_per_15,
'field_takedown_defense' => $this->takedown_defense,
'field_sig_strike_defense' => $this->sig_strike_defense,
'field_average_fight_time' => $this->average_fight_time,
'field_standing_strikes' => $this->standing_strikes,
'field_clinch_strikes' => $this->clinch_strikes,
'field_ground_strikes' => $this->ground_strikes,
'field_strikes_to_head' => $this->strikes_to_head,
'field_strikes_to_body' => $this->strikes_to_body,
'field_strikes_to_leg' => $this->strikes_to_leg,
'field_first_round_finishes' => $this->first_round_finishes,
]);
$node->status = 1;
$node->enforceIsNew();
try {
$node->save();
\Drupal::logger('ufc')->notice("$title created successfully.");
} catch (e) {
\Drupal::logger('ufc')->error("Unable to create new plyer node for $title.");
}
$node->save();
}
/**
@@ -390,7 +356,7 @@ class Fighter {
* @param $nid
* @return void
*/
public function updatePlayerNode($nid) {
public function updatePlayerNode($nid): void {
$node_storage = \Drupal::entityTypeManager()->getStorage('node');
$node = $node_storage->load($nid);
$node->field_wins = $this->wins;
@@ -410,9 +376,19 @@ class Fighter {
$node->field_leg_reach = $this->leg_reach;
$node->field_height = $this->height;
$node->field_weight = $this->weight;
$node->field_submission_avg_per_15 = $this->submission_avg_per_15;
$node->field_takedown_defense = $this->takedown_defense;
$node->field_sig_strike_defense = $this->sig_strike_defense;
$node->field_average_fight_time = $this->average_fight_time;
$node->field_standing_strikes = $this->standing_strikes;
$node->field_clinch_strikes = $this->clinch_strikes;
$node->field_ground_strikes = $this->ground_strikes;
$node->field_strikes_to_head = $this->strikes_to_head;
$node->field_strikes_to_body = $this->strikes_to_body;
$node->field_strikes_to_leg = $this->strikes_to_leg;
$node->field_first_round_finishes = $this->first_round_finishes;
$node->save();
}
/**
@@ -420,24 +396,26 @@ class Fighter {
*
* @return void
*/
public function transformWeightClass() {
public function transformWeightClass(): string {
  // Turn the machine-style class name (underscore separated) into a
  // human-readable label: underscores become spaces and every word is
  // capitalised.
  return ucwords(str_replace("_", " ", $this->class));
}
/**
* Helper function to retrieve taxo term by name.
*
* @param [type] $term_name
* @param int $term_name
* @return void
*/
public function getTermByName($term_name) {
public function getTermByName($term_name): int {
// Get taxonomy term storage.
$taxonomyStorage = \Drupal::service('entity.manager')->getStorage('taxonomy_term');
$taxonomyStorage = \Drupal::service('entity_type.manager')->getStorage('taxonomy_term');
// Set name properties.
$properties = [];
$properties['name'] = $term_name;
$properties['vid'] = 'ufc_divisions';
@@ -453,7 +431,6 @@ class Fighter {
return $new_term;
};
return $term->id();
}
@@ -475,13 +452,10 @@ class Fighter {
}
if (empty($file_data)) {
$file_data = file_get_contents("public://player-headshots/headshot-default.png");
$file_data = file_get_contents("public://player-headshots/default-headshot.jpeg");
}
$file = file_save_data(
$file_data,
"public://player-headshots/$file_name", FILE_EXISTS_RENAME);
$file = \Drupal::service('file.repository')->writeData($file_data, "public://player-headshots/$file_name", FileSystemInterface::EXISTS_RENAME);
$media_image = Media::create([
'bundle' => 'image',
'name' => $file_name,
@@ -508,4 +482,4 @@ class Fighter {
return FALSE;
}
}
}

View File

@@ -1,202 +0,0 @@
<?php
namespace Drupal\ufc;
use Drupal\Core\Entity\EntityTypeManager;
use Drupal\media\Media;
use Drupal\ufc\Fighter;
use GuzzleHttp\Client;
class FighterImporter {
protected $http_client;
protected $entity_type_manager;
protected $fighters = [];
protected $weight_class;
// The base url for fighter lists.
const UFC_BASE = "https://www.ufc.com/athletes/all?filters%5B0%5D=status%3A23&filters%5B1%5D=weight_class%3";
// These are the filter values used to cycle through divisions.
protected $divisions = [
'heavyweight' => "A11",
'light_heavyweight' => "A13",
'middleweight' => "A14",
'welterweight' => "A15",
'lightweight' => "A12",
'featherweight' => "A9",
'bantamweight' => "A8",
'flyweight' => "A10",
'strawweight' => "A99",
];
/**
 * Builds the importer from its two collaborators.
 *
 * @param \GuzzleHttp\Client $httpClient
 *   HTTP client stored for the listing requests made by this class.
 * @param \Drupal\Core\Entity\EntityTypeManager $entityTypeManager
 *   Entity type manager stored for node storage lookups.
 */
public function __construct(Client $httpClient, EntityTypeManager $entityTypeManager) {
  $this->entity_type_manager = $entityTypeManager;
  $this->http_client = $httpClient;
}
/**
 * Imports every listed fighter, creating or updating one node per fighter.
 *
 * For each division returned by getListOfCurrentFighters(), a Fighter object
 * is populated from the listing data, asked to scrape its own details, and
 * then saved: an existing node with the same title is updated, otherwise a
 * new node is created.
 */
public function importFighters() {
  // This populates and returns the fighters array, keyed by division.
  $fighters_by_div = $this->getListOfCurrentFighters();
  $node_storage = $this->entity_type_manager->getStorage('node');

  // Process each fighter into system.
  foreach ($fighters_by_div as $division => $fighters) {
    foreach ($fighters as $fighter_data) {
      $fighter = new Fighter($this->http_client);
      $fighter->first_name = $fighter_data['firstname'];
      $fighter->last_name = $fighter_data['lastname'];
      $fighter->image = $fighter_data['image'];
      $fighter->class = $division;
      $fighter->getFighterPage();
      $fighter->getAge();
      $fighter->getBio();
      $fighter->getFighterRecord();
      $fighter->getStrikingAccuracy();
      $fighter->getGrapplingAccuracy();
      $fighter->getAverages();
      $fighter->getWinsBreakdown();
      $fighter->createMediaEntityFromImage();

      // Check if node exists, by title. reset() takes its argument by
      // reference, so the lookup result has to live in a variable first.
      $title = $fighter->first_name . " " . $fighter->last_name;
      $matching_nodes = $node_storage->loadByProperties(['title' => $title]);
      $node_lookup = reset($matching_nodes);

      $logged_name = $fighter_data['firstname'] . " " . $fighter_data['lastname'];
      if (!empty($node_lookup)) {
        // Update instead of create.
        $fighter->updatePlayerNode($node_lookup->id());
        \Drupal::logger('ufc')->notice($logged_name . " updated successfully.");
      }
      else {
        echo "didnt find an existing player, creating new for " . $title;
        $fighter->createPlayerNode();
        \Drupal::logger('ufc')->notice($logged_name . " imported successfully.");
      }
    }
  }
}
/**
 * Walks every configured division and collects its fighters.
 *
 * The division being processed is remembered in $this->weight_class so that
 * the extraction step can group the fighters it finds.
 *
 * @return array
 *   The accumulated fighters array.
 */
public function getListOfCurrentFighters() {
  foreach ($this->divisions as $machine_name => $filter_value) {
    $this->weight_class = $machine_name;
    echo "Starting import for {$machine_name}\n";
    self::loopThroughFighterPages(self::UFC_BASE . $filter_value);
  }

  return $this->fighters;
}
/**
 * There is a pager, loop through to get all fighters.
 *
 * Requests pages 0..20 of the listing and hands each page to
 * extractFighters(). Stops at the first page containing the
 * "No Result Found For" marker.
 *
 * @param string $base_url
 *   Listing URL for one division, without the page parameter.
 */
public function loopThroughFighterPages($base_url) {
  for ($page = 0; $page <= 20; $page++) {
    $url = $base_url . "&page=$page";
    // NOTE(review): 'verify' => false turns off TLS certificate verification
    // for this request; confirm this is still required.
    $response = $this->http_client->request('GET', $url, ['verify' => false]);
    $content = $response->getBody()->getContents();

    // Compare against FALSE explicitly: strpos() returns 0 for a match at the
    // very start of the body, which a plain truthiness test reads as a miss.
    if (strpos($content, "No Result Found For") !== FALSE) {
      break;
    }

    echo "extracting fighters from page $page \n";
    $this->extractFighters($content);
  }
}
/**
 * Extract fighters from a page.
 *
 * Pulls the athlete names and thumbnail image URLs out of the listing markup
 * and records firstname / lastname / image for every fighter under the
 * current weight class in $this->fighters.
 *
 * @param string $input
 *   HTML of one listing page.
 */
public function extractFighters($input) {
  preg_match_all('/<span class="c-listing-athlete__name">(.*)<\/span>/sU', $input, $name_matches);
  $fighter_names = [];
  foreach ($name_matches[1] as $fighter_name) {
    $fighter_names[] = trim(html_entity_decode($fighter_name));
  }
  $fighter_names = array_unique($fighter_names);

  // Get image paths. They do not depend on the fighter being processed, so
  // they are collected once per page instead of once per name.
  preg_match_all('/<div class="c-listing-athlete__thumbnail(.*)<\/div>/sU', $input, $thumbnail_matches);
  $images = [];
  foreach ($thumbnail_matches[0] as $html_img) {
    $images[] = $this->extractImageSource($html_img);
  }

  foreach ($fighter_names as $name) {
    $name_no_spaces = str_replace(" ", "", $name);
    if ($name_no_spaces === '') {
      // Nothing usable to key the fighter on.
      continue;
    }

    // Split on capital letters: the first chunk is the first name, whatever
    // follows is the last name. This always yields both values, whatever the
    // number of chunks (single-word names get an empty last name).
    $chunks = preg_split('/(?=[A-Z])/', $name_no_spaces, -1, PREG_SPLIT_NO_EMPTY);
    $first_name = (string) array_shift($chunks);
    $last_name = implode(" ", $chunks);

    if (!isset($this->fighters[$this->weight_class][$name_no_spaces])) {
      $this->fighters[$this->weight_class][$name_no_spaces]['firstname'] = $first_name;
      $this->fighters[$this->weight_class][$name_no_spaces]['lastname'] = $last_name;
    }

    // Image URLs are matched on an upper-cased LASTNAME_FIRSTNAME fragment.
    $image_key = strtoupper($last_name . '_' . $first_name);
    $fighter_image = $this->getFighterImageFromList($images, $image_key);
    $this->fighters[$this->weight_class][$name_no_spaces]['image'] = $fighter_image;
  }
}
// Collects the src attribute value of every <img> tag in the given HTML and
// returns them as a list (empty when no tag matches).
public function extractImageSource($source_html) {
  $img_src_regex = '/<img[^>]+src="([^">]+)"/';
  $found = [];
  preg_match_all($img_src_regex, $source_html, $found);

  return $found[1];
}
// Returns the first URL (scanning the nested lists in order) that contains
// the given fighter fragment, or FALSE when none does.
public function getFighterImageFromList($list, $fighter) {
  foreach ($list as $candidate_urls) {
    foreach ($candidate_urls as $candidate) {
      if (strpos($candidate, $fighter) === FALSE) {
        continue;
      }
      return $candidate;
    }
  }

  return FALSE;
}
}

View File

@@ -1,54 +0,0 @@
<?php
namespace Drupal\ufc\Form;
use Drupal\Core\Form\FormBase;
use Drupal\Core\Form\FormStateInterface;
/**
 * Single-button form that queues the "self heal" batch over all fight nodes.
 *
 * @package Drupal\ufc\Form
 */
class SelfHealKickoffForm extends FormBase {

  /**
   * {@inheritdoc}
   */
  public function getFormId() {
    return 'self_heal_kickoff_form';
  }

  /**
   * {@inheritdoc}
   */
  public function buildForm(array $form, FormStateInterface $form_state) {
    // The form consists of nothing but its submit button.
    $form['self_heal'] = [
      '#type' => 'submit',
      '#value' => $this->t('Self Heal'),
    ];

    return $form;
  }

  /**
   * {@inheritdoc}
   */
  public function submitForm(array &$form, FormStateInterface $form_state) {
    $node_storage = \Drupal::service('entity_type.manager')->getStorage('node');
    $fights = $node_storage->loadByProperties(['type' => 'fight']);

    // One operation receives the complete set of loaded fight nodes.
    $update_operation = [
      '\Drupal\ufc\FightPredictor::updatePredictionsBatched',
      [$fights],
    ];

    batch_set([
      'title' => t('Updating Fights...'),
      'operations' => [$update_operation],
      'finished' => '\Drupal\ufc\FightPredictor::fightUpdatedCallback',
    ]);
  }

}

View File

@@ -0,0 +1,280 @@
<?php
namespace Drupal\ufc\Services;
use Drupal\Core\Datetime\DateFormatter;
use Drupal\Core\Entity\EntityTypeManager;
use Drupal\taxonomy\Entity\Term;
use Drupal\node\Entity\Node;
use GuzzleHttp\Client;
use Symfony\Component\DomCrawler\Crawler;
class FightImporter {
const EVENTS_BASE = "https://www.espn.com/mma/schedule/_/year/";
const EVENT_BASE = "https://www.espn.com";
/**
* The Guzzle HTTP Client.
*/
protected $httpClient;
/**
* The entity type manager.
*/
protected $entityTypeManager;
/**
* The date formatter.
*/
protected $dateFormatter;
/**
 * Public constructor.
 *
 * @param \GuzzleHttp\Client $httpClient
 *   The guzzle http client.
 * @param \Drupal\Core\Entity\EntityTypeManager $entityTypeManager
 *   The entity type manager.
 * @param \Drupal\Core\Datetime\DateFormatter $dateFormatter
 *   The date formatter service.
 */
public function __construct(
  Client $httpClient,
  EntityTypeManager $entityTypeManager,
  DateFormatter $dateFormatter
) {
  $this->dateFormatter = $dateFormatter;
  $this->entityTypeManager = $entityTypeManager;
  $this->httpClient = $httpClient;
}
/**
 * Import all events to taxonomy.
 *
 * Destructive: every existing term is removed first, then the schedule page
 * of each year from 2000 up to the current year is fetched and one term is
 * created per event row that is not filtered out by shouldSkipEvent().
 */
public function importEvents(): void {
  // First delete all events :-).
  $this->removeExistingEvents();

  // Scan through the current calendar year rather than a fixed end year, so
  // the import keeps picking up new seasons.
  $last_year = (int) date('Y');
  for ($year = 2000; $year <= $last_year; $year++) {
    $year_event_url = self::EVENTS_BASE . "{$year}/league/ufc";

    // The existing terms are already gone at this point, so a single failed
    // request should cost one year of events, not the rest of the import.
    try {
      $event_listing = $this->httpClient
        ->get($year_event_url)->getBody()->getContents();
    }
    catch (\GuzzleHttp\Exception\GuzzleException $e) {
      \Drupal::logger('ufc')->error("Unable to fetch events for $year: " . $e->getMessage());
      continue;
    }

    $crawler = new Crawler($event_listing);
    $events = $crawler->filter('.Schedule__EventLeague--ufc tbody tr');
    foreach ($events as $event) {
      $term_build = $this->processEvent($event, (string) $year);
      if (empty($term_build)) {
        continue;
      }
      if ($this->shouldSkipEvent($term_build['name'])) {
        \Drupal::logger('ufc')->warning("Skipping " . $term_build['name']);
        continue;
      }
      if (Term::create($term_build)->save()) {
        \Drupal::logger('ufc')->notice("Creating new term: " . $term_build['name']);
      }
      else {
        \Drupal::logger('ufc')->alert("Unable to save new event.");
      }
    }
  }
}
/**
 * Builds the taxonomy term values for one schedule table row.
 *
 * @param \DOMElement $event
 *   The table row. Only rows with exactly four child elements are used.
 * @param string $year
 *   The year of the schedule page; appended to the row's date text.
 *
 * @return array
 *   Values for Term::create() (vid, field_event_date, name, field_event_url),
 *   or an empty array when the row should be ignored.
 */
private function processEvent(\DOMElement $event, string $year): array {
  if ($event->childElementCount !== 4) {
    return [];
  }

  $date_cell = $event->childNodes[0];
  $name_cell = $event->childNodes[1];

  // The event URL is read from the first child of the name cell. Without an
  // element there, no URL can be read, so the row is skipped instead of
  // failing on a method call against a text node or NULL.
  $link = $name_cell->firstChild;
  if (!$link instanceof \DOMElement) {
    return [];
  }

  return [
    'vid' => 'ufc_events',
    'field_event_date' => $this->convertDate($date_cell->textContent . " $year"),
    'name' => $name_cell->textContent,
    'field_event_url' => $link->getAttribute('href'),
  ];
}
/**
 * Tells whether an event name belongs to a series that is not imported.
 *
 * @param string $term_name
 *   The event name to test.
 *
 * @return bool
 *   TRUE when the name contains any of the excluded fragments.
 */
private function shouldSkipEvent(string $term_name): bool {
  $excluded_fragments = [
    "Contender Series",
    "The Ultimate Fighter",
    "NEF: Fight Night",
    "TUF Brazil",
  ];

  $hits = array_filter(
    $excluded_fragments,
    static fn (string $fragment): bool => str_contains($term_name, $fragment)
  );

  return !empty($hits);
}
/**
 * Converts a date string into a database storable string.
 *
 * The input is parsed with strtotime() and rendered through the date
 * formatter service.
 *
 * Output format: YYYY-MM-DD.
 */
private function convertDate(string $date_str): string {
  $timestamp = strtotime($date_str);

  return $this->dateFormatter->format($timestamp, 'custom', 'Y-m-d');
}
/**
 * Clear out all prior events in the vocab.
 *
 * Every term of the ufc_events vocabulary is deleted; a failure on one term
 * is logged and does not stop the remaining deletions.
 */
private function removeExistingEvents(): void {
  \Drupal::logger('ufc')->notice("Removing all former events.");
  $terms = $this->entityTypeManager->getStorage('taxonomy_term')->loadByProperties(['vid' => 'ufc_events']);
  foreach ($terms as $term) {
    $term_name = $term->name->value;
    \Drupal::logger('ufc')->notice("Removing $term_name.");
    try {
      // loadByProperties() already returns loaded entities, so the term can
      // be deleted directly without loading it a second time.
      $term->delete();
      \Drupal::logger('ufc')->notice("Operation successful.");
    }
    catch (\Exception $e) {
      \Drupal::logger('ufc')->alert($e->getMessage());
    }
  }
}
/**
 * Create fights from events.
 *
 * Destructive: all existing fight nodes are deleted first. Then the page of
 * every ufc_events term is fetched and one fight node is created per result
 * row found on it.
 */
public function createFights(): void {
  // Clear out past fights.
  $existing_fights = $this->entityTypeManager->getStorage('node')->loadByProperties(['type' => 'fight']);
  foreach ($existing_fights as $existing_fight) {
    // delete() does not return a success flag; failure surfaces as an
    // exception. Read the title up front so it can be logged afterwards.
    $removed_title = $existing_fight->getTitle();
    try {
      $existing_fight->delete();
      \Drupal::logger('ufc')->notice("Removed " . $removed_title);
    }
    catch (\Exception $e) {
      \Drupal::logger('ufc')->alert($e->getMessage());
    }
  }

  // Go get all events.
  $all_events = $this->entityTypeManager->getStorage('taxonomy_term')
    ->loadByProperties(['vid' => 'ufc_events']);
  foreach ($all_events as $event) {
    // One unreachable event page should not abort the remaining events.
    try {
      $event_page_html = $this->httpClient
        ->get(self::EVENT_BASE . $event->field_event_url->uri)
        ->getBody()->getContents();
    }
    catch (\GuzzleHttp\Exception\GuzzleException $e) {
      \Drupal::logger('ufc')->error("Unable to fetch event page: " . $e->getMessage());
      continue;
    }

    $crawler = new Crawler($event_page_html);
    $fight_result_rows = $crawler->filter(".MMAGamestrip");
    foreach ($fight_result_rows as $fight_result_row) {
      $result = $this->processFightResultRow($fight_result_row);
      if (empty($result)) {
        continue;
      }
      $result['event'] = $event->id();
      $this->createFightNodeFromResult($result);
    }
  }
}
/**
 * Create fight node from results of extracted fight.
 *
 * @param array $result
 *   Expected keys: fighter_1, fighter_2 and winner (fighter names), plus
 *   event (the id of the event term). Rows lacking either fighter name are
 *   logged and skipped.
 */
private function createFightNodeFromResult(array $result): void {
  $fighter_1_name = $result['fighter_1'] ?? NULL;
  $fighter_2_name = $result['fighter_2'] ?? NULL;
  if ($fighter_1_name === NULL || $fighter_2_name === NULL) {
    // A fight needs two named fighters; nothing sensible can be built.
    \Drupal::logger('ufc')->warning("Skipping fight result without two fighter names.");
    return;
  }

  // Fighters are referenced by node id, looked up by name. The winner entry
  // may be a non-string placeholder, so it is normalised to a string before
  // the lookup.
  $fighter_1_id = $this->getFighterIdByName($fighter_1_name);
  $fighter_2_id = $this->getFighterIdByName($fighter_2_name);
  $fight_winner_id = $this->getFighterIdByName((string) ($result['winner'] ?? ''));

  $fight = Node::create([
    'type' => 'fight',
    'title' => "$fighter_1_name vs. $fighter_2_name",
    'field_fighter_one' => [
      'target_id' => $fighter_1_id,
    ],
    'field_fighter_two' => [
      'target_id' => $fighter_2_id,
    ],
    'field_event' => [
      'target_id' => $result['event'] ?? NULL,
    ],
    'field_result' => [
      'target_id' => $fight_winner_id,
    ],
  ]);

  // Treat an exception from save() as the failure case as well, so the
  // FAILED message is actually reachable.
  try {
    $saved = $fight->save();
  }
  catch (\Exception $e) {
    $saved = FALSE;
  }

  if ($saved) {
    \Drupal::logger('ufc')->notice("$fighter_1_name vs. $fighter_2_name Created");
  }
  else {
    \Drupal::logger('ufc')->alert("$fighter_1_name vs. $fighter_2_name FAILED");
  }
}
/**
 * Retrieve a fighter ID by name.
 *
 * @param string $name
 *   Node title to look for.
 *
 * @return int
 *   The id of the first node whose title matches, or 0 when none does.
 */
private function getFighterIdByName(string $name): int {
  // reset() takes its argument by reference, so the lookup result has to be
  // stored in a variable before it is passed in.
  $matches = $this->entityTypeManager->getStorage('node')->loadByProperties(['title' => $name]);
  $existing_node = reset($matches);
  if ($existing_node) {
    // Cast explicitly to satisfy the declared int return type.
    return (int) $existing_node->id();
  }
  return 0;
}
/**
 * Iterate over fight result rows to extract results.
 *
 * Looks inside each .MMACompetitor element for an h2 nested three levels
 * down (div > any node > h2) and records its text as fighter_1, fighter_2,
 * and so on. A competitor element with exactly two child elements is
 * recorded as the winner.
 *
 * @param \DOMElement $row
 *   One fight row.
 *
 * @return array
 *   Always contains 'winner' (0 when none was detected) plus one
 *   'fighter_N' entry per name found.
 */
private function processFightResultRow(\DOMElement $row): array {
  $results = [
    'winner' => 0,
  ];
  $comp_crawler = new Crawler($this->getInnerHtml($row));
  $competitors = $comp_crawler->filter(".MMACompetitor");
  $fighter_num = 1;
  // @todo - this is crazy. Maybe do another crawler instead.
  foreach ($competitors as $competitor) {
    foreach ($competitor->childNodes as $child) {
      // Child lists also contain text and comment nodes, which have no tag
      // name; only elements are inspected.
      if (!$child instanceof \DOMElement || $child->tagName != 'div') {
        continue;
      }
      foreach ($child->childNodes as $grandchild) {
        if (!$grandchild instanceof \DOMElement) {
          continue;
        }
        foreach ($grandchild->childNodes as $gg_child) {
          if (!$gg_child instanceof \DOMElement || $gg_child->tagName != 'h2') {
            continue;
          }
          $results["fighter_$fighter_num"] = $gg_child->textContent;
          if ($competitor->childElementCount == 2) {
            $results['winner'] = $gg_child->textContent;
          }
          $fighter_num++;
        }
      }
    }
  }
  return $results;
}
/**
 * Serialises the children of a DOMElement into one markup string.
 *
 * Each child node is dumped through its owner document and the pieces are
 * joined in document order; the element's own tag is not included.
 */
private function getInnerHtml(\DOMElement $node) {
  $fragments = [];
  foreach ($node->childNodes as $child_node) {
    $fragments[] = $child_node->ownerDocument->saveXML($child_node);
  }

  return implode('', $fragments);
}
}

View File

@@ -0,0 +1,241 @@
<?php
namespace Drupal\ufc\Services;
use Drupal\Core\Entity\EntityTypeManager;
use Drupal\Core\Cache\CacheBackendInterface;
use Drupal\Core\Cache\Cache;
use Drupal\media\Media;
use Drupal\ufc\Fighter;
use GuzzleHttp\Client;
use Symfony\Component\DomCrawler\Crawler;
class FighterImporter {
/**
* Guzzle http client service.
* @var \GuzzleHttp\Client
*/
protected $httpClient;
/**
* Entity type manager service.
* @var \Drupal\Core\Entity\EntityTypeManager
*/
protected $entityTypeManager;
/**
* Array of all fighters.
* @var array[]
*/
protected $fighters = [];
/**
* The current weight class.
* @var string
*/
protected $weightClass;
/**
* The UFC cache bin.
*/
protected $cache;
/**
* The base url for fighter lists.
* @var string
*/
const UFC_BASE = "https://www.ufc.com/athletes/all?filters%5B0%5D=weight_class%3";
/**
* All applicable divisions and their keys on UFC.com.
* @var array[]
*/
protected $divisions = [
'heavyweight' => "A11",
'light_heavyweight' => "A13",
'middleweight' => "A14",
'welterweight' => "A15",
'lightweight' => "A12",
'featherweight' => "A9",
'bantamweight' => "A8",
'flyweight' => "A10",
];
/**
 * Public constructor for Fighter Importer.
 *
 * @param \GuzzleHttp\Client $httpClient
 *   Guzzle http client service.
 * @param \Drupal\Core\Entity\EntityTypeManager $entityTypeManager
 *   Entity type manager service.
 * @param \Drupal\Core\Cache\CacheBackendInterface $cache
 *   The UFC cache bin.
 */
public function __construct(
  Client $httpClient,
  EntityTypeManager $entityTypeManager,
  CacheBackendInterface $cache
) {
  $this->cache = $cache;
  $this->entityTypeManager = $entityTypeManager;
  $this->httpClient = $httpClient;
}
/**
 * Imports all listed fighters, one division at a time.
 *
 * Builds the per-division fighter list and hands every division to
 * processDivision(). The former hard-disabled manual test branch (which
 * could never run) has been removed.
 */
public function importFighters(): void {
  $fighters_by_div = $this->getListOfCurrentFighters();

  // Process each fighter into system.
  foreach ($fighters_by_div as $division => $fighters) {
    $this->processDivision($division, $fighters);
  }
}
/**
 * Process a division.
 *
 * Builds a Fighter for each listing entry, lets it scrape its profile page
 * and create its image media, then updates the node whose title matches the
 * fighter's name or creates a new one.
 *
 * @param mixed $div
 *   The division the fighters belong to; stored on each Fighter as its class.
 * @param mixed $fighters
 *   Listing entries with firstname, lastname, image and profile keys.
 */
private function processDivision($div, $fighters): void {
  $node_storage = $this->entityTypeManager->getStorage('node');

  foreach ($fighters as $fighter_data) {
    $fighter = new Fighter($this->httpClient);
    $fighter->first_name = $fighter_data['firstname'];
    $fighter->last_name = $fighter_data['lastname'];
    $fighter->image = $fighter_data['image'];
    $fighter->class = $div;

    // NOTE(review): a failed scrape is only logged; the fighter is still
    // saved below. Confirm that is intended.
    if (!$fighter->scrapeDataFromFighterPage($fighter_data['profile'])) {
      \Drupal::logger('ufc')->alert("FAILED: $fighter->first_name $fighter->last_name to " . $fighter_data['profile']);
    }

    $fighter->createMediaEntityFromImage();

    // Check if node exists, by title. reset() takes its argument by
    // reference, so the lookup result has to live in a variable first.
    $title = $fighter->first_name . " " . $fighter->last_name;
    $matching_nodes = $node_storage->loadByProperties(['title' => $title]);
    $node_lookup = reset($matching_nodes);

    if (!empty($node_lookup)) {
      // Update instead of create.
      $fighter->updatePlayerNode($node_lookup->id());
      \Drupal::logger('ufc')->notice("$title updated successfully.");
    }
    else {
      \Drupal::logger('ufc')->warning("No existing player found for $title...creating");
      $fighter->createPlayerNode();
    }
  }
}
/**
 * Collects the fighters of every configured division.
 *
 * The division being processed is remembered in $this->weightClass so that
 * the extraction step can group the fighters it finds.
 *
 * @return array
 *   The accumulated fighters array.
 */
public function getListOfCurrentFighters(): array {
  foreach ($this->divisions as $machine_name => $filter_value) {
    $this->weightClass = $machine_name;
    echo "Starting import for {$machine_name}\n";
    self::loopThroughFighterPages(self::UFC_BASE . $filter_value);
  }

  return $this->fighters;
}
/**
 * There is a pager, loop through to get all fighters.
 *
 * Requests pages 0..100 of the listing and hands each page to
 * extractFighters(). Stops at the first page containing the
 * "No Result Found For" marker.
 *
 * @param string $base_url
 *   Listing URL for one division, without the page parameter.
 *
 * @todo Cache page bodies so repeated imports do not need fresh requests.
 */
public function loopThroughFighterPages($base_url): void {
  for ($page = 0; $page <= 100; $page++) {
    $url = $base_url . "&page=$page";
    // NOTE(review): 'verify' => false turns off TLS certificate verification
    // for this request; confirm this is still required.
    $response = $this->httpClient->request('GET', $url, ['verify' => false]);
    $content = $response->getBody()->getContents();

    // str_contains() avoids the strpos() pitfall where a match at offset 0
    // is falsy and would be read as "marker not found".
    if (str_contains($content, "No Result Found For")) {
      break;
    }

    \Drupal::logger('ufc')->notice("Extracting fighters from page $page.");
    $this->extractFighters($content);
  }
}
/**
 * Extract fighters from an html string.
 *
 * Every athlete flipcard contributes one entry under the current weight
 * class in $this->fighters, keyed by the fighter's name without spaces and
 * holding firstname, lastname, profile (link href) and image (first image
 * src when the card has exactly two images, FALSE otherwise).
 *
 * Cards lacking a name or a profile link are logged and skipped rather than
 * aborting the whole page.
 *
 * @param string $input
 *   HTML of one listing page.
 */
public function extractFighters(string $input): void {
  $crawler = new Crawler($input);
  $athlete_flipcards = $crawler->filter('.c-listing-athlete-flipcard');

  // Name, profile link and images are read from the same card in one pass,
  // so the three values can never get out of step with each other.
  $athlete_flipcards->each(function (Crawler $card, $i): void {
    $name_node = $card->filter('.c-listing-athlete__name');
    $profile_node = $card->filter('.e-button--black');
    // text() and attr() throw on an empty node list, so check first.
    if ($name_node->count() === 0 || $profile_node->count() === 0) {
      \Drupal::logger('ufc')->warning("Skipping listing card $i: missing name or profile link.");
      return;
    }

    $name = $name_node->text();
    $profile = $profile_node->attr('href');
    $images = $card->filter('img')->each(function (Crawler $img) {
      return $img->attr('src');
    });

    $name_no_spaces = str_replace(" ", "", $name);
    if (!isset($this->fighters[$this->weightClass][$name_no_spaces])) {
      // First word is the first name, the remainder is the last name.
      $split_name = explode(" ", $name, 2);
      $this->fighters[$this->weightClass][$name_no_spaces]['firstname'] = $split_name[0] ?? " ";
      $this->fighters[$this->weightClass][$name_no_spaces]['lastname'] = $split_name[1] ?? " ";
      $this->fighters[$this->weightClass][$name_no_spaces]['profile'] = $profile;
    }

    // Only a card with exactly two images yields an image; its first image
    // is the one stored.
    $this->fighters[$this->weightClass][$name_no_spaces]['image'] =
      count($images) == 2 ? $images[0] : FALSE;
  });
}
}

View File

@@ -0,0 +1,137 @@
<?php
namespace Drupal\ufc\Traits;
/**
 * Manual first/last name corrections for names that come out wrong.
 *
 * Expects the using class to provide $first_name and $last_name properties.
 */
trait NameConversionTrait {

  /**
   * Applies the manual name overrides to $this->first_name / $this->last_name.
   *
   * Rules are evaluated top to bottom against the current property values,
   * so a later rule sees the result of an earlier one.
   */
  public function convertNames(): void {
    // Manual overrides for name problems go here.
    // Each rule: [property to test, value to match, new first name or NULL
    // to leave it alone, new last name or NULL to leave it alone].
    // (A former "Aoriqileng" check only assigned an unused local variable
    // and changed nothing, so it has no rule here.)
    $overrides = [
      ['first_name', 'Khaos', 'Kalinn', NULL],
      ['first_name', 'J', 'JP', 'Buys'],
      ['last_name', 'Mc Kee', NULL, 'McKee'],
      ['last_name', 'Mc Gee', NULL, 'McGee'],
      ['last_name', 'Mc Gregor', NULL, 'mcgregor'],
      ['last_name', "O&#039; Malley", NULL, 'Omalley'],
      ['first_name', "Don&#039;", 'dontale', 'mayes'],
      ['first_name', "Marc-", 'Marc', NULL],
      ['first_name', "A", 'AJ', 'Dobson'],
      ['first_name', "C", 'CB', 'Dollaway'],
      ['last_name', "Della Maddalena", NULL, 'Della'],
      ['first_name', "Elizeudos", 'elizeu', 'dos-santos'],
      ['last_name', "La Flare", NULL, 'laflare'],
      ['first_name', "JoelÁlvarez", 'Joel', NULL],
      ['last_name', "J Brown", 'TJ', 'Brown'],
      ['first_name', "Alexda", "alex-da", NULL],
      ['last_name', "Mc Kinney", NULL, "mckinney"],
      ['last_name', "Van Camp", NULL, "vancamp"],
      ['last_name', "J Laramie", "TJ", "Laramie"],
      ['last_name', "Al- Qaisi", NULL, "alqaisi"],
      ['first_name', "Alatengheili", "heili", "alateng"],
      ['last_name', "J Dillashaw", "TJ", "Dillashaw"],
      ['first_name', "Andersondos", "Anderson-dos", NULL],
      ['last_name', "Silvade Andrade", NULL, "Silva-de-andrade"],
      ['first_name', "Ode&#039;", "ode", NULL],
      ['first_name', "Sumudaerji", "su", "mudaerji"],
    ];

    foreach ($overrides as [$property, $expected, $new_first, $new_last]) {
      if ($this->{$property} !== $expected) {
        continue;
      }
      if ($new_first !== NULL) {
        $this->first_name = $new_first;
      }
      if ($new_last !== NULL) {
        $this->last_name = $new_last;
      }
    }

    // The only rule that needs both names to match.
    if ($this->first_name === "Georges" && $this->last_name === "St- Pierre") {
      $this->last_name = "st pierre";
    }
  }

}

View File

@@ -0,0 +1,132 @@
<?php
namespace Drupal\ufc\Traits;
/**
 * Parsing helpers that turn scraped text into typed stat values.
 *
 * Several helpers write their result onto properties of the using class
 * (for example $standing_strikes or $strikes_per_min).
 */
trait ScraperHelperTrait {

  /**
   * Map to extract average from a blob of text, process, and set.
   *
   * The node's lower-cased text decides which stat it describes; the value
   * itself is read from the node's first child element, cleaned with the
   * handler configured for that stat, and stored on the matching property.
   * Nodes without text, or without a child element to read the value from
   * (which includes every text node), are ignored.
   *
   * @param \DOMText|\DOMElement $elem
   *   The node to inspect.
   */
  public function extractAndSetAverage(\DOMText|\DOMElement $elem): void {
    $child_content = strtolower($elem->textContent);
    if (strlen($child_content) < 1) {
      return;
    }

    // Text nodes have no child elements at all, and an element may lack
    // one; in both cases there is no value to read.
    $value_node = $elem instanceof \DOMElement ? $elem->firstElementChild : NULL;
    if ($value_node === NULL) {
      return;
    }

    // Property name => [label to search for, cleaning method].
    $averages_map = [
      "strikes_per_min" => [
        "sig. str. landed",
        "cleanFloatDatapoint",
      ],
      "absorbed_per_min" => [
        "sig. str. absorbed",
        "cleanFloatDatapoint",
      ],
      "takedowns_per_15" => [
        "takedown avg",
        "cleanFloatDatapoint",
      ],
      "submission_avg_per_15" => [
        "submission avg",
        "cleanFloatDatapoint",
      ],
      "sig_strike_defense" => [
        "sig. str. defense",
        "cleanIntDatapoint",
      ],
      "takedown_defense" => [
        "takedown defense",
        "cleanIntDatapoint",
      ],
      "knockdown_ratio" => [
        "knockdown avg",
        "cleanFloatDatapoint",
      ],
      "average_fight_time" => [
        "average fight time",
        "convertAverageFightTimeToSeconds",
      ],
    ];

    foreach ($averages_map as $avg_type => [$label, $cleaner]) {
      if (str_contains($child_content, $label)) {
        $this->{$avg_type} = $this->{$cleaner}((string) $value_node->nodeValue);
      }
    }
  }

  /**
   * Extracts accuracy number from a string.
   *
   * Examples of expected input:
   *   Striking accuracy 65%
   *   Grappling Accuracy 60%
   *
   * @param string $string
   *   Text whose third space-separated token is the percentage.
   *
   * @return float
   *   The percentage as a number, or 0.0 when no third token is present.
   */
  public function extractAccuracyFromString(string $string): float {
    $split_on_space = explode(" ", $string);
    $percentage = $split_on_space[2] ?? FALSE;

    // If no percentage located, simply set to 0.
    if (!$percentage) {
      return 0.0;
    }

    return (float) trim(str_replace('%', '', $percentage));
  }

  /**
   * Pulls strike totals.
   *
   * Collects the purely numeric tokens of the input; when there are exactly
   * three, they are stored as standing, clinch and ground strikes in that
   * order. Any other count leaves the properties untouched.
   *
   * @param string $input
   *   Scraped text containing the three totals.
   */
  public function setStrikes(string $input): void {
    $clean = str_replace(["\n", '"'], "", $input);

    $data = [];
    // No array_filter() here: without a callback it also discards the
    // string "0", which silently dropped a legitimate total of zero and
    // made the count check below fail.
    foreach (explode(" ", $clean) as $str) {
      if (ctype_digit($str)) {
        $data[] = $str;
      }
    }

    if (count($data) == 3) {
      $this->standing_strikes = (int) $data[0];
      $this->clinch_strikes = (int) $data[1];
      $this->ground_strikes = (int) $data[2];
    }
  }

  /**
   * Converts time format (MM:SS) to seconds (s).
   *
   * @param string $fight_time
   *   Time as "MM:SS". A value without a colon is treated as whole minutes.
   *
   * @return int
   *   Total number of seconds.
   */
  public function convertAverageFightTimeToSeconds(string $fight_time): int {
    $mins_seconds = explode(':', $fight_time);
    $minutes = (int) $mins_seconds[0];
    // The seconds part is absent when the input carries no colon.
    $seconds = (int) ($mins_seconds[1] ?? 0);

    return ($minutes * 60) + $seconds;
  }

  /**
   * Cleans a float datapoint for insertion to the database.
   *
   * @param string $datapoint
   *   Raw text; newlines and surrounding whitespace are stripped.
   *
   * @return float
   *   The value cast to float.
   */
  protected function cleanFloatDatapoint(string $datapoint): float {
    return (float) trim(str_replace("\n", "", $datapoint));
  }

  /**
   * Cleans an int datapoint for insertion to the database.
   *
   * @param string $datapoint
   *   Raw text; newlines and surrounding whitespace are stripped.
   *
   * @return int
   *   The value cast to int.
   */
  protected function cleanIntDatapoint(string $datapoint): int {
    return (int) trim(str_replace("\n", "", $datapoint));
  }

}