Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 57 additions & 0 deletions first_cycling_api/combi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun May 14 15:39:13 2023

@author: maxime
"""
from .race.race import RaceEdition

def combi_results_startlist(race_id, year, **kwargs):
    """
    Fetch the results of a race edition and add each rider's bib number.

    Results are requested through ``RaceEdition.results``. When nothing usable
    comes back (the race may not be completed yet), stage 1 is requested
    instead, and stage 2 if the stage 1 table has no "Inv name" column (for
    instance a team time trial). Bib numbers are then looked up in the start
    list by "Inv name" and written to a "BIB" column; riders that cannot be
    matched get a bib of 0.

    Parameters
    ----------
    race_id
        Race identifier passed to ``RaceEdition``.
    year
        Edition year passed to ``RaceEdition``.
    **kwargs
        Forwarded to ``RaceEdition.results`` (e.g. ``stage_num``,
        ``classification_num``).

    Returns
    -------
    The object returned by ``RaceEdition.results`` with ``results_table``
    extended by a "BIB" column, or None when no rider-level table could be
    obtained or when any step failed.
    """
    try:
        edition = RaceEdition(race_id=race_id, year=year)
        response = edition.results(**kwargs)

        if response is None or (
            hasattr(response, "results_table") and response.results_table is None
        ):
            # Case of a race not completed yet: fall back to the first stage.
            edition = RaceEdition(race_id=race_id, year=year)
            kwargs.update(stage_num=1)
            response = edition.results(**kwargs)
            if response is None or (
                hasattr(response, "results_table")
                and (
                    response.results_table is None
                    or "Inv name" not in response.results_table.columns
                )
            ):
                # Fallback when stage 1 carries no rider names (TTT).
                kwargs.update(stage_num=2)
                response = edition.results(**kwargs)

        if response is None:
            print("No results available for this race")
            return None

        # The response either wraps the table or is the table itself.
        if hasattr(response, "results_table"):
            results_table = response.results_table
        else:
            results_table = response

        if results_table is None or "Inv name" not in results_table.columns:
            print("No Inv name in results_table, the stage may be a TTT")
            return None

        bib_df = edition.startlist().bib_df

        for idx in results_table.index:
            name = results_table.loc[idx, "Inv name"]
            try:
                results_table.loc[idx, "BIB"] = bib_df.loc[name]["BIB"]
            except (KeyError, ValueError, TypeError):
                # Unknown or ambiguous rider name: keep the row, flag it with 0.
                print(f"{name} not found in the start list")
                results_table.loc[idx, "BIB"] = 0

        response.results_table = results_table
        return response
    except Exception as msg:
        # Best-effort boundary: failures are reported on stdout and the
        # caller receives None instead of an exception.
        print("line " + str(msg.__traceback__.tb_lineno))
        print(msg)
        return None


18 changes: 17 additions & 1 deletion first_cycling_api/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,18 +75,34 @@ def parse_table(table):
out_df[col] = out_df[col].astype(str).str.replace('.', '', regex=False).astype(int)

# Parse soup to add information hidden in tags/links

headers = [th.text for th in table.find_all('th')]
trs = [tr for tr in table.find_all('tr') if tr.th is None]

if 'Race.1' in out_df:
out_df = out_df.rename(columns={'Race': 'Race_Country', 'Race.1': 'Race'})
headers.insert(headers.index('Race'), 'Race_Country')


for col in out_df.columns: #problems with \nRider\n
if "Rider" in col:
out_df = out_df.rename(columns={col: 'Rider'})
break
for i, col in enumerate(headers): #problems with \nRider\n
if "Rider" in col:
headers[i]='Rider'
break

soup_df = pd.DataFrame([tr.find_all('td') for tr in trs], columns=headers)

# Add information hidden in tags
for col, series in soup_df.items():
if col in ('Rider', 'Winner', 'Second', 'Third'):
if col =="Rider":
out_df["Rider"]=out_df["Rider"].str.replace("[*]","",regex=False)
out_df["Rider"]=out_df["Rider"].str.replace("*","",regex=False)
out_df["Rider"]=out_df["Rider"].str.replace(" "," " ,regex=False)
out_df["Inv name"]=out_df["Rider"].str.lower()

out_df[col + '_ID'] = series.apply(lambda td: rider_link_to_id(td.a))
try:
out_df[col + '_Country'] = series.apply(lambda td: img_to_country_code(td.img))
Expand Down
53 changes: 45 additions & 8 deletions first_cycling_api/race/endpoints.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from ..endpoints import ParsedEndpoint
from ..parser import parse_table

import pandas as pd

class RaceEndpoint(ParsedEndpoint):
"""
Expand Down Expand Up @@ -48,7 +48,10 @@ def _get_victory_table(self):
victory_table = self.soup.find('table', {'class': 'tablesorter'})
self.table = parse_table(victory_table)


class Standing:
    """Thin wrapper holding one parsed standings table."""

    def __init__(self, results_table):
        # Kept under the attribute name `results_table`, unchanged.
        self.results_table = results_table

class RaceStageVictories(RaceEndpoint):
"""
Race stage victory table response. Extends RaceEndpoint.
Expand Down Expand Up @@ -86,14 +89,48 @@ def _parse_soup(self):
self._get_sidebar_information()

# Populate self.results_table and self.standings from the tables found in self.soup.
# NOTE(review): the flattened text below seems to carry two variants of several
# statements back to back (e.g. the 'sortTabell' and 'sortTabell tablesorter'
# lookups) and no indentation -- confirm which statements are live and how they
# nest before relying on this method.
def _get_results_table(self):
results_table = self.soup.find('table', {'class': 'sortTabell'})
results_table = self.soup.find('table', {'class': 'sortTabell tablesorter'})
if not results_table:
results_table = self.soup.find('table', {'class': 'sortTabell2'})
self.results_table = parse_table(results_table)

# Load all classification standings after stage
divs = self.soup.find_all('div', {'class': "tab-content"})
self.standings = {div['id']: parse_table(div.table) for div in divs}
results_table = self.soup.find('table', {'class': 'sortTabell2 tablesorter'})

# A table was found: parse it as the results, and wrap the table of each
# 'tab-content dummy' div in a Standing keyed by the div id.
if results_table: #old race type
self.results_table = parse_table(results_table)

# Load all classification standings after stage
divs = self.soup.find_all('div', {'class': "tab-content dummy"})
self.standings = {div['id']: Standing(parse_table(div.table)) for div in divs} #may not work and require the use of l=classification num

# No table found: every 'tab-content' div becomes a Standing, and the table of
# the first div doubles as the results table.
else: #new race type
divs = self.soup.find_all('div', {'class': "tab-content"}) #includes also tab-content results
self.standings= {div['id']: Standing(parse_table(div.table)) for div in divs}

# NOTE(review): divs[0] presumably fails when the page has no 'tab-content'
# div -- confirm whether that case can occur.
self.results_table = self.standings[divs[0]['id']].results_table #first appearing is the result


def _get_sidebar_information(self): # TODO
return

class RaceEditionStartlist(RaceEndpoint):
    """
    Race edition start list response. Extends RaceEndpoint.

    Attributes
    ----------
    bib_df : pd.DataFrame
        One row per start-list entry with columns "BIB" and "Inv name"
        (the lower-cased, cleaned name), indexed by "Inv name".
    """

    def _parse_soup(self):
        super()._parse_soup()
        self._get_results_table()

    def _get_results_table(self):
        """Build ``self.bib_df`` from every 'tablesorter' table in the page."""
        # Local import: pandas deprecates passing literal HTML to read_html,
        # so the markup is handed over as a file-like object.
        from io import StringIO

        tables = self.soup.find_all('table', {'class': 'tablesorter'})

        # Applied in this order after lower-casing.
        # NOTE(review): the last pair replaces a space with a space as
        # written -- presumably meant to collapse a double or non-breaking
        # space; confirm.
        replacements = (("[*]", ""), (" *", ""), ("*", ""), (" ", " "))

        frames = []
        for table in tables:
            sub_df = pd.read_html(StringIO(str(table)), decimal=',')[0]
            sub_df.columns = ["BIB", "Inv name"]
            names = sub_df["Inv name"].str.lower()
            for old, new in replacements:
                names = names.str.replace(old, new, regex=False)
            sub_df["Inv name"] = names
            frames.append(sub_df)

        if frames:
            bib_df = pd.concat(frames)
        else:
            # No start-list table on the page: expose an empty lookup table
            # instead of letting pd.concat raise on an empty list.
            bib_df = pd.DataFrame(columns=["BIB", "Inv name"])
        self.bib_df = bib_df.set_index(bib_df["Inv name"])
4 changes: 2 additions & 2 deletions first_cycling_api/race/race.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from ..objects import FirstCyclingObject
from .endpoints import RaceEndpoint, RaceVictoryTable, RaceStageVictories, RaceEditionResults
from .endpoints import RaceEndpoint, RaceVictoryTable, RaceStageVictories, RaceEditionResults, RaceEditionStartlist
from ..api import fc
from ..constants import Classification

Expand Down Expand Up @@ -161,7 +161,7 @@ def startlist(self):
-------
RaceEndpoint
"""
return self._get_endpoint(k=8)
return self._get_endpoint(k=8,endpoint=RaceEditionStartlist)

def startlist_extended(self):
"""
Expand Down
83 changes: 83 additions & 0 deletions tests/test_combi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun May 14 16:26:19 2023

@author: maxime
"""

from first_cycling_api.combi import combi_results_startlist
import numpy as np

def test_combi_2019_amstel():
    """Race 9, 2019 edition: size of the table, winner and winner's bib."""
    combined = combi_results_startlist(9, 2019)
    table = combined.results_table

    assert len(table) == 175
    assert table['Rider'].iloc[0] == 'van der Poel Mathieu'
    assert table['BIB'].iloc[0] == 181

def test_2022_TdF():
    """Race 17, 2022 edition: default results and classifications 1 and 2."""
    # The default request and classification_num=1 are expected to agree.
    for extra in ({}, {"classification_num": 1}):
        combined = combi_results_startlist(17, 2022, **extra)
        table = combined.results_table
        assert len(table) == 176
        assert table['Rider'].iloc[0] == 'Vingegaard Jonas'
        assert table['BIB'].iloc[0] == 18

    combined = combi_results_startlist(17, 2022, classification_num=2)
    table = combined.results_table
    assert len(table) == 26
    assert table['Rider'].iloc[0] == "Pogacar Tadej"
    assert table['Time'].iloc[0] == "79:36:03"



def test_combi_2023_itzulia():
    """Race 14244, 2023, stage 1: results, standings keys, classifications."""
    combined = combi_results_startlist(14244, 2023, stage_num=1)
    table = combined.results_table

    assert len(table) == 113
    assert table['Rider'].iloc[0] == 'Vollering Demi'
    assert table['BIB'].iloc[0] == 1

    for key in ('gc', 'point', 'mountain', 'youth'):
        assert key in combined.standings

    # The same rider leads classifications 1 and 3 after this stage.
    for num in (1, 3):
        combined = combi_results_startlist(
            14244, 2023, stage_num=1, classification_num=num
        )
        table = combined.results_table
        assert table['Rider'].iloc[0] == 'Vollering Demi'
        assert table['BIB'].iloc[0] == 1

def test_combi_2023_gracia():
    """Race 9549, 2023, stage 3: results table and standings keys."""
    combined = combi_results_startlist(9549, 2023, stage_num=3)
    table = combined.results_table

    assert len(table) == 128
    assert table['Rider'].iloc[0] == 'Rissveds Jenny'
    assert table['BIB'].iloc[0] == 73

    for key in ('gc', 'point', 'mountain', 'youth'):
        assert key in combined.standings

    # Disabled check, kept for reference:
    #t = combi_results_startlist(9549,2023,stage_num=3,classification_num=3)
    #assert t.results_table['Rider'].iloc[0] == 'Wlodarczyk Dominika'

def test_giro_donne_2001():
    """Race 9064, 2001, stage 1: only the size of the table is checked."""
    combined = combi_results_startlist(9064, 2001, stage_num=1)
    row_count = len(combined.results_table)
    assert row_count == 10

    # Disabled check, kept for reference (classification 3 does not exist):
    #t = combi_results_startlist(9064,2001,stage_num=1,classification_num=3) #not existing
    #assert t==None





Loading