def _results_frame(result):
    """Return the table carried by *result*, or *result* itself when it has no ``results_table``."""
    return result.results_table if hasattr(result, "results_table") else result


def _lacks_rider_rows(result):
    """Tell whether *result* is missing, empty, or has no ``"Inv name"`` column."""
    if result is None:
        return True
    if not hasattr(result, "results_table"):
        return False
    table = result.results_table
    return table is None or "Inv name" not in table.columns


def _name_key(name):
    """Normalise a rider name so result and start-list spellings compare equal."""
    # "[*]" must go before "*", otherwise an empty "[]" is left behind.
    cleaned = str(name).replace("[*]", "").replace("*", "")
    # split/join collapses the double spaces and trailing blanks the markers leave.
    return " ".join(cleaned.split()).lower()


def combi_results_startlist(race_id, year, **kwargs):
    """
    Return race results whose table carries a ``BIB`` column taken from the start list.

    The results are requested with ``**kwargs`` first. When they carry no
    table, stage 1 is requested instead, and stage 2 when stage 1 has no
    ``"Inv name"`` column (e.g. a team time trial).

    Parameters
    ----------
    race_id : int
        Race identifier, passed to ``RaceEdition``.
    year : int
        Edition year, passed to ``RaceEdition``.
    **kwargs
        Forwarded to ``RaceEdition.results``. ``stage_num`` is overwritten
        while falling back to stage 1 or 2.

    Returns
    -------
    object or None
        The results object, with ``results_table["BIB"]`` filled from the
        start list (``0`` for riders that cannot be matched unambiguously).
        ``None`` when no per-rider table is available or when any step fails;
        the failure is logged, not raised.
    """
    import logging

    logger = logging.getLogger(__name__)

    try:
        edition = RaceEdition(race_id=race_id, year=year)
        result = edition.results(**kwargs)

        if result is None or _results_frame(result) is None:
            # No result table yet (race not completed): use the first stage.
            kwargs["stage_num"] = 1
            result = edition.results(**kwargs)
            if _lacks_rider_rows(result):
                # Stage 1 has no per-rider rows, e.g. a TTT: use stage 2.
                kwargs["stage_num"] = 2
                result = edition.results(**kwargs)

        results_table = None if result is None else _results_frame(result)
        if results_table is None:
            logger.warning("No results table for race %s in %s", race_id, year)
            return None
        if "Inv name" not in results_table.columns:
            logger.warning("No Inv name in results_table, the stage may be a TTT")
            return None

        bib_df = edition.startlist().bib_df

        # Build a name -> bib lookup once instead of one .loc lookup per row.
        bib_by_name = {}
        ambiguous = set()
        for name, bib in zip(bib_df.index, bib_df["BIB"]):
            key = _name_key(name)
            if key in bib_by_name:
                ambiguous.add(key)
            bib_by_name[key] = bib
        # A name listed twice cannot be resolved to a single bib.
        for key in ambiguous:
            del bib_by_name[key]

        bibs = results_table["Inv name"].map(_name_key).map(bib_by_name)
        for name in results_table.loc[bibs.isna(), "Inv name"]:
            logger.warning("%s not found in the start list", name)
        results_table["BIB"] = bibs.fillna(0)

        # Keep the table reachable as ``.results_table`` whatever was returned.
        result.results_table = results_table
        return result
    except Exception:
        # Top-level boundary: callers expect None, not an exception.
        logger.exception(
            "Could not combine results and start list for race %s in %s",
            race_id,
            year,
        )
        return None
class Standing:
    """
    Classification standing after a stage, wrapping its parsed table.

    The table is exposed as ``results_table``. ``len()``, item access and
    attribute access are forwarded to the table so that code written when
    standings were bare tables (``len(standings['youth'])``,
    ``standings['youth']['Rider']``) keeps working.

    Attributes
    ----------
    results_table : object
        The parsed table given to the constructor (may be ``None``).
    """

    def __init__(self, results_table):
        self.results_table = results_table

    def __len__(self):
        return 0 if self.results_table is None else len(self.results_table)

    def __bool__(self):
        # Defining __len__ must not make an empty standing falsy: the wrapper
        # itself has always been truthy.
        return True

    def __getitem__(self, key):
        return self.results_table[key]

    def __getattr__(self, name):
        # Only reached when normal lookup fails. Never forward
        # ``results_table`` itself (infinite recursion on a bare instance) nor
        # dunder probes made by copy/pickle.
        if name == "results_table" or name.startswith("__"):
            raise AttributeError(name)
        return getattr(self.results_table, name)
class RaceEditionStartlist(RaceEndpoint):
    """
    Race edition start list response. Extends RaceEndpoint.

    Attributes
    ----------
    bib_df : pd.DataFrame
        Columns ``BIB`` and ``Inv name`` (lower-cased rider name without ``*``
        markers), indexed by ``Inv name``. Empty when the page has no
        start-list table.
    """

    def _parse_soup(self):
        super()._parse_soup()
        self._get_results_table()

    def _get_results_table(self):
        """Collect every ``tablesorter`` table of the page into ``self.bib_df``."""
        from io import StringIO

        frames = []
        for table in self.soup.find_all('table', {'class': 'tablesorter'}):
            # read_html expects a file-like object; literal HTML is deprecated.
            sub_df = pd.read_html(StringIO(str(table)), decimal=',')[0]
            if sub_df.shape[1] != 2:
                # Defensive: only two-column tables can be labelled below.
                continue
            sub_df.columns = ["BIB", "Inv name"]

            names = sub_df["Inv name"].str.lower()
            # Order matters: "[*]" and " *" must be removed before the bare "*".
            for marker in ("[*]", " *", "*"):
                names = names.str.replace(marker, "", regex=False)
            # Collapse the blanks left behind by the removed markers.
            sub_df["Inv name"] = names.str.replace(r"\s+", " ", regex=True).str.strip()

            frames.append(sub_df)

        # pd.concat raises on an empty list, e.g. when no start list is published.
        if frames:
            bib_df = pd.concat(frames)
        else:
            bib_df = pd.DataFrame(columns=["BIB", "Inv name"])
        self.bib_df = bib_df.set_index(bib_df["Inv name"])
from first_cycling_api.combi import combi_results_startlist
import numpy as np


def _combined(race_id, year, **kwargs):
    """Fetch combined results, failing with a clear message when none come back."""
    combined = combi_results_startlist(race_id, year, **kwargs)
    # combi_results_startlist returns None on failure; without this check the
    # tests would die on an opaque AttributeError instead.
    assert combined is not None, (
        f"no combined results for race {race_id}, year {year}, options {kwargs}"
    )
    return combined


def _assert_leader(combined, rider, bib):
    """Check the rider and bib number of the first row of the results table."""
    table = combined.results_table
    assert table['Rider'].iloc[0] == rider
    assert table['BIB'].iloc[0] == bib


def _assert_all_standings(combined):
    """Check that every classification expected after a stage is present."""
    for classification in ('gc', 'point', 'mountain', 'youth'):
        assert classification in combined.standings


def test_combi_2019_amstel():
    t = _combined(9, 2019)

    assert len(t.results_table) == 175
    _assert_leader(t, 'van der Poel Mathieu', 181)


def test_2022_TdF():
    t = _combined(17, 2022)
    assert len(t.results_table) == 176
    _assert_leader(t, 'Vingegaard Jonas', 18)

    t = _combined(17, 2022, classification_num=1)
    assert len(t.results_table) == 176
    _assert_leader(t, 'Vingegaard Jonas', 18)

    t = _combined(17, 2022, classification_num=2)
    assert len(t.results_table) == 26
    assert t.results_table['Rider'].iloc[0] == "Pogacar Tadej"
    assert t.results_table['Time'].iloc[0] == "79:36:03"


def test_combi_2023_itzulia():
    t = _combined(14244, 2023, stage_num=1)

    assert len(t.results_table) == 113
    _assert_leader(t, 'Vollering Demi', 1)
    _assert_all_standings(t)

    t = _combined(14244, 2023, stage_num=1, classification_num=1)
    _assert_leader(t, 'Vollering Demi', 1)

    t = _combined(14244, 2023, stage_num=1, classification_num=3)
    _assert_leader(t, 'Vollering Demi', 1)


def test_combi_2023_gracia():
    t = _combined(9549, 2023, stage_num=3)

    assert len(t.results_table) == 128
    _assert_leader(t, 'Rissveds Jenny', 73)
    _assert_all_standings(t)

    # TODO: not asserted yet, kept from the original test as the expected value:
    # t = combi_results_startlist(9549,2023,stage_num=3,classification_num=3)
    # assert t.results_table['Rider'].iloc[0] == 'Wlodarczyk Dominika'


def test_giro_donne_2001():
    t = _combined(9064, 2001, stage_num=1)
    assert len(t.results_table) == 10

    # TODO: classification 3 does not exist for this stage; expected outcome:
    # t = combi_results_startlist(9064,2001,stage_num=1,classification_num=3)
    # assert t==None
# Itzulia uses the new style, stage race
@my_vcr.use_cassette()
def test_2023_itzulia():
    race = Race(14244)
    r_2023 = race.edition(year=2023)

    results_2023 = r_2023.results(stage_num=1)
    assert len(results_2023.results_table) == 113
    assert results_2023.results_table['Rider'].iloc[0] == 'Vollering Demi'

    for classification in ('gc', 'point', 'mountain', 'youth'):
        assert classification in results_2023.standings

    # TODO: per-classification requests are not asserted yet for new-style
    # pages; expected values kept from the original test:
    # r=r_2023.results(stage_num=1,classification_num=1).results_table
    # assert len(r) == 97
    # assert r['Rider'].iloc[0] == 'Vollering Demi'
    # assert r['Time'].iloc[0] == "03:16:22"
    #
    # r=r_2023.results(stage_num=1,classification_num=2).results_table
    # assert len(r) == 23
    # assert r['Rider'].iloc[0] == 'Wyllie Ella'
    # assert r['Time'].iloc[0] == "03:19:38"
    #
    # r=r_2023.results(stage_num=1,classification_num=3).results_table
    # assert len(r) == 19
    # assert r['Rider'].iloc[0] == 'Vollering Demi'
    # assert r['Points'].iloc[0] == 25
    #
    # r=r_2023.results(stage_num=1,classification_num=4).results_table
    # assert len(r) == 7
    # assert r['Rider'].iloc[0] == 'Vollering Demi'
    # assert r['Points'].iloc[0] == 6
    #
    # assert len(r_2023.results(stage_num=1,classification_num=8).results_table) == 19


# use_cassette() must be applied as a decorator: called as a bare statement it
# only builds a context manager that is thrown away, so nothing is replayed.
@my_vcr.use_cassette()
def test_2014_giro_rosa_prologue():
    giro_rosa_2014 = RaceEdition(race_id=9064, year=2014)
    results = giro_rosa_2014.results(stage_num=0)
    assert results.results_table['Rider'].iloc[0] == 'van Vleuten Annemiek'


@my_vcr.use_cassette()
def test_giro_donne_2001():
    giro_rosa_2001 = RaceEdition(race_id=9064, year=2001)
    results = giro_rosa_2001.results(stage_num=1)
    assert len(results.results_table) == 10

    # TODO: a non-existing classification is not handled yet; expected outcome:
    # results = giro_rosa_2001.results(stage_num=1,classification_num=3)
    # assert results==None