diff --git a/DataAnalyser.py b/DataAnalyser.py
index 7eb8fc3135eb1b5a68f3102a64177acb59599257..ca7e1405c881293fe2bd08eab4a30b6496b266d6 100644
--- a/DataAnalyser.py
+++ b/DataAnalyser.py
@@ -1,5 +1,8 @@
+import re
+
 import pandas as pandas
 from fastf1.core import Session, Lap, Laps, DataNotLoadedError
+from bs4 import BeautifulSoup
 from DataHandler import DataHandler
 
@@ -116,6 +119,18 @@ class DataAnalyser(DataHandler):
 
     # ===== Weather =====
 
+    def getWeatherFromHtml(self, rawHtml):
+        """Extract the 'Weather' infobox entry from a race's Wikipedia page HTML."""
+        parsedHtml = BeautifulSoup(rawHtml, features="html.parser")
+        tableRows = parsedHtml.find_all("tr") # Get all table rows on wiki page
+        for tableRow in tableRows:
+            header = tableRow.find("th")
+            if header is None: continue
+            if header.string == "Weather":
+                weatherRaw = tableRow.find("td").string
+                return re.sub("\n", "", weatherRaw)
+
+        raise LookupError("No weather entry found")
+
     def filterForRainSessions(self, sessions: list[Session]):
         """ Filter out & return only those sessions from input list that had rain falling at any point during the session.
diff --git a/DataImporter.py b/DataImporter.py
index 877e9ea95ff8682f81115afab11e7ef85e88cce8..eeefbecaa391aa26f2413d9c64d66a1751414e02 100644
--- a/DataImporter.py
+++ b/DataImporter.py
@@ -1,6 +1,10 @@
+import re
+from datetime import datetime
+
 import fastf1
 from abc import ABC
+import requests
 from fastf1.core import Session
 from fastf1.events import EventSchedule, Event
 
@@ -17,6 +21,26 @@ class DataImporter(DataHandler, ABC):
     """
 
+    def importWeatherFromWiki(self, event: Event | SessionIdentifier):
+        if isinstance(event, SessionIdentifier):
+            event = self.importEvent(event.year, event.event)
+
+        wikiHtml = self.importWikiHtml(event)
+        analyser = DataAnalyser()
+        weather = analyser.getWeatherFromHtml(wikiHtml)
+
+        return weather
+
+    def importWikiHtml(self, event: Event):
+        # URL example to get 2024 Sao Paulo GP: https://en.wikipedia.org/w/api.php?action=query&titles=2024_S%C3%A3o_Paulo_Grand_Prix&prop=extracts&format=json&exintro=1
+        apiRootUrl: str = "https://en.wikipedia.org/wiki/"
+        grandPrixName: str = f"{event.year} {event['EventName']}"
+        uri: str = re.sub(" ", "_", grandPrixName)
+        response = requests.get(f"{apiRootUrl}{uri}")
+        return response.text
+
+    def importEvent(self, year: int, event: int | str):
+        return fastf1.get_event(year, event)
 
     def importAllEventsFromYear(self, year: int):
         races: list[Event] = []
         schedule: EventSchedule = fastf1.get_event_schedule(year, include_testing = False)
@@ -51,8 +75,12 @@ class DataImporter(DataHandler, ABC):
         return session
 
     def getRainRacesSince(self, firstYear: int):
+        currentYear = datetime.now().year
+        if firstYear > currentYear:
+            raise ValueError("Cannot get race data from the future :)")
+
         rainRaces: list[Session] = []
-        for firstYear in range(firstYear, 2025): # FIXME: Add automatic upper bound for year so that it can at most be set to current year or year of latest f1 race (e.g. shortly after new years eve)
-            wetWeatherRacesInYear: list[Session] = self.getRainRacesIn(firstYear)
+        for year in range(firstYear, currentYear + 1): # FIXME: Handle exception after new years, when no events have run in current year yet
+            wetWeatherRacesInYear: list[Session] = self.getRainRacesIn(year)
             for wetWeatherRace in wetWeatherRacesInYear:
                 rainRaces.append(wetWeatherRace)