From 69738ee6c9096df5480ba3a6812bc39a61079f5c Mon Sep 17 00:00:00 2001
From: Lennard Geese <lennard.geese@sva.de>
Date: Tue, 15 Apr 2025 18:22:45 +0200
Subject: [PATCH] Implement fetching weather for an event from respective wiki
 entry

---
 DataAnalyser.py | 18 ++++++++++++++++++
 DataImporter.py | 38 +++++++++++++++++++++++++++++++++++++-
 2 files changed, 55 insertions(+), 1 deletion(-)

diff --git a/DataAnalyser.py b/DataAnalyser.py
index 7eb8fc3..ca7e140 100644
--- a/DataAnalyser.py
+++ b/DataAnalyser.py
@@ -1,5 +1,11 @@
+import re
+import string
+from typing import Iterator
+
 import pandas as pandas
+import json as json
 from fastf1.core import Session, Lap, Laps, DataNotLoadedError
+from bs4 import BeautifulSoup, PageElement, NavigableString, Tag
 
 from DataHandler import DataHandler
 
@@ -116,6 +122,18 @@ class DataAnalyser(DataHandler):
     # ===== Weather =====
 
+    def getWeatherFromHtml(self, rawHtml):
+        parsedHtml = BeautifulSoup(rawHtml, features="html.parser")
+        tableRows = parsedHtml.find_all("tr")  # Get all table rows on wiki page
+        for tableRow in tableRows:
+            header = tableRow.find("th")
+            if header is None: continue
+            if header.string == "Weather":
+                weatherRaw = tableRow.find("td").string
+                return re.sub("\n", "", weatherRaw)
+
+        raise Exception("No weather entry found")  # TODO: Use correct exception type
+
     def filterForRainSessions(self, sessions: list[Session]):
         """
         Filter out & return only those sessions from input list that had rain falling at any point during the
         session.
diff --git a/DataImporter.py b/DataImporter.py
index 877e9ea..eeefbec 100644
--- a/DataImporter.py
+++ b/DataImporter.py
@@ -1,6 +1,11 @@
+import re
+import string
+from datetime import datetime
+
 import fastf1
 from abc import ABC
+import requests
 from fastf1.core import Session
 from fastf1.events import EventSchedule, Event
 
@@ -17,6 +22,33 @@ class DataImporter(DataHandler, ABC):
 
     """
 
+    def importWeatherFromWiki(self, event: Event | SessionIdentifier):
+        if isinstance(event, SessionIdentifier):
+            event = self.importEvent(event.year, event.event)
+
+        wikiHtml = self.importWikiHtml(event)
+        analyser = DataAnalyser()
+        weather = analyser.getWeatherFromHtml(wikiHtml)
+
+        return weather
+
+    def importWikiHtml(self, event: Event):
+        # URL example to get 2024 Sao Paulo GP: https://en.wikipedia.org/w/api.php?action=query&titles=2024_S%C3%A3o_Paulo_Grand_Prix&prop=extracts&format=json&exintro=1
+        apiRootUrl: str = "https://en.wikipedia.org/wiki/"
+        grandPrixName: str = f"{event.year} {event['EventName']}"
+        uri: str = re.sub(" ", "_", grandPrixName)
+        response = requests.get(f"{apiRootUrl}{uri}")
+        return response.text
+
+    def importEvent(self, year: int, event: int | str):
+        return fastf1.get_event(year, event)
+
     def importAllEventsFromYear(self, year: int):
         races: list[Event] = []
         schedule: EventSchedule = fastf1.get_event_schedule(year, include_testing = False)
@@ -51,8 +83,12 @@ class DataImporter(DataHandler, ABC):
         return session
 
     def getRainRacesSince(self, firstYear: int):
+        currentYear = datetime.now().year
+        if firstYear > currentYear:
+            raise ValueError("Cannot get race data from the future :)")
+
         rainRaces: list[Session] = []
-        for firstYear in range(firstYear, 2025): # FIXME: Add automatic upper bound for year so that it can at most be set to current year or year of latest f1 race (e.g. shortly after new years eve)
+        for firstYear in range(firstYear, currentYear): # FIXME: Handle exception after new years, when no events have run in current year yet
             wetWeatherRacesInYear: list[Session] = self.getRainRacesIn(firstYear)
             for wetWeatherRace in wetWeatherRacesInYear:
                 rainRaces.append(wetWeatherRace)
-- 
GitLab
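
For reviewers, a minimal usage sketch of the new import path (not part of the patch). It assumes a concrete DataImporter instance can be created (the class derives from ABC, so a trivial subclass is shown), that network access to Wikipedia is available, and that the event's wiki infobox actually contains a "Weather" row; the subclass name is illustrative only.

# Usage sketch only (not part of this patch). Assumes network access to
# Wikipedia and that the event's infobox has a "Weather" row.
from DataImporter import DataImporter


class ConcreteDataImporter(DataImporter):
    """Illustrative concrete subclass, since DataImporter derives from ABC."""
    pass


importer = ConcreteDataImporter()

# Resolve the event via fastf1, then scrape the "Weather" cell from the
# corresponding Wikipedia article (e.g. the 2024 Sao Paulo Grand Prix page).
event = importer.importEvent(2024, "São Paulo Grand Prix")
print(importer.importWeatherFromWiki(event))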