-
Lennard Geese authoredLennard Geese authored
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
DataAnalyser.py 12.83 KiB
import re
import string
from typing import Iterator
import pandas as pandas
import json as json
from fastf1.core import Session, Lap, Laps, DataNotLoadedError
from bs4 import BeautifulSoup, PageElement, NavigableString, Tag
from DataHandler import DataHandler
class DataAnalyser(DataHandler):
"""
Analyses sessions by extrapolating existing or new data from them.
Any method of this class must be given a Session object or a list thereof. If the method does not require such an
object, it should not be part of this class.
"""
# ===== Overtakes =====
def getOvertakesPerLapForRaces(self, races: list[Session]):
    """
    Collect the per-lap overtake counts of several races.
    :param races: Race sessions to analyse, in the order in which their results should appear.
    :return: A list that starts with one empty list, followed by one list of per-lap overtake counts for every
    race in input order.
    """
    # NOTE(review): the leading empty list is kept because callers may rely on the offset it creates — confirm
    # whether it is intended.
    countsPerRace = [self.getOvertakesPerLapForRace(session) for session in races]
    return [[], *countsPerRace]
def getOvertakesPerLapForRace(self, race: Session):
    """
    Count the overtakes of every lap of a single race.
    :param race: Session to analyse; it is first checked against the "Race" session type.
    :return: The result of countOvertakesPerLap for the given race.
    """
    self.enforceSessionType(race, "Race")  # raises before any counting if the session has another type
    return self.countOvertakesPerLap(race)
def countOvertakesPerLap(self, race: Session):
    """
    Count the overtakes of each lap of a race.
    :param race: Session whose total_laps attribute determines how many laps are analysed.
    :return: List with one overtake count per lap; list index 0 holds the count of lap 1.
    """
    lapNumbers = range(1, race.total_laps + 1)  # lap numbers are 1-based
    return [self.countOvertakesInLap(currentLap, race) for currentLap in lapNumbers]
def countOvertakesInLap(self, lapNumber: int, race: Session):
    """
    Count the overtakes that happened during one lap of a race.
    An overtake is a pair of drivers whose order at the end of the lap is reversed compared to their order at the
    start of the lap, i.e. an inversion in the weighted order built by prepareWeightedOrderFromLap. This is exactly
    the number of swaps a bubble sort of that order would perform, but it is counted directly instead of sorting.
    Pairs in which either entry has no lap (None) or is reported by isAnInLap are not counted.
    :param lapNumber: Lap to analyse; passed on to prepareWeightedOrderFromLap.
    :param race: Race the lap belongs to; passed on to prepareWeightedOrderFromLap.
    :return: Number of counted overtakes in the lap.
    """
    weightedOrder: list[tuple[Laps, int]] = self.prepareWeightedOrderFromLap(lapNumber, race)
    # Decide once per entry whether it may take part in an overtake, instead of re-checking on every comparison
    countable: list[bool] = [lap is not None and not self.isAnInLap(lap) for lap, _ in weightedOrder]
    overtakes: int = 0
    for aheadIndex, (_, aheadWeight) in enumerate(weightedOrder):
        if not countable[aheadIndex]:
            continue
        for behindIndex in range(aheadIndex + 1, len(weightedOrder)):
            # Strictly greater: entries with equal weights (e.g. several retired drivers) never swap places
            if countable[behindIndex] and aheadWeight > weightedOrder[behindIndex][1]:
                overtakes += 1
    return overtakes
def prepareWeightedOrderFromLap(self, lapNumber: int, race: Session):
    """Prepare a list from specific lap & race, in which the number of inversions equals the number of position
    changes that occurred in that lap.
    :param lapNumber: Which lap to prepare from the given race. Note that value of 1 will return a list ordered by
    starting grid, as there is no previous lap.
    :param race: Race from which to pull the given lap from.
    :return: list[(Laps, int)]: A list with pairs of every driver's lap and their position at the end of the lap.
    Entries are sorted by the driver's positions at the start of the lap. Slots for which no driver with a known
    start position exists hold (None, numberOfDrivers). Drivers whose start position lies outside the available
    slots are appended behind all slots. If an invalid lap number is given (below 1 or above the number of laps the
    race had), all laps in the list will be None.
    """
    previousLaps: Laps = race.laps.pick_laps(lapNumber - 1)
    currentLaps: Laps = race.laps.pick_laps(lapNumber)
    drivers = list(race.drivers)
    debug = self.activateDebugOvertakeAnalysis
    out: int = self.numberOfDrivers  # weighting for drivers that did not complete the lap (they are "out" of the lap/race)
    # One slot per possible start position; never fewer slots than drivers taking part in the session
    slotCount: int = max(out, len(drivers))
    weightedOrder: list[tuple[Laps, int]] = [(None, out)] * slotCount
    if debug: print(f"Lap: {lapNumber}")

    # Put every driver's lap into the slot of their start position & weight it with their position at end of lap
    for driver in drivers:
        if debug: print(f"Driver: {driver}")
        driversPreviousLap: Laps = previousLaps.pick_drivers(driver)  # should only get 1 lap, but data type shenanigans
        driversCurrentLap: Laps = currentLaps.pick_drivers(driver)
        startPosition = None  # stays None if the start position cannot be determined
        endPosition: int = out
        try:
            if lapNumber == 1:
                startPosition = self.getGridPositionForDriver(driver, race)
            else:
                startPosition = int(driversPreviousLap['Position'].iloc[0])
            endPosition = int(driversCurrentLap['Position'].iloc[0])
        except ValueError:
            if debug:
                print(f"Could not fetch positions from lap; driver {driver} likely didn't finish "
                      f"lap {lapNumber - 1} or {lapNumber}")
        except IndexError:
            if debug:
                print("['Position'].iloc[0] was out of bounds; lap was likely empty because driver previously left the race")

        if startPosition is None:
            # No slot can be assigned. The placeholder entries already stand in for such drivers, and writing this
            # driver into the last slot would overwrite the driver who really started the lap there.
            continue
        if 1 <= startPosition <= slotCount:
            weightedOrder[startPosition - 1] = (driversCurrentLap, endPosition)
        else:
            # NOTE(review): presumably a grid position of 0 marks a pit lane start — verify against the results
            # data. Such drivers are lined up behind all regular slots instead of wrapping around to index -1.
            weightedOrder.append((driversCurrentLap, endPosition))
    return weightedOrder
def getGridPositionForDriver(self, driverNumber: int, race: Session):
    """
    Look up the grid position of a driver in the results of a session.
    :param driverNumber: Label under which the driver is indexed in race.results.
    :param race: Session whose results provide a 'GridPosition' column.
    :return: The driver's 'GridPosition' entry converted to int.
    """
    # NOTE(review): callers in this class pass entries of race.drivers as driverNumber — confirm that they are
    # really ints and not driver number strings.
    gridColumn = race.results['GridPosition']
    return int(gridColumn.loc[driverNumber])
def isAnInLap(self, lap: Lap):
    """
    Tell whether a lap should be treated as an in-lap.
    :param lap: Lap data providing a 'PitInTime' column; only its first row is inspected.
    :return: True if the first 'PitInTime' entry is not null. Also True if that entry cannot be read at all, which
    happens when lap is empty or None.
    """
    try:
        return not pandas.isnull(lap['PitInTime'].iloc[0])
    except Exception:  # caused when lap is empty or None; no bare except, so KeyboardInterrupt/SystemExit still propagate
        return True  # like in-laps, empty laps should not be counted for overtakes
# ===== Weather =====
def getWeatherFromHtml(self, rawHtml):
    """
    Extract the weather description from the HTML of a wiki page.
    :param rawHtml: Markup of the page; parsed with BeautifulSoup's "html.parser".
    :return: Text of the data cell of the first table row whose header cell reads "Weather", with all newlines
    removed. Text inside nested tags (e.g. links) is included.
    :raises LookupError: If no table row with a "Weather" header and a data cell exists.
    """
    parsedHtml = BeautifulSoup(rawHtml, features="html.parser")
    for tableRow in parsedHtml.find_all("tr"):  # all table rows on the page
        header = tableRow.find("th")
        if header is None or header.get_text(strip=True) != "Weather":
            continue
        weatherCell = tableRow.find("td")
        if weatherCell is None:  # header without data cell; keep looking instead of failing on None
            continue
        # get_text() rather than .string: .string is None as soon as the cell contains more than one child node
        return weatherCell.get_text().replace("\n", "")
    raise LookupError("No weather entry found")
def filterForRainSessions(self, sessions: list[Session]):
    """
    Pick those sessions from the input list during which rain was recorded at any point.
    Note: A returned session did not necessarily have wet conditions for any meaningful amount of time. Sessions
    that were only wet because of leftover rainwater on track are not returned either, as no rain fell during the
    session itself. This is due to technical limitations.
    :param sessions: List of sessions from which to pick out sessions with rain.
    :return: List of sessions, in input order, that have at least one "Rainfall" weather entry that is True.
    :raises DataNotLoadedError: If the weather data of a session has not been loaded.
    """
    sessionsWithRain: list[Session] = []
    for candidate in sessions:
        try:
            # NOTE(review): identity check — only the bool object True counts, other truthy values do not
            hadRain = any(entry is True for entry in candidate.weather_data["Rainfall"])
        except DataNotLoadedError:
            raise DataNotLoadedError(f"Weather data not loaded for session {candidate}")
        if hadRain:
            sessionsWithRain.append(candidate)
    return sessionsWithRain
def raceHasWeatherChange(self, race: Session):
    """
    Tell whether a race had a weather change, judged by the tires that were used.
    :param race: Race session to check; passed on to getFirstTireChange.
    :return: True if getFirstTireChange found a lap with a tire change to a different weather compound, False if it
    reported -1 (no such tire change).
    """
    # -1 is the "no tire change found" marker, so any other value means the weather changed
    return self.getFirstTireChange(race) != -1
# ===== Tire Changes =====
def getFirstTireChanges(self, races: list[Session]):
    """
    Determine the first tire change lap of several races.
    :param races: Race sessions to analyse, in the order in which their results should appear.
    :return: A list that starts with one empty list, followed by the result of getFirstTireChange for every race in
    input order.
    """
    # NOTE(review): the leading empty list is kept because callers may rely on the offset it creates — confirm
    # whether it is intended.
    firstChangePerRace = [self.getFirstTireChange(session) for session in races]
    return [[], *firstChangePerRace]
def getFirstTireChange(self, race: Session):
    """
    Find the first lap of a race in which a compound of a different weather type than the starting compound
    was in use.
    :param race: Race session in which to look for a tire change.
    :return: Number of the first lap in which any driver used a compound outside the starting compound's group.
    Returns -1 if no such lap exists.
    """
    lapCompounds: list[list[str]] = self.getCompoundsForRace(race)
    # Entry 0 stands for the grid; presume grid tires are the same as on lap 1. Races are only picked if the weather
    # changed after the first 10 laps anyway, so this is ok.
    lapCompounds[0] = lapCompounds[1]
    gridCompound: str = self.getPredominantCompound(lapCompounds[0])
    return self.getFirstLapWithOppositeCompound(lapCompounds, gridCompound)
def getLastTireChanges(self, races: list[Session]):
    """
    Determine the last tire change lap of several races.
    :param races: Race sessions to analyse, in the order in which their results should appear.
    :return: A list that starts with one empty list, followed by the result of getLastTireChange for every race in
    input order.
    """
    # NOTE(review): the leading empty list is kept because callers may rely on the offset it creates — confirm
    # whether it is intended.
    lastChangePerRace = [self.getLastTireChange(session) for session in races]
    return [[], *lastChangePerRace]
def getLastTireChange(self, race: Session):
    """
    Find the lap of a race by which the last driver had changed away from the starting weather compound.
    :param race: Race session in which to look for a tire change.
    :return: Number of the first lap in which no recorded compound belongs to the starting compound's group any
    more. Returns -1 if no such lap exists.
    """
    lapCompounds: list[list[str]] = self.getCompoundsForRace(race)
    # Entry 0 stands for the grid; presume grid tires are the same as on lap 1. Races are only picked if the weather
    # changed after the first 10 laps anyway, so this is ok.
    lapCompounds[0] = lapCompounds[1]
    gridCompound: str = self.getPredominantCompound(lapCompounds[0])
    return self.getFirstLapWithoutCompound(lapCompounds, gridCompound)
def getFirstLapWithoutCompound(self, compoundsPerLap: list[list[str]], startingCompound: str):
    """
    Find the first lap in which nobody used a compound of the starting compound's group.
    :param compoundsPerLap: Compounds in use per lap; the position in the outer list is the value that is returned.
    :param startingCompound: Compound group to look for; translated via setFilter.
    :return: Index of the first entry of compoundsPerLap that contains no compound of the group (an empty entry
    qualifies as well), or -1 if every entry contains at least one.
    """
    groupCompounds = self.setFilter(startingCompound)
    for lapIndex, lapCompounds in enumerate(compoundsPerLap):
        stillOnGroup = [compound for compound in lapCompounds if compound in groupCompounds]
        if not stillOnGroup:
            return lapIndex
    return -1  # no lap without compound found; all laps use same compound type
def getCompoundsForRace(self, race: Session):
    """
    Collect the tire compounds that were in use in every lap of a race.
    :param race: Session providing laps, drivers and total_laps.
    :return: List with one list of compounds per lap, indexed by lap number. Index 0 is an empty placeholder, as
    lap 0 doesn't exist. Drivers without an entry for a lap contribute nothing to that lap's list.
    """
    compoundsPerLap: list[list[str]] = [[]]  # placeholder at index 0 keeps list index equal to lap number
    allRaceLaps = race.laps
    for lapNumber in range(1, race.total_laps + 1):
        lapOfAllDrivers = allRaceLaps.pick_laps(lapNumber)  # same for every driver, so picked once per lap
        compoundsThisLap: list[str] = []
        for driver in race.drivers:
            driversLap = lapOfAllDrivers.pick_drivers(driver)
            try:
                compound = driversLap['Compound'].iloc[0]
            except IndexError:  # triggered when not all drivers that took part reached lap, probably by crashing or being behind
                continue
            compoundsThisLap.append(compound)
        compoundsPerLap.append(compoundsThisLap)
    return compoundsPerLap
def getPredominantCompound(self, compoundsThisLap: list[str]):
    """
    Determine which compound group is used most often among the given compounds.
    :param compoundsThisLap: Compounds to tally, e.g. those of all drivers in one lap.
    :return: 'SLICK', 'INTERMEDIATE' or 'WET', whichever group occurs most often. On a tie the group named first in
    this list wins, so an empty input yields 'SLICK'.
    """
    usage = (
        ('SLICK', sum(1 for compound in compoundsThisLap if compound in self.slickCompounds)),
        ('INTERMEDIATE', sum(1 for compound in compoundsThisLap if compound == 'INTERMEDIATE')),
        ('WET', sum(1 for compound in compoundsThisLap if compound == 'WET')),
    )
    highestCount = max(count for _, count in usage)
    for groupName, count in usage:  # tuple order doubles as the tie-break order
        if count == highestCount:
            return groupName
    return 'error'
def getFirstLapWithOppositeCompound(self, compoundsPerLap: list[list[str]], startingCompound: str):
    """
    Find the first lap in which somebody used a compound outside the starting compound's group.
    :param compoundsPerLap: Compounds in use per lap; the position in the outer list is the value that is returned.
    :param startingCompound: Compound group that counts as "unchanged"; translated via setFilter.
    :return: Index of the first entry of compoundsPerLap that contains a compound outside the group, or -1 if there
    is none.
    """
    groupCompounds = self.setFilter(startingCompound)
    for lapIndex, lapCompounds in enumerate(compoundsPerLap):
        if any(compound not in groupCompounds for compound in lapCompounds):
            return lapIndex
    return -1  # no lap with opposite compound found; all laps use same compound type
def setFilter(self, startingCompound: str):
    """
    Translate a compound group name into the collection of compounds belonging to that group.
    :param startingCompound: 'SLICK' or the name of a single compound.
    :return: self.slickCompounds for 'SLICK', otherwise a list containing only startingCompound. The result is
    meant for membership checks with `in`.
    """
    if startingCompound == 'SLICK':
        return self.slickCompounds
    # Wrapped in a list so that `in` checks are exact matches; on a bare string they would be substring checks
    # (matching '' and fragments of the name) and would raise a TypeError for non-string compounds.
    return [startingCompound]
# ===== Crashes =====
# def analyseRacesForCrashes(self, races):
# ===== Events =====
# def analyseRacesForSafetyCars(self, races):
# ===== Other =====
def enforceSessionType(self, session: Session, sessionType: str):
    """
    Make sure that a session is of the expected type.
    :param session: Session to check; its type is read from session.session_info["Type"].
    :param sessionType: Expected session type; must be one of self.validSessionTypes.
    :raises ValueError: If sessionType is not a valid session type, or if the session has a different type.
    """
    if sessionType not in self.validSessionTypes:
        raise ValueError(f"Invalid session type \"{sessionType}\"; only {self.validSessionTypes} are allowed")
    # Read into a local first: reusing double quotes inside an f-string expression is a syntax error before Python 3.12
    actualType = session.session_info["Type"]
    if actualType != sessionType:
        raise ValueError(f"Session must be a {sessionType} session, not a {actualType} session")