# Skip to content
# Snippets Groups Projects
# Code owners
# Assign users and groups as approvers for specific file changes. Learn more.
# DataAnalyser.py 12.83 KiB
import re
import string
from typing import Iterator

import pandas as pandas
import json as json
from fastf1.core import Session, Lap, Laps, DataNotLoadedError
from bs4 import BeautifulSoup, PageElement, NavigableString, Tag

from DataHandler import DataHandler


class DataAnalyser(DataHandler):
    """
    Analyses sessions by extrapolating existing or new data from them.

    Any method of this class must be given a Session object or a list thereof. If the method does not require such an
    object, it should not be part of this class.
    """


    # ===== Overtakes =====

    def getOvertakesPerLapForRaces(self, races: list[Session]):
        overtakesInRaces: list[list[int]] = [[]]
        for race in races:
            overtakesInRaces.append(self.getOvertakesPerLapForRace(race))
        return overtakesInRaces

    def getOvertakesPerLapForRace(self, race: Session):
        self.enforceSessionType(race, "Race")
        overtakesInLaps: list[int] = self.countOvertakesPerLap(race)
        return overtakesInLaps

    def countOvertakesPerLap(self, race: Session):
        overtakes: list[int] = []
        for lapNumber in range(1, race.total_laps + 1): # in this context, index 1 = lap 1
            overtakes.append(self.countOvertakesInLap(lapNumber, race))
        return overtakes

    def countOvertakesInLap(self, lapNumber: int, race: Session):
        orderToSort: list[(Lap, int)] = self.prepareWeightedOrderFromLap(lapNumber, race)
        overtakes: int = 0
        i: int = 0

        while i < len(orderToSort) - 1: # do not change to for-loop, seems to not like resetting the index
            weightedDriverAhead: list[(Lap, int)] = orderToSort[i]
            weightedDriverBehind: list[(Lap, int)] = orderToSort[i + 1]
            if weightedDriverAhead[1] > weightedDriverBehind[1]:
                temp: int = orderToSort[i]
                orderToSort[i] = orderToSort[i + 1]
                orderToSort[i + 1] = temp
                i = -1  # reset to first index; -1 because loop will set it to 0

                if not ( # don't count overtake if driver nonexistent or if one of them is on an in-lap
                    weightedDriverAhead[0] is None
                    or weightedDriverBehind[0] is None
                    or self.isAnInLap(weightedDriverAhead[0])
                    or self.isAnInLap(weightedDriverBehind[0])
                ):  overtakes += 1
            i += 1

        return overtakes

    def prepareWeightedOrderFromLap(self, lapNumber: int, race: Session):
        """Prepare a list from specific lap & race, that can be sorted via bubble sort to determine the number of
        overtakes that occurred in that lap.
        :param lapNumber: Which lap to prepare from the given race. Note that value of 1 will return a list ordered by
        starting grid, as there is no previous lap.
        :param race: Race from which to pull the given lap from.
        :return: list[(Lap, int)]: A list with pairs of every driver's lap and their position at the end of the lap. Entries are
        sorted by the driver's positions at the start of the lap. If an invalid lap number (below 1 or above the number
        of laps the race had), all laps in the list will be either None objects or empty Panda Dataframes.
        """

        previousLaps: Laps = race.laps.pick_laps(lapNumber - 1)
        currentLaps: Laps = race.laps.pick_laps(lapNumber)
        out: int = self.numberOfDrivers  # weighting for drivers that did not complete the lap (they are "out" of the lap/race)
        weightedOrder: list[(Lap, int)] = [(None, out)] * 20  # data from start of lap + weighting for end of lap; initialize to add laps in lap's starting order

        if self.activateDebugOvertakeAnalysis: print(f"Lap: {lapNumber}")

        # Put every driver's laps in a sortable array & apply weighting based on position at end of lap
        for driver in race.drivers:
            if self.activateDebugOvertakeAnalysis: print(f"Driver: {driver}")
            driversPreviousLap: Laps = previousLaps.pick_drivers(
                driver)  # should only get 1 lap, but data type shenanigans
            driversCurrentLap: Laps = currentLaps.pick_drivers(driver)
            startPosition: int = out
            endPosition: int = out

            try:
                if lapNumber == 1:
                    startPosition = self.getGridPositionForDriver(driver, race)
                else: startPosition = int(driversPreviousLap['Position'].iloc[0])
                endPosition = int(driversCurrentLap['Position'].iloc[0])

            except ValueError:
                if self.activateDebugOvertakeAnalysis:
                    print(f"Could not fetch positions from lap; driver %d likely didn't finish lap %d or %d",
                          driver, lapNumber, (lapNumber + 1))
            except IndexError:
                if self.activateDebugOvertakeAnalysis:
                    print("['Position'].iloc[0] was out of bounds; lap was likely empty because driver previously left the race")

            weightedOrder[startPosition - 1] = (driversCurrentLap, endPosition)

        return weightedOrder

    def getGridPositionForDriver(self, driverNumber: int, race: Session):
        sessionResults = race.results
        gridPosition: int = int(sessionResults['GridPosition'].loc[driverNumber])
        return gridPosition

    def isAnInLap(self, lap: Lap):
        try:
            return not pandas.isnull(lap['PitInTime'].iloc[0])
        except: # caused when lap is empty and possibly when lap is None
            return True # like in-laps, empty laps should not be counted for overtakes



    # ===== Weather =====

    def getWeatherFromHtml(self, rawHtml):
        """
        Extract the weather description from an HTML page containing a table row headed "Weather".

        The first table row whose header cell (th) consists of exactly the text "Weather" and that has a data cell
        (td) is used. Line breaks are removed from the cell's text.
        :param rawHtml: HTML markup to search.
        :return: Text of the data cell next to the "Weather" header, without line breaks.
        :raises LookupError: If no such table row exists.
        """
        parsedHtml = BeautifulSoup(rawHtml, features="html.parser")
        for tableRow in parsedHtml.find_all("tr"):
            header = tableRow.find("th")
            if header is None or header.string != "Weather":
                continue

            cell = tableRow.find("td")
            if cell is None:
                continue  # a header without a data cell carries no weather text; keep looking

            weatherText = cell.string
            if weatherText is None:
                # .string is None when the cell holds nested markup (e.g. links); fall back to all contained text
                weatherText = cell.get_text()
            return weatherText.replace("\n", "")

        # LookupError is a subclass of Exception, so callers catching Exception keep working
        raise LookupError("No weather entry found")

    def filterForRainSessions(self, sessions: list[Session]):
        """
        Filter out & return only those sessions from input list that had rain falling at any point during the session.

        Note: The sessions returned are not necessarily sessions that had wet conditions for any meaningful amount of
        time. Also, sessions that had wet conditions only from leftover rainwater on track are not included in the
        returned sessions, as no rain occurred during the session. This is due to technical limitations.
        :param sessions: List of sessions from which to pick out sessions with rain.
        :return: List of sessions which had rain falling during the session for any amount of time.
        """

        rainSessions: list[Session] = []
        for session in sessions:
            try:
                for rainfallEntry in session.weather_data["Rainfall"]:
                    if rainfallEntry is True:
                        rainSessions.append(session)
                        break
            except DataNotLoadedError:
                raise DataNotLoadedError(f"Weather data not loaded for session {session}")

        return rainSessions

    def raceHasWeatherChange(self, race: Session):
        if self.getFirstTireChange(race) == -1: return True
        return False


    # ===== Tire Changes =====

    def getFirstTireChanges(self, races: list[Session]):
        earliestTireChanges: list[int] = [[]]  # isRaining per lap per race
        for race in races:
            earliestTireChange = self.getFirstTireChange(race)
            earliestTireChanges.append(earliestTireChange)
        return earliestTireChanges

    def getFirstTireChange(self, race: Session):
        """
        Determines the first lap in which a tire change to a different weather compound was done.
        :param race: Race session in which to look for a tire change.
        :return: Lap number in which the first tire change to a different weather compound took place. Returns -1 if no
        such tire change took place.
        """
        compoundsPerLap: list[list[str]] = self.getCompoundsForRace(race)
        compoundsPerLap[0] = compoundsPerLap[1] # presume grid tires same as 1st lap; races are only picked if weather change after first 10 laps anyway, so it's ok
        startingCompound: str = self.getPredominantCompound(compoundsPerLap[0])
        earliestTireChangeLap = self.getFirstLapWithOppositeCompound(compoundsPerLap, startingCompound)

        return earliestTireChangeLap

    def getLastTireChanges(self, races: list[Session]):
        latestTireChanges: list[int] = [[]]  # isRaining per lap per race
        for race in races:
            latestTireChange = self.getLastTireChange(race)
            latestTireChanges.append(latestTireChange)
        return latestTireChanges

    def getLastTireChange(self, race: Session):
        """
        Determines the last lap in which a tire change to a different weather compound was done.
        :param race: Race session in which to look for a tire change.
        :return: Lap number in which the last tire change to a different weather compound took place. Returns -1 if no
        such tire change took place.
        """
        compoundsPerLap: list[list[str]] = self.getCompoundsForRace(race)
        compoundsPerLap[0] = compoundsPerLap[1]  # presume grid tires same as 1st lap; races are only picked if weather change after first 10 laps anyway, so it's ok
        startingCompound: str = self.getPredominantCompound(compoundsPerLap[0])
        latestTireChangeLap = self.getFirstLapWithoutCompound(compoundsPerLap, startingCompound)

        return latestTireChangeLap

    def getFirstLapWithoutCompound(self, compoundsPerLap: list[list[str]], startingCompound: str):
        currentLap = 0
        compoundFilter = self.setFilter(startingCompound)
        for compoundsThisLap in compoundsPerLap:
            noStartingCompoundsLeft = True
            for compound in compoundsThisLap:
                if compound in compoundFilter:
                    noStartingCompoundsLeft = False
            if noStartingCompoundsLeft: return currentLap
            currentLap += 1

        return -1 # no lap without compound found; all laps use same compound type

    def getCompoundsForRace(self, race: Session):
        compoundsPerLap: list[list[str]] = [[]]
        allRaceLaps = race.laps

        for raceLapIndex in range(race.total_laps):
            compoundsThisLap: list[str] = []
            for driver in race.drivers:
                raceLap = allRaceLaps.pick_laps(raceLapIndex + 1)  # Lap 0 doesn't exist
                raceLap = raceLap.pick_drivers(driver)
                try:
                    compound = raceLap['Compound'].iloc[0]
                    compoundsThisLap.append(compound)
                except Exception:  # triggered when not all drivers that took part reached lap, probably by crashing or being behind
                    pass
            compoundsPerLap.append(compoundsThisLap)

        return compoundsPerLap

    def getPredominantCompound(self, compoundsThisLap: list[str]):
        slickCounter = 0
        interCounter = 0
        wetCounter = 0
        for compound in compoundsThisLap:
            if compound in self.slickCompounds: slickCounter += 1
            if compound == 'INTERMEDIATE': interCounter += 1
            if compound == 'WET': wetCounter += 1
        mostUsed = max(slickCounter, interCounter, wetCounter)
        if slickCounter == mostUsed: return 'SLICK'
        if interCounter == mostUsed: return 'INTERMEDIATE'
        if wetCounter == mostUsed: return 'WET'
        return 'error'

    def getFirstLapWithOppositeCompound(self, compoundsPerLap: list[list[str]], startingCompound: str):
        compoundFilter = self.setFilter(startingCompound)
        currentLap = 0
        for compoundsThisLap in compoundsPerLap:
            for compound in compoundsThisLap:
                if compound not in compoundFilter:
                    return currentLap
            currentLap += 1
        return -1 # no lap with opposite compound found; all laps use same compound type

    def setFilter(self, startingCompound: str):
        if startingCompound == 'SLICK': return self.slickCompounds
        return startingCompound



    # ===== Crashes =====

#    def analyseRacesForCrashes(self, races):



    # ===== Events =====

#    def analyseRacesForSafetyCars(self, races):



    # ===== Other

    def enforceSessionType(self, session: Session, sessionType: str):
        if sessionType not in self.validSessionTypes:
            raise ValueError(f"Invalid session type \"{sessionType}\"; only {self.validSessionTypes} are allowed")
        if not session.session_info["Type"] == sessionType:
            raise ValueError(f"Session must be a {sessionType} session, not a {session.session_info["Type"]} session")