Skip to content
Snippets Groups Projects
analyze.py 4.02 KiB
Newer Older
  • Learn to ignore specific revisions
  • # format of json is {"id":1,"timestamp":186514,"netem_parameters":{"srv_rate":1000,"srv_delay":0,"srv_jitter":0,"srv_pkt_loss":0,"srv_duplicate":0,"srv_corrupt":0,"srv_reorder":0,"cli_rate":1000,"cli_delay":0,"cli_jitter":0,"cli_pkt_loss":0,"cli_duplicate":0,"cli_corrupt":0,"cli_reorder":0}}
    
    # Read the JSON-lines netem log into a pandas DataFrame and print timing statistics per netem configuration.
    import json
    import pandas as pd
    
    
    def main():
        """Load the netem log, summarise it, and print the summary table.

        The ``srv_reorder``, ``srv_corrupt`` and ``srv_duplicate`` columns are
        removed from the summary before it is printed.
        """
        frame = read_json_file("logs/netem_data.log")
        hidden_columns = ["srv_reorder", "srv_corrupt", "srv_duplicate"]
        report = summarize_data(frame).drop(columns=hidden_columns)
        print(report)
    
    
    def read_json_file(file_path):
        """Load a JSON-lines log file into a flattened DataFrame.

        Every non-blank line of ``file_path`` is parsed as one JSON object. The
        objects are flattened with ``pandas.json_normalize``, so the keys nested
        under ``netem_parameters`` become ``netem_parameters.<name>`` columns.

        Args:
            file_path: Path of the log file to read.

        Returns:
            A DataFrame in which ``timestamp`` has been renamed to ``time_ns``,
            ``time_ms`` holds ``time_ns / 1_000_000``, and the boolean column
            ``netem_parameters`` is True for rows where every
            ``netem_parameters.*`` column is non-null. Rows whose ``time_ms`` is
            not below ``1e30`` are dropped.

        Raises:
            json.JSONDecodeError: If a non-blank line is not valid JSON.
            KeyError: If no record provides a ``timestamp`` field (this includes
                a file without any records).
        """
        with open(file_path, "r", encoding="utf-8") as file:
            # Blank lines (e.g. a trailing newline) carry no record; skip them
            # instead of letting json.loads fail on an empty string.
            records = [json.loads(line) for line in file if line.strip()]

        df = pd.json_normalize(records)
        df.rename(columns={"timestamp": "time_ns"}, inplace=True)
        df["time_ms"] = df["time_ns"] / 1000000

        # Flag rows whose netem_parameters.* values are all present. The column
        # is assigned directly: json_normalize only produces a plain
        # "netem_parameters" column when some record holds a non-dict value
        # there, so casting that column beforehand fails with KeyError on logs
        # where every record carries the nested parameters.
        netem_columns = [col for col in df.columns if col.startswith("netem_parameters.")]
        df["netem_parameters"] = df[netem_columns].notnull().all(axis=1)

        df = df[df["time_ms"] < 1e30]  # filter out unreasonable large values

        return df
    
    
    def summarize_data(df):
        """Summarise ``time_ms`` for each distinct netem parameter configuration.

        Rows whose ``netem_parameters`` flag is true are grouped by the exact
        combination of values in all ``netem_parameters*`` columns. Rows whose
        flag is false are pooled into one single group, which is reported once
        no matter how many different incomplete combinations occur.

        Args:
            df: DataFrame with a ``time_ms`` column, a boolean
                ``netem_parameters`` column and the ``netem_parameters.srv_*``
                columns reported below.

        Returns:
            A DataFrame with one row per group, in order of first appearance,
            with the columns ``time_ms_min``, ``time_ms_max``, ``time_ms_avg``,
            ``time_ms_std``, ``count``, ``netem_parameters`` and the ``srv_*``
            parameter values taken from the first row of the group. For input
            without rows the result is empty but still has these columns.

        Raises:
            KeyError: If ``df`` has rows but lacks one of the required columns.
        """
        reported_params = (
            "srv_rate",
            "srv_delay",
            "srv_jitter",
            "srv_pkt_loss",
            "srv_duplicate",
            "srv_corrupt",
            "srv_reorder",
        )
        summary_columns = [
            "time_ms_min",
            "time_ms_max",
            "time_ms_avg",
            "time_ms_std",
            "count",
            "netem_parameters",
            *reported_params,
        ]

        # Every distinct combination of the netem columns (flag included).
        netem_columns = [col for col in df.columns if col.startswith("netem_parameters")]
        distinct_rows = df[netem_columns].drop_duplicates()

        records = []
        incomplete_reported = False
        for _, entry in distinct_rows.iterrows():
            if entry["netem_parameters"]:
                # Complete configuration: select the rows that agree with it
                # on every netem column.
                matches = pd.Series(True, index=df.index)
                for col in netem_columns:
                    matches &= df[col] == entry[col]
            elif incomplete_reported:
                # The pooled incomplete group has already been summarised;
                # emitting it again would only duplicate that row.
                continue
            else:
                matches = df["netem_parameters"].eq(False)
                incomplete_reported = True

            group = df[matches]
            times = group["time_ms"]

            record = {
                "time_ms_min": times.min(),
                "time_ms_max": times.max(),
                "time_ms_avg": times.mean(),
                "time_ms_std": times.std(),
                "count": times.count(),
                "netem_parameters": group["netem_parameters"].iloc[0],
            }
            for name in reported_params:
                record[name] = group["netem_parameters." + name].iloc[0]
            records.append(record)

        # Build the result once instead of concatenating inside the loop.
        return pd.DataFrame(records, columns=summary_columns)
    
    
    # Run the analysis only when executed as a script, so that importing this
    # module (e.g. to reuse its functions) does not read the log and print.
    if __name__ == "__main__":
        main()