Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# Each line of the log file is one JSON object of the form {"id":1,"timestamp":186514,"netem_parameters":{"srv_rate":1000,"srv_delay":0,"srv_jitter":0,"srv_pkt_loss":0,"srv_duplicate":0,"srv_corrupt":0,"srv_reorder":0,"cli_rate":1000,"cli_delay":0,"cli_jitter":0,"cli_pkt_loss":0,"cli_duplicate":0,"cli_corrupt":0,"cli_reorder":0}}
# read_json_file() below reads such a log file into a pandas DataFrame.
import json
import pandas as pd
def main():
    """Read the netem log, summarise it and print the trimmed summary."""
    measurements = read_json_file("logs/netem_data.log")
    # These three server-side columns are left out of the printed table.
    hidden_columns = ["srv_reorder", "srv_corrupt", "srv_duplicate"]
    report = summarize_data(measurements).drop(columns=hidden_columns)
    print(report)
def read_json_file(file_path):
    """Load a JSON-lines netem log into a flat pandas DataFrame.

    Every non-blank line of *file_path* is parsed as one JSON object and the
    records are flattened with ``pd.json_normalize``, so a nested
    ``netem_parameters`` object becomes ``netem_parameters.<name>`` columns.

    Args:
        file_path: Path of the log file to read.

    Returns:
        The flattened records with these derived columns:

        * ``time_ns`` - the record's ``timestamp`` field, renamed.
        * ``time_ms`` - ``time_ns`` divided by 1,000,000.
        * ``netem_parameters`` - True for rows in which every
          ``netem_parameters.*`` column is non-null, otherwise False.

        Rows whose ``time_ms`` is not strictly below 1e30 are dropped.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If no record carries a ``timestamp`` field (this includes
            an empty log file).
    """
    records = []
    with open(file_path, "r", encoding="utf-8") as file:
        for line in file:
            # Tolerate blank lines (e.g. a trailing newline at end of file)
            # instead of failing in json.loads.
            if not line.strip():
                continue
            records.append(json.loads(line))

    df = pd.json_normalize(records)
    df = df.rename(columns={"timestamp": "time_ns"})
    df["time_ms"] = df["time_ns"] / 1000000

    # A plain "netem_parameters" column only exists after normalisation when
    # some record carried a non-object value for it, so it must not be
    # assumed to be present; the flag is derived from the flattened columns.
    netem_columns = [col for col in df.columns if col.startswith("netem_parameters.")]
    if netem_columns:
        df["netem_parameters"] = df[netem_columns].notnull().all(axis=1)
    else:
        # With no parameter columns at all, an empty all() would be vacuously
        # True; no row actually has netem parameters in that case.
        df["netem_parameters"] = False

    df = df[df["time_ms"] < 1e30]  # filter out unreasonably large values
    return df
def summarize_data(df):
    """Summarise ``time_ms`` for each distinct netem configuration in *df*.

    Rows are grouped by the combination of values in all columns whose name
    starts with ``netem_parameters``. All rows whose ``netem_parameters``
    flag is False are treated as a single group, reported once.

    Args:
        df: DataFrame with a ``time_ms`` column, a boolean
            ``netem_parameters`` column and the
            ``netem_parameters.srv_*`` columns read below.

    Returns:
        A DataFrame with one row per group, in order of first appearance,
        with the columns ``time_ms_min``, ``time_ms_max``, ``time_ms_avg``,
        ``time_ms_std``, ``count``, ``netem_parameters`` and the seven
        ``srv_*`` values taken from the first row of the group. An empty
        DataFrame is returned when *df* has no rows.
    """
    srv_fields = (
        "srv_rate",
        "srv_delay",
        "srv_jitter",
        "srv_pkt_loss",
        "srv_duplicate",
        "srv_corrupt",
        "srv_reorder",
    )
    netem_columns = [col for col in df.columns if col.startswith("netem_parameters")]
    distinct_configs = df[netem_columns].drop_duplicates()

    records = []
    unconfigured_done = False
    for _, config in distinct_configs.iterrows():
        if config["netem_parameters"] == True:  # noqa: E712 - value may be a numpy bool
            # Select the rows that match this configuration in every column.
            selector = df[netem_columns].eq(config, axis="columns").all(axis=1)
        else:
            # Every incomplete configuration selects the same set of rows
            # (all rows flagged False), so report that group only once
            # instead of once per distinct incomplete row.
            if unconfigured_done:
                continue
            unconfigured_done = True
            selector = df["netem_parameters"].eq(False)

        group = df[selector]
        if group.empty:
            # Nothing matched (e.g. a NaN never equals itself); no first row
            # to read the parameters from.
            continue

        times = group["time_ms"]
        record = {
            "time_ms_min": times.min(),
            "time_ms_max": times.max(),
            "time_ms_avg": times.mean(),
            "time_ms_std": times.std(),
            "count": times.count(),
            "netem_parameters": group["netem_parameters"].iloc[0],
        }
        for name in srv_fields:
            record[name] = group[f"netem_parameters.{name}"].iloc[0]
        records.append(record)

    # Build the result in one go rather than concatenating inside the loop.
    return pd.DataFrame(records)
# Script entry point: runs the analysis whenever this file is executed.
# NOTE(review): there is no `if __name__ == "__main__":` guard, so this also
# runs when the module is imported.
main()