From 6564d92e93584127de00d7715a8ffa49eced8220 Mon Sep 17 00:00:00 2001
From: Bebbo <bartolomeo.mueller@stud.h-da.de>
Date: Mon, 24 Feb 2025 22:32:30 +0100
Subject: [PATCH] Analyze new packet capture and include last server frame length in bandwidth delay

---
 .../emulation-exp/code/kex/Makefile           |   4 +-
 .../code/kex/scripts/analyze_packets.py       | 113 ++++++++++++++----
 .../code/kex/scripts/generate_graphs.py       |   2 +-
 .../emulation-exp/code/kex/scripts/queries.py |   9 +-
 4 files changed, 99 insertions(+), 29 deletions(-)

diff --git a/pq-tls-benchmark-framework/emulation-exp/code/kex/Makefile b/pq-tls-benchmark-framework/emulation-exp/code/kex/Makefile
index 4daf6d4ab..6cfdd729e 100644
--- a/pq-tls-benchmark-framework/emulation-exp/code/kex/Makefile
+++ b/pq-tls-benchmark-framework/emulation-exp/code/kex/Makefile
@@ -39,13 +39,13 @@ clean:
 	rm -f s_timer quic_s_timer quic-client-block
 
 _open_wireshark_with_sslkeylogfile:
-	wireshark -o tls.keylog_file:saved/captures/for_packet_analyzation/sslkeylogfile_20240830153007.log saved/captures/for_packet_analyzation/capture_20240830153007.pcap
+	wireshark -o tls.keylog_file:saved/captures/new_for_packet_analyzation/sslkeylogfile_20250221012131.log saved/captures/new_for_packet_analyzation/capture_20250221012131.pcap
 
 # has to be built before
 _open_oqs_demos_docker_wireshark:
 	docker run --rm -it --net=host -e DISPLAY=$DISPLAY -v /tmp/.X11-unix:/tmp/.X11-unix -v /home/bebbo/own/master/benchmarking-pqc-in-quic/pq-tls-benchmark-framework/emulation-exp/code/kex/saved/captures:/home/ubuntu oqs-wireshark
 	# does not work directly opening a file :(
-	# docker run --rm -it --net=host -e DISPLAY=$DISPLAY -v /tmp/.X11-unix:/tmp/.X11-unix -v /home/bebbo/own/master/benchmarking-pqc-in-quic/pq-tls-benchmark-framework/emulation-exp/code/kex/saved/captures:/home/ubuntu oqs-wireshark sh -c wireshark -o tls.keylog_file:/home/ubuntu/new_for_packet_analyzation/sslkeylogfile_20250220235359.log /home/ubuntu/new_for_packet_analyzation/capture_20250220235359.pcap
+	# docker run --rm -it --net=host -e DISPLAY=$DISPLAY -v /tmp/.X11-unix:/tmp/.X11-unix -v /home/bebbo/own/master/benchmarking-pqc-in-quic/pq-tls-benchmark-framework/emulation-exp/code/kex/saved/captures:/home/ubuntu oqs-wireshark sh -c wireshark -o tls.keylog_file:/home/ubuntu/new_for_packet_analyzation/sslkeylogfile_20250221012131.log /home/ubuntu/new_for_packet_analyzation/capture_20250221012131.pcap
 
 _copy_plots_to_thesis:
 	rm -rf ../../../../../master-thesis-bartolomeo-mueller/gfx/plots/
diff --git a/pq-tls-benchmark-framework/emulation-exp/code/kex/scripts/analyze_packets.py b/pq-tls-benchmark-framework/emulation-exp/code/kex/scripts/analyze_packets.py
index 01bd92c42..8ecef0687 100644
--- a/pq-tls-benchmark-framework/emulation-exp/code/kex/scripts/analyze_packets.py
+++ b/pq-tls-benchmark-framework/emulation-exp/code/kex/scripts/analyze_packets.py
@@ -13,7 +13,7 @@ import helper_scripts.helper_functions as helper_functions
 # Then run experiment.py with scenario_analyze_packets.csv while setting the variables to POOL_SIZE = 1, MEASUREMENTS_PER_TIMER = 5, TIMERS = 1
 # Then run teardown.sh
 
-DATESTRING = "20240830153007"
+DATESTRING = "20250221012131"
 EXPECTED_DELAY = 10  # ms
 EXPECTED_MEASUREMENTS_PER_CONFIG = 5
 
@@ -33,7 +33,7 @@ def main():
 
 
 def get_packets_sent_by_node(udp_packets_df):
-    udp_packets_df = udp_packets_df.drop(columns=["srcport", "quic_cid"])
+    udp_packets_df = udp_packets_df.drop(columns=["srcport", "quic_scid", "quic_dcid"])
     # print(udp_packets_df.head(20))
     # print()
 
@@ -65,6 +65,16 @@ def get_packets_sent_by_node(udp_packets_df):
         packets = g_df.groupby("Sender").size()
         packets_with_crypto = g_df.query("no_crypto == False").groupby("Sender").size()
 
+        # print(g_df.loc[g_df["Sender"] == "Server"])
+        server_last_packet_length = g_df.loc[
+            g_df["Sender"] == "Server", "frame_length"
+        ].iloc[-1]
+        client_last_packet_length = g_df.loc[
+            g_df["Sender"] == "Client", "frame_length"
+        ].iloc[-1]
+        # print(server_last_packet_length)
+        # print(client_last_packet_length)
+
         # if g_df["kem_algo"].iloc[0] == "p256":
         #     print(finished_row.index[0])
         #     print(g_df)
@@ -85,6 +95,8 @@ def get_packets_sent_by_node(udp_packets_df):
                         "server_sent_packets_with_crypto_count": packets_with_crypto[
                             "Server"
                         ],
+                        "client_last_packet_length": client_last_packet_length,
+                        "server_last_packet_length": server_last_packet_length,
                     }
                 ),
             ],
@@ -95,6 +107,13 @@ def get_packets_sent_by_node(udp_packets_df):
         #     break
     # print(packets_per_node)
     # print(packets_per_node.loc[packets_per_node["kem_algo"] == "p256"])
+    # print(packets_per_node.loc[packets_per_node["kem_algo"] == "bikel1"])
+    # print(packets_per_node.loc[packets_per_node["kem_algo"] == "p384_mlkem1024"])
+
+    # print(packets_per_node["server_last_packet_length"].unique())
+
+    # print(packets_per_node["client_last_packet_length"].unique())
+    # print(packets_per_node.loc[packets_per_node["client_last_packet_length"] == 151])
 
     nunique_and_count = packets_per_node.groupby("kem_algo").agg(
         {
@@ -102,21 +121,26 @@ def get_packets_sent_by_node(udp_packets_df):
             "server_sent_packets_count": ["nunique", "count"],
             "client_sent_packets_with_crypto_count": ["nunique"],
             "server_sent_packets_with_crypto_count": ["nunique"],
+            "client_last_packet_length": ["median"],
+            "server_last_packet_length": ["median"],
         }
     )
     nunique_and_count.columns = [
         "_".join(col).strip() for col in nunique_and_count.columns.values
     ]
-    assert (
-        (
-            nunique_and_count.client_sent_packets_count_count
-            == EXPECTED_MEASUREMENTS_PER_CONFIG
-        )
-        & (
-            nunique_and_count.server_sent_packets_count_count
-            == EXPECTED_MEASUREMENTS_PER_CONFIG
-        )
-    ).all()
+    print(nunique_and_count.columns)
+    # print(nunique_and_count.client_sent_packets_count_count)
+    # print(nunique_and_count.server_sent_packets_count_count)
+    # assert (
+    #     (
+    #         nunique_and_count.client_sent_packets_count_count
+    #         == EXPECTED_MEASUREMENTS_PER_CONFIG
+    #     )
+    #     & (
+    #         nunique_and_count.server_sent_packets_count_count
+    #         == EXPECTED_MEASUREMENTS_PER_CONFIG
+    #     )
+    # ).all()
     nunique_and_count = nunique_and_count.drop(
         columns=["client_sent_packets_count_count", "server_sent_packets_count_count"]
     )
@@ -126,12 +150,29 @@ def get_packets_sent_by_node(udp_packets_df):
     ).all()
     # print(nunique_and_count)
 
+    # overwrite each row's server/client last packet length with the median over all rows of the same kem_algo
+    for kem_algo in packets_per_node["kem_algo"].unique():
+        median_server_last_packet_length = nunique_and_count.loc[
+            nunique_and_count.index == kem_algo, "server_last_packet_length_median"
+        ].values[0]
+        packets_per_node.loc[
+            packets_per_node["kem_algo"] == kem_algo, "server_last_packet_length"
+        ] = median_server_last_packet_length
+        median_client_last_packet_length = nunique_and_count.loc[
+            nunique_and_count.index == kem_algo, "client_last_packet_length_median"
+        ].values[0]
+        packets_per_node.loc[
+            packets_per_node["kem_algo"] == kem_algo, "client_last_packet_length"
+        ] = median_client_last_packet_length
+
     # print(packets_per_node)
     packets_per_node_with_crypto = packets_per_node[
         [
             "kem_algo",
             "client_sent_packets_with_crypto_count",
             "server_sent_packets_with_crypto_count",
+            "server_last_packet_length",
+            "client_last_packet_length",
         ]
     ]
     # print(packets_per_node_with_crypto)
@@ -143,7 +184,7 @@ def get_packets_sent_by_node(udp_packets_df):
             ]
         )
     )
-    print(packets_per_node_with_crypto)
+    # print(packets_per_node_with_crypto)
     kem_characteristics_df = helper_functions.get_kem_characteristics()
     df = pd.merge(
         packets_per_node_with_crypto, kem_characteristics_df, on="kem_algo", how="left"
@@ -161,6 +202,7 @@ def get_packets_sent_by_node(udp_packets_df):
             "claimed_security",
             "length_secret_key",
             "length_shared_secret",
+            "client_last_packet_length",
         ]
     )
 
@@ -171,10 +213,18 @@ def get_packets_sent_by_node(udp_packets_df):
 
 def analyze_udp_packets(kem_id_df):
     cap = pyshark.FileCapture(
-        os.path.join("saved", "captures", f"capture_{DATESTRING}.pcap"),
+        os.path.join(
+            "saved",
+            "captures",
+            "new_for_packet_analyzation",
+            f"capture_{DATESTRING}.pcap",
+        ),
         override_prefs={
             "tls.keylog_file": os.path.join(
-                "saved", "captures", f"sslkeylogfile_{DATESTRING}.log"
+                "saved",
+                "captures",
+                "new_for_packet_analyzation",
+                f"sslkeylogfile_{DATESTRING}.log",
             )
         },
         display_filter="udp",
@@ -182,13 +232,22 @@ def analyze_udp_packets(kem_id_df):
     # print(cap)
     df = pd.DataFrame()
 
+    counter_no_quic_in_udp_packets = 0
     for idx, packet in enumerate(cap):
+        # print(packet.number)
+        if int(packet.number) > 5735:  # 5735 is where tlstcp starts in this capture; presumably needs updating when DATESTRING changes
+            break
         # icmp messages with pings that contain quic, ignore them
         if "udp" not in packet:
             # print(packet)
             # print(packet.layers)
             continue
 
+        if "quic" not in packet:
+            counter_no_quic_in_udp_packets += 1
+            # print(f"No quic in udp packet {counter_no_quic_in_udp_packets}")
+            continue
+
         # if idx >= 2000:
         # if idx >= 6:
         # break
@@ -204,13 +263,17 @@ def analyze_udp_packets(kem_id_df):
 
         match ("scid" in packet.quic.field_names, "dcid" in packet.quic.field_names):
             case (True, True):
-                assert False, "Both scid and dcid are present"
+                scid = packet.quic.scid
+                dcid = packet.quic.dcid
             case (False, False):
-                cid = np.nan
+                scid = np.nan
+                dcid = np.nan
             case (True, False):
-                cid = packet.quic.scid
+                scid = packet.quic.scid
+                dcid = np.nan
             case (False, True):
-                cid = packet.quic.dcid
+                dcid = packet.quic.dcid
+                scid = np.nan
 
         # A packet can have multiple quic layers, the layers can have multiple fields with the same name, but they are hidden behind the all_fields attribute
         tls_handshake_types = []
@@ -240,6 +303,7 @@ def analyze_udp_packets(kem_id_df):
         assert len(no_crypto) > 0, "No quic layer"
         no_crypto = all(no_crypto)
 
+        # print(supported_group)
         df = pd.concat(
             [
                 df,
@@ -261,7 +325,8 @@ def analyze_udp_packets(kem_id_df):
                         "udp_length": [packet.udp.length],
                         "quic_length": [packet.quic.packet_length],
                         "wireshark_quic_cid": [packet.quic.connection_number],
-                        "quic_cid": [cid],
+                        "quic_scid": [scid],
+                        "quic_dcid": [dcid],
                         "supported_group": [supported_group],
                         "tls_handshake_type": [tls_handshake_types],
                         "no_crypto": [no_crypto],
@@ -282,10 +347,12 @@ def analyze_udp_packets(kem_id_df):
     df["quic_length"] = df["quic_length"].astype(int)
     df["wireshark_quic_cid"] = df["wireshark_quic_cid"].astype(int)
 
-    # supported groups do have hex string values, but with lowercase letters, so keep the x lowercase and transform the rest to uppercase
+    # supported groups are hex strings with lowercase digits; normalize them to "0x" plus zero-padded (at least four) uppercase hex digits
     df["supported_group"] = df["supported_group"].apply(
-        lambda x: x[0:2] + x[2:].upper() if pd.notna(x) else np.nan
+        lambda x: f"0x{int(x, 16):04X}" if pd.notna(x) else np.nan
     )
+    # print(df["supported_group"].unique())
+    # print(kem_id_df.loc[int(kem_id_df["nid"]) == int("0xfe00"), "kem_name"])
     df["kem_algo"] = df["supported_group"].apply(
         lambda x: (
             kem_id_df.loc[kem_id_df["nid"] == x, "kem_name"].values[0]
@@ -297,7 +364,7 @@ def analyze_udp_packets(kem_id_df):
         lambda x: x.ffill().bfill()
     )
 
-    printdf = df.drop(columns=["srcport", "quic_cid"])
+    # printdf = df.drop(columns=["srcport", "quic_scid", "quic_dcid"])
     # print(printdf.head())
     # print(printdf.query("ID >= 689 and ID <= 699"))
     # print()
diff --git a/pq-tls-benchmark-framework/emulation-exp/code/kex/scripts/generate_graphs.py b/pq-tls-benchmark-framework/emulation-exp/code/kex/scripts/generate_graphs.py
index 0ad3804eb..b0253f60a 100755
--- a/pq-tls-benchmark-framework/emulation-exp/code/kex/scripts/generate_graphs.py
+++ b/pq-tls-benchmark-framework/emulation-exp/code/kex/scripts/generate_graphs.py
@@ -14,7 +14,7 @@ import scipy
 import helper_scripts.helper_functions as helper_functions
 import helper_scripts.performance_eval_of_oqs as performance_eval_of_oqs
 
-RESULTS_DIR = "saved/results-run-20250207-vm-p16"
+RESULTS_DIR = "saved/results-run-20250222-vm-p16"
 FILTER_RESULTS = []
 PLOTS_DIR = "plots"
 FEATHERS_DIR = "feathers"
diff --git a/pq-tls-benchmark-framework/emulation-exp/code/kex/scripts/queries.py b/pq-tls-benchmark-framework/emulation-exp/code/kex/scripts/queries.py
index 760fa8f23..8b129a8f5 100644
--- a/pq-tls-benchmark-framework/emulation-exp/code/kex/scripts/queries.py
+++ b/pq-tls-benchmark-framework/emulation-exp/code/kex/scripts/queries.py
@@ -27,13 +27,14 @@ def main():
 def bandwith_calcs():
     df = get_cic_and_sic()
 
-    def calc_delay_cuz_of_bandwidth_in_ms(cic, sic, bandwidth, packetlength=1200):
+    def calc_delay_cuz_of_bandwidth_in_ms(cic, sic, lsf, bandwidth, packetlength=1242):
         """
         Calculates the delay in ms caused by bandwidth.
 
         Args:
             cic: client initial count.
             sic: server initial count.
+            lsf: length of the last ethernet frame sent by the server, in bytes.
             bandwidth: bandwidth in bits per second.
             packetlength: length of a packet in bytes.
 
@@ -41,12 +42,14 @@ def bandwith_calcs():
             delay in seconds.
         """
         SECONDS_IN_MS = 1000
-        return (cic + sic) * packetlength * 8 / bandwidth * SECONDS_IN_MS
+        return (
+            (((cic + (sic - 1)) * packetlength) + lsf) * 8 / bandwidth * SECONDS_IN_MS
+        )
 
     for bw in [0.1, 0.25, 0.5, 1, 3, 5, 500]:
         df[f"t_delay_{bw}"] = df.apply(
             lambda row: calc_delay_cuz_of_bandwidth_in_ms(
-                row["cic"], row["sic"], bw * 1000000
+                row["cic"], row["sic"], row["server_last_packet_length"], bw * 1000000
             ),
             axis=1,
         )
-- 
GitLab