import os import numpy as np import pandas as pd import pyshark import helper_scripts.helper_functions as helper_functions # NOTE there is also a packet called scapy which might work # To create a capture and sslkeylogfile, do the following: # First run setup.sh # Then run experiment.py with scenario_analyze_packets.csv while setting the variables to POOL_SIZE = 1, MEASUREMENTS_PER_TIMER = 5, TIMERS = 1 # Then run teardown.sh DATESTRING = "20240830153007" EXPECTED_DELAY = 10 # ms EXPECTED_MEASUREMENTS_PER_CONFIG = 5 def main(): kem_id_df = helper_functions.get_kem_ids() # print(kem_id_df) os.makedirs("feathers", exist_ok=True) if os.path.exists("feathers/udp_packets.feather"): udp_packets_df = pd.read_feather("feathers/udp_packets.feather") else: udp_packets_df = analyze_udp_packets(kem_id_df) udp_packets_df.to_feather("feathers/udp_packets.feather") get_packets_sent_by_node(udp_packets_df) def get_packets_sent_by_node(udp_packets_df): udp_packets_df = udp_packets_df.drop(columns=["srcport", "quic_cid"]) # print(udp_packets_df.head(20)) # print() i = 0 packets_per_node = pd.DataFrame() for g in udp_packets_df.groupby("wireshark_quic_cid"): # print(g[0]) # is the group number # print(g[1]) # is the dataframe of this group g_df = g[1] finished_row = g_df.loc[ (g_df["Sender"] == "Client") & (g_df["tls_handshake_type"].apply(lambda x: "Finished" in x)) ] if finished_row.empty: print( f"No finished row found for {i}, probably cuz an error, throwing away this connection, since it was probably retried" ) # print(g_df) continue # print(finished_row) # print("important", finished_row.iloc[0]["ID"]) # print("before", g_df) g_df = g_df.query(f"ID <= {finished_row.iloc[0]['ID']}") # print("after", g_df) # print() packets = g_df.groupby("Sender").size() packets_with_crypto = g_df.query("no_crypto == False").groupby("Sender").size() # if g_df["kem_algo"].iloc[0] == "p256": # print(finished_row.index[0]) # print(g_df) # print(g_df.query("Sender == 'Client'")) packets_per_node = pd.concat( [ packets_per_node, pd.DataFrame( { "wireshark_quic_cid": [g[0]], "kem_algo": g_df["kem_algo"].iloc[0], "client_sent_packets_count": packets["Client"], "server_sent_packets_count": packets["Server"], "client_sent_packets_with_crypto_count": packets_with_crypto[ "Client" ], "server_sent_packets_with_crypto_count": packets_with_crypto[ "Server" ], } ), ], ignore_index=True, ) i += 1 # if i >= 5: # break # print(packets_per_node) # print(packets_per_node.loc[packets_per_node["kem_algo"] == "p256"]) nunique_and_count = packets_per_node.groupby("kem_algo").agg( { "client_sent_packets_count": ["nunique", "count"], "server_sent_packets_count": ["nunique", "count"], "client_sent_packets_with_crypto_count": ["nunique"], "server_sent_packets_with_crypto_count": ["nunique"], } ) nunique_and_count.columns = [ "_".join(col).strip() for col in nunique_and_count.columns.values ] assert ( ( nunique_and_count.client_sent_packets_count_count == EXPECTED_MEASUREMENTS_PER_CONFIG ) & ( nunique_and_count.server_sent_packets_count_count == EXPECTED_MEASUREMENTS_PER_CONFIG ) ).all() nunique_and_count = nunique_and_count.drop( columns=["client_sent_packets_count_count", "server_sent_packets_count_count"] ) assert ( (nunique_and_count.client_sent_packets_with_crypto_count_nunique == 1) & (nunique_and_count.server_sent_packets_with_crypto_count_nunique == 1) ).all() # print(nunique_and_count) # print(packets_per_node) packets_per_node_with_crypto = packets_per_node[ [ "kem_algo", "client_sent_packets_with_crypto_count", "server_sent_packets_with_crypto_count", ] ] # print(packets_per_node_with_crypto) packets_per_node_with_crypto = ( packets_per_node_with_crypto.drop_duplicates().sort_values( by=[ "client_sent_packets_with_crypto_count", "server_sent_packets_with_crypto_count", ] ) ) print(packets_per_node_with_crypto) kem_characteristics_df = helper_functions.get_kem_characteristics() df = pd.merge( packets_per_node_with_crypto, kem_characteristics_df, on="kem_algo", how="left" ) # print(df) # print() # print(df.loc[df["kem_algo"] == "p256_mlkem512", "length_public_key"]) df = helper_functions.fill_in_kem_characteristics_for_hybrid_kems(df) # df["length_secret_key"] = df["length_secret_key"].astype(int) # df["length_shared_secret"] = df["length_shared_secret"].astype(int) df = df.drop( columns=[ "claimed_nist_level", "claimed_security", "length_secret_key", "length_shared_secret", ] ) # print(df.info()) print(df) return df def analyze_udp_packets(kem_id_df): cap = pyshark.FileCapture( os.path.join("saved", "captures", f"capture_{DATESTRING}.pcap"), override_prefs={ "tls.keylog_file": os.path.join( "saved", "captures", f"sslkeylogfile_{DATESTRING}.log" ) }, display_filter="udp", ) # print(cap) df = pd.DataFrame() for idx, packet in enumerate(cap): # icmp messages with pings that contain quic, ignore them if "udp" not in packet: # print(packet) # print(packet.layers) continue # if idx >= 2000: # if idx >= 6: # break # print(packet.number) # print(packet.layers) # print(packet.ip.field_names) # ['version', 'hdr_len', 'dsfield', 'dsfield_dscp', 'dsfield_ecn', 'len', 'id', 'flags', 'flags_rb', 'flags_df', 'flags_mf', 'frag_offset', 'ttl', 'proto', 'checksum', 'checksum_status', 'src', 'addr', 'src_host', 'host', 'dst', 'dst_host'] # print(packet.eth.field_names) # ['dst', 'dst_resolved', 'dst_oui', 'dst_oui_resolved', 'addr', 'addr_resolved', 'addr_oui', 'addr_oui_resolved', 'dst_lg', 'lg', 'dst_ig', 'ig', 'src', 'src_resolved', 'src_oui', 'src_oui_resolved', 'src_lg', 'src_ig', 'type'] # print(packet.udp.field_names) # ['srcport', 'dstport', 'port', 'length', 'checksum', 'checksum_status', 'stream', '', 'time_relative', 'time_delta', 'payload'] # if packet.number == "695" or packet.number == "696": # for quic_layer in packet.get_multiple_layers("quic"): # print(packet.number, quic_layer.field_names) match ("scid" in packet.quic.field_names, "dcid" in packet.quic.field_names): case (True, True): assert False, "Both scid and dcid are present" case (False, False): cid = np.nan case (True, False): cid = packet.quic.scid case (False, True): cid = packet.quic.dcid # A packet can have multiple quic layers, the layers can have multiple fields with the same name, but they are hidden behind the all_fields attribute tls_handshake_types = [] for quic_layer in packet.get_multiple_layers("quic"): if "tls_handshake_type" in quic_layer.field_names: for field in quic_layer.tls_handshake_type.all_fields: tls_handshake_types.append(field.show) tls_handshake_types = map_tls_handshake_types(tls_handshake_types) # The naming inside of wireshark of the kem algos is not correct all the time supported_group = np.nan if "Client Hello" in tls_handshake_types: for quic_layer in packet.get_multiple_layers("quic"): if "tls_handshake_extensions_supported_group" in quic_layer.field_names: # only shows the first of the supported groups, but fine in our context, when only looking at the client hello supported_group = ( quic_layer.tls_handshake_extensions_supported_group ) # no_crypto is only correct for the quic packets sent in the handshake, not for the packets sent after the handshake no_crypto = [] for quic_layer in packet.get_multiple_layers("quic"): if "crypto_offset" in quic_layer.field_names: no_crypto.append(False) else: no_crypto.append(True) assert len(no_crypto) > 0, "No quic layer" no_crypto = all(no_crypto) df = pd.concat( [ df, pd.DataFrame( { "ID": [packet.number], "Sender": [ ( "Server" if packet.eth.src == "00:00:00:00:00:01" else "Client" ) ], "srcport": [packet.udp.srcport], "time_relative": [packet.udp.time_relative], "time_delta": [packet.udp.time_delta], "frame_length": [packet.length], "ip_length": [packet.ip.len], "udp_length": [packet.udp.length], "quic_length": [packet.quic.packet_length], "wireshark_quic_cid": [packet.quic.connection_number], "quic_cid": [cid], "supported_group": [supported_group], "tls_handshake_type": [tls_handshake_types], "no_crypto": [no_crypto], } ), ], ignore_index=True, ) # change type from str to int df["ID"] = df["ID"].astype(int) df["srcport"] = df["srcport"].astype(int) df["time_relative"] = df["time_relative"].astype(float) df["time_delta"] = df["time_delta"].astype(float) df["frame_length"] = df["frame_length"].astype(int) df["ip_length"] = df["ip_length"].astype(int) df["udp_length"] = df["udp_length"].astype(int) df["quic_length"] = df["quic_length"].astype(int) df["wireshark_quic_cid"] = df["wireshark_quic_cid"].astype(int) # supported groups do have hex string values, but with lowercase letters, so keep the x lowercase and transform the rest to uppercase df["supported_group"] = df["supported_group"].apply( lambda x: x[0:2] + x[2:].upper() if pd.notna(x) else np.nan ) df["kem_algo"] = df["supported_group"].apply( lambda x: ( kem_id_df.loc[kem_id_df["nid"] == x, "kem_name"].values[0] if pd.notna(x) else np.nan ) ) df["kem_algo"] = df.groupby("wireshark_quic_cid")["kem_algo"].transform( lambda x: x.ffill().bfill() ) printdf = df.drop(columns=["srcport", "quic_cid"]) # print(printdf.head()) # print(printdf.query("ID >= 689 and ID <= 699")) # print() # print(printdf.query("ID >= 1657 and ID <= 1680")) return df def map_tls_handshake_types(handshake_types): tls_handshake_type_map = { "1": "Client Hello", "2": "Server Hello", "4": "New Session Ticket", "8": "Encrypted Extensions", "11": "Certificate", "12": "Server Key Exchange", "13": "Certificate Request", "14": "Server Hello Done", "15": "Certificate Verify", "16": "Client Key Exchange", "20": "Finished", } return [ tls_handshake_type_map.get( handshake_type, f"Unknown tls_handshake_type {handshake_type}" ) for handshake_type in handshake_types ] if __name__ == "__main__": main()