diff --git a/pq-tls-benchmark-framework/emulation-exp/code/kex/README.md b/pq-tls-benchmark-framework/emulation-exp/code/kex/README.md
index 7b1e77d61571a70ac6fe7cd34ff819d990ca5c36..29dc6b98e56fae6c51fd6af97533f3db4f685b24 100644
--- a/pq-tls-benchmark-framework/emulation-exp/code/kex/README.md
+++ b/pq-tls-benchmark-framework/emulation-exp/code/kex/README.md
@@ -24,7 +24,7 @@ To run the experiment with a parallelism of 16, you need aproximately 24 hours.
 In `benchmarking-pqc-in-quic/pq-tls-benchmark-framework/emulation-exp/code/kex`:
 ```bash
     ./scripts/setup.sh $(nproc)
-    sudo .venv/bin/python scripts/experiment.py testscenarios/scenario_static.csv testscenarios/scenario_corrupt.csv tests cenarios/scenario_delay.csv testscenarios/scenario_duplicate.csv testscenarios/scenario_jitter_delay20ms.csv testscenarios/scenario_packetloss.csv testscenarios/scenario_rate_both.csv testscenarios/scenario_rate_client.csv testscenarios/scenario_rate_server.csv testscenarios/scenario_reorder.csv
+    sudo .venv/bin/python scripts/experiment.py testscenarios/scenario_static.csv testscenarios/scenario_corrupt.csv testscenarios/scenario_delay.csv testscenarios/scenario_duplicate.csv testscenarios/scenario_jitter_delay20ms.csv testscenarios/scenario_packetloss.csv testscenarios/scenario_rate_both.csv testscenarios/scenario_rate_client.csv testscenarios/scenario_rate_server.csv testscenarios/scenario_reorder.csv | tee stdout.log
     ./scripts/teardown.sh $(nproc)
 ```
 
diff --git a/pq-tls-benchmark-framework/emulation-exp/code/kex/quic_s_timer.c b/pq-tls-benchmark-framework/emulation-exp/code/kex/quic_s_timer.c
index b7b5b98431d63ea9fadd48b9c939d2041437bc9b..dc14a3872e58e34a67a582c2da83640a1113a761 100644
--- a/pq-tls-benchmark-framework/emulation-exp/code/kex/quic_s_timer.c
+++ b/pq-tls-benchmark-framework/emulation-exp/code/kex/quic_s_timer.c
@@ -247,6 +247,7 @@ int main(int argc, char *argv[])
     size_t measurements = 0;
     struct timespec start, finish;
     double *handshake_times_ms = malloc(measurements_to_make * sizeof(*handshake_times_ms));
+    int error_count = 0;
     while (measurements < measurements_to_make)
     {
         clock_gettime(CLOCK_MONOTONIC_RAW, &start);
@@ -260,6 +261,7 @@ int main(int argc, char *argv[])
              * Non-retryable errors are caught by manual
              * inspection of logs, which has sufficed
              * for our purposes */
+            error_count += 1;
             continue;
         }
 
@@ -280,6 +282,7 @@ int main(int argc, char *argv[])
         measurements++;
     }
 
+    printf("%d;", error_count);
     for (size_t i = 0; i < measurements - 1; i++)
     {
         printf("%f,", handshake_times_ms[i]);
diff --git a/pq-tls-benchmark-framework/emulation-exp/code/kex/s_timer.c b/pq-tls-benchmark-framework/emulation-exp/code/kex/s_timer.c
index 4d91f7751e0c042dbade830da98b51458da513a7..a522504edea3fcd48b3ae1e46c585945c4cef815 100644
--- a/pq-tls-benchmark-framework/emulation-exp/code/kex/s_timer.c
+++ b/pq-tls-benchmark-framework/emulation-exp/code/kex/s_timer.c
@@ -133,6 +133,7 @@ int main(int argc, char* argv[])
     SSL_CTX_set_verify(ssl_ctx, SSL_VERIFY_PEER, NULL);
 
     // Start experiments
+    int error_count = 0;
     while(measurements < measurements_to_make)
     {
         clock_gettime(CLOCK_MONOTONIC_RAW, &start);
@@ -145,6 +146,7 @@ int main(int argc, char* argv[])
              * Non-retryable errors are caught by manual
              * inspection of logs, which has sufficed
              * for our purposes */
+            error_count += 1;
             continue;
         }
 
@@ -161,6 +163,7 @@ int main(int argc, char* argv[])
         measurements++;
     }
 
+    printf("%d;", error_count);
     for(size_t i = 0; i < measurements - 1; i++)
     {
         printf("%f,", handshake_times_ms[i]);
diff --git a/pq-tls-benchmark-framework/emulation-exp/code/kex/scripts/experiment.py b/pq-tls-benchmark-framework/emulation-exp/code/kex/scripts/experiment.py
index 6fb6f7ff036b19cd876dee4d27c9655c7e1beaec..2601895adbd763f318e0f7c9a7c30d7dc62be23e 100755
--- a/pq-tls-benchmark-framework/emulation-exp/code/kex/scripts/experiment.py
+++ b/pq-tls-benchmark-framework/emulation-exp/code/kex/scripts/experiment.py
@@ -72,8 +72,10 @@ def main():
                             path_to_results_csv_file,
                             "a",
                         ) as out:
-                            result = run_timers(timer_pool, protocol, kem_alg)
-                            csv.writer(out).writerow(result)
+                            error_count, result = run_timers(
+                                timer_pool, protocol, kem_alg
+                            )
+                            csv.writer(out).writerow([error_count, *result])
 
     timer_pool.close()
     timer_pool.join()
@@ -178,7 +180,12 @@ def run_timers(timer_pool, protocol, kem_alg):
     results_nested = timer_pool.starmap(
         time_handshake, [(protocol, kem_alg, MEASUREMENTS_PER_TIMER)] * TIMERS
     )
-    return [item for sublist in results_nested for item in sublist]
+    # results_nested is a list of (error_count, measurements) tuples, one per time_handshake call
+    error_count_aggregated = sum([error_count for error_count, _ in results_nested])
+    results_nested = [measurements for _, measurements in results_nested]
+    return error_count_aggregated, [
+        item for sublist in results_nested for item in sublist
+    ]
 
 
 # do TLS handshake (s_timer.c)
@@ -214,7 +221,9 @@ def time_handshake(protocol, kem_alg, measurements) -> list[float]:
     ]
     result = run_subprocess(command)
     release_network_namespace(network_namespace)
-    return [float(i) for i in result.strip().split(",")]
+    error_count, result = result.split(";")
+    error_count = int(error_count)
+    return error_count, [float(i) for i in result.strip().split(",")]
 
 
 def run_subprocess(command, working_dir=".", expected_returncode=0) -> str:
diff --git a/pq-tls-benchmark-framework/emulation-exp/code/kex/scripts/generate_graphs.py b/pq-tls-benchmark-framework/emulation-exp/code/kex/scripts/generate_graphs.py
index 4992ca08e224ca6aee9a879cd0f50c6407f84428..181cb8dc049f4ffa6bd114fdc419c66410dbad98 100755
--- a/pq-tls-benchmark-framework/emulation-exp/code/kex/scripts/generate_graphs.py
+++ b/pq-tls-benchmark-framework/emulation-exp/code/kex/scripts/generate_graphs.py
@@ -62,6 +62,8 @@ def read_data_into_pandas():
             "cli_corrupt",
             "cli_reorder",
             "cli_rate",
+            "error_count",
+            "error_rate",
             "measurements",
             "mean",
             "std",
@@ -101,8 +103,9 @@ def read_data_into_pandas():
 
         assert len(result_file_data.columns) == len(df_scenariofile)
         for i in range(len(result_file_data.columns)):
-            measurements = result_file_data.iloc[:, i].tolist()
-            measurements = np.array(measurements)
+            measurements_and_error_count = result_file_data.iloc[:, i].tolist()
+            error_count = measurements_and_error_count[0]
+            measurements = np.array(measurements_and_error_count[1:])
             data.loc[len(data)] = {
                 "scenario": scenario,
                 "protocol": protocol,
@@ -122,6 +125,8 @@ def read_data_into_pandas():
                 "cli_corrupt": df_scenariofile.iloc[i]["cli_corrupt"],
                 "cli_reorder": df_scenariofile.iloc[i]["cli_reorder"],
                 "cli_rate": df_scenariofile.iloc[i]["cli_rate"],
+                "error_count": error_count,
+                "error_rate": error_count / len(measurements),
                 "measurements": measurements,
                 "mean": np.mean(measurements),
                 "std": np.std(measurements),