Merge pull request #32 from bastian-src/fix/pattern_standardization

Fix RNTI matching pattern standardization
bastian-src · Dec 1, 2024 · 3a676be · 3a676be
2 parents b14e804 + 6097a33
commit 3a676be
Show file tree

Hide file tree

Showing 3 changed files with 106 additions and 15 deletions.
diff --git a/README.md b/README.md
@@ -63,6 +63,84 @@ sudo sysctl -w net.ipv6.conf.lo.disable_ipv6=1
 
 The configuration can be made persistent by editing/adding a file in `/etc/sysctl.d/`.
 
+## Scripts
+
+### Visualize RNTI Matching
+
+#### Diashow
+
+<details>
+<summary>Click to expand</summary>
+
+Visualize the UL traffic of all RNTIs that are left after applying the
+pre-filter. Provide `--rnti RNTI` in case you want to filter for a
+certain RNTI in the first place.
+
+The output shows how many RNTIs were filtered by the corresponding pre-filter.
+
+Example usage *without* an explicit RNTI:
+
+```
+./scripts/visualize_rnti_matching.py --path ".logs.ue/run-<run-date>/rnti_matching/run_<run-date>_traffic_collection.jsonl" diashow
+```
+
+</details>
+
+#### Standardize
+
+<details>
+<summary>Click to expand</summary>
+
+Print the standardization vector of a certain RNTI's traffic.
+
+When you provide an RNTI, only the records in which that RNTI occurs
+(it might be removed by the pre-filter!) are used for standardization.
+
+If you don't provide an RNTI explicitly, it uses the traffic of the
+RNTI with the highest number of UL occurrences.
+
+Example usage *without* an explicit RNTI:
+
+```
+./scripts/visualize_rnti_matching.py --path ".logs.ue/run-<run-date>/rnti_matching/run_<run-date>_traffic_collection.jsonl" standardize
+[...]
+DEBUG [determine_highest_count_ul_timeline] rnti: 34135 | count: 1873
+DEBUG [determine_highest_count_ul_timeline] rnti: 34226 | count: 1186
+DEBUG [determine_highest_count_ul_timeline] rnti: 34319 | count: 1166
+DEBUG [determine_highest_count_ul_timeline] rnti: 33123 | count: 1619
+DEBUG [determine_highest_count_ul_timeline] rnti: 54112 | count: 1529
+vec![
+    (2735.217, 2362.898),
+    (564014.484, 336306.997),
+    (65.652, 55.473),
+    (327.697, 249.128),
+    (428706.906, 643780.033),
+    (4422.802, 2244.733),
+    (6125.165, 2793.039),
+    (156940930.077, 382279093.565)
+],
+```
+
+Example usage *with* an explicit RNTI:
+
+```
+./scripts/visualize_rnti_matching.py --path ".logs.ue/run-<run-date>/rnti_matching/run_<run-date>_traffic_collection.jsonl" standardize --rnti 34226
+[...]
+DEBUG [determine_highest_count_ul_timeline] rnti: 34226 | count: 1529
+vec![
+    (2735.217, 2362.898),
+    (564014.484, 336306.997),
+    (65.652, 55.473),
+    (327.697, 249.128),
+    (428706.906, 643780.033),
+    (4422.802, 2244.733),
+    (6125.165, 2793.039),
+    (156940930.077, 382279093.565)
+],
+```
+
+</details>
+
 ## Data
 
 Example data and results can be found [here](https://nextcloud.schmidt-systems.eu/s/AYqZDwtWxAeQY8N).
diff --git a/scripts/requirements.txt b/scripts/requirements.txt
@@ -1,10 +1,8 @@
 flask
-marshmallow
-matplotlib
-jsonlines
-seaborn
-json
-linecache
-pandas
-pyarrow
-glob
+marshmallow==3.21.3
+matplotlib==3.9.1
+jsonlines==4.0.0
+seaborn==0.13.2
+pandas==2.1.4
+pyarrow==17.0.0
+numpy==1.24.4
diff --git a/scripts/visualize_rnti_matching.py b/scripts/visualize_rnti_matching.py
@@ -210,7 +210,7 @@ def filter_dataset_fast(settings, raw_dataset) -> FilteredRecording:
         flattened: dict = {}
         for rnti, ue_data in cell_data['traffic'].items():
             if hasattr(settings, 'rnti'):
-                if settings.rnti is not None and rnti != settings.target_rnti:
+                if settings.rnti is not None and rnti != settings.rnti:
                     result.skipped_not_target_rnti += 1
                     continue
 
@@ -446,13 +446,20 @@ def prev(self, _):
 
 def standardize(settings):
     all_recordings = read_all_recordings(settings)
+    if len(all_recordings) == 0:
+        print_debug("ERROR!\n\nNo records left after applying pre-filter.\n\n")
+        raise Exception("No records available after applying pre-filter.")
     print(f"DEBUG len(all_data) {len(all_recordings)}")
     ul_timeline_matrix = np.zeros((len(all_recordings), 3))
     dci_time_deltas_matrix = np.zeros((len(all_recordings), 3))
     count_vec = np.zeros((len(all_recordings), 1))
     total_ul_vec = np.zeros((len(all_recordings), 1))
     for (index, recordings) in enumerate(all_recordings):
-        (ul_timeline, dci_time_deltas, count, total_ul_bytes) = determine_highest_count_ul_timeline(recordings)
+        try:
+            (ul_timeline, dci_time_deltas, count, total_ul_bytes) = determine_highest_count_ul_timeline(settings, recordings)
+        except Exception as e:
+            print_debug(f"Skipping dataset [{index}]: {e}")
+            continue
         count_vec[index] = count
         total_ul_vec[index] = total_ul_bytes
 
@@ -463,6 +470,9 @@ def standardize(settings):
         dci_time_deltas_matrix[index, 0] = np.median(dci_time_deltas)
         dci_time_deltas_matrix[index, 1] = np.mean(dci_time_deltas)
         dci_time_deltas_matrix[index, 2] = np.var(dci_time_deltas)
+    if len(count_vec) == 0:
+        print_debug("ERROR!\n\nNo record available. Maybe, you provided the wrong RNTI or all your RNTI traffic was removed in the static pre-filter?\n\n")
+        raise Exception("No record available to determine standardization parameters.")
 
     std_count = (np.mean(count_vec), np.std(count_vec))
     std_total_ul = (np.mean(total_ul_vec), np.std(total_ul_vec))
@@ -524,12 +534,14 @@ def read_all_recordings(settings):
     return all_runs
 
 
-def determine_highest_count_ul_timeline(df):
-    rnti = "11852"
+def determine_highest_count_ul_timeline(settings, df):
+    rnti = settings.rnti
     target_traffic: pd.DataFrame = pd.DataFrame()
 
-    if not rnti in df.columns:
+    if rnti is None:
         rnti = df.count().idxmax()
+    elif rnti not in df.columns:
+        raise Exception("Target RNTI was not part of record.")
     target_traffic = df[rnti].dropna()
 
     ul_timeline = target_traffic.values
@@ -784,7 +796,10 @@ def plot_basic_filtered(settings, recording):
 
     # standardize subcommand
     parser_standardize = subparsers.add_parser('standardize', help='Run standardize mode')
-
+    parser_standardize.add_argument('--rnti',
+                                    type=str,
+                                    default=None,
+                                    help='Use a recording only if this RNTI is part of it. Otherwise, use the RNTI with the most UL occurences.')
     # export subcommand
     parser_export = subparsers.add_parser('export', help='Run export mode')
     parser_export.add_argument('--export-path',