From 7c524b3c2c8e0d3b3952bf4f2cc97ba28aa00560 Mon Sep 17 00:00:00 2001
From: cameron-a-johnson <cameron.johnson@kitware.com>
Date: Tue, 24 Dec 2024 16:30:13 -0500
Subject: [PATCH] working F1 and SMD metrics

---
 .../global_step_predictor.py                  | 21 ++++--
 .../global_step_prediction/run_expirement.py  | 71 +++++++++++++++++--
 2 files changed, 79 insertions(+), 13 deletions(-)

diff --git a/angel_system/global_step_prediction/global_step_predictor.py b/angel_system/global_step_prediction/global_step_predictor.py
index 36c934d2f..afdaa91a5 100644
--- a/angel_system/global_step_prediction/global_step_predictor.py
+++ b/angel_system/global_step_prediction/global_step_predictor.py
@@ -951,26 +951,33 @@ def save_TP_FP_FN_per_class(self, step_gts):
         Output form: TP, FP, FN, where each is an N-dimensional vector, where
         N = number of step classes.
         """
-        step_gts = [float(i) for i in step_gts][24:]
         assert len(self.trackers) == 1
         for i, tracker in enumerate(self.trackers):
             #import ipdb; ipdb.set_trace()
             step_predictions = tracker["granular_step_prediction_history"]
-            if len(step_predictions) * 1.8 < len(step_gts):
-                # This must be a 30Hz video, not 15Hz. Take every other GT frame.
-                step_gts = [a for ind, a in enumerate(step_gts) if i%2==0]
         TP = np.zeros(len(self.avg_probs))
         FP = np.zeros(len(self.avg_probs))
         FN = np.zeros(len(self.avg_probs))
         for ind in range(len(self.avg_probs)):
             # i = the index we'll get TP, FN, and FP for.
-            _TP = len([a for j, a in enumerate(step_predictions) if a == ind and step_gts[j] == ind])
+            _TP = len([a for j, a in enumerate(step_predictions[:len(step_gts)]) if a == ind and step_gts[j] == ind])
             TP[ind] = _TP
-            _FP = len([a for j, a in enumerate(step_predictions) if a == ind and step_gts[j] != ind])
+            _FP = len([a for j, a in enumerate(step_predictions[:len(step_gts)]) if a == ind and step_gts[j] != ind])
             FP[ind] = _FP
-            _FN = len([a for j, a in enumerate(step_predictions) if a != ind and step_gts[j] == ind])
+            _FN = len([a for j, a in enumerate(step_predictions[:len(step_gts)]) if a != ind and step_gts[j] == ind])
             FN[ind] = _FN
         return TP, FP, FN
+    def get_single_tracker_pred_history(self):
+        """
+        Get a single tracker's prediction history.
+        For now, this is only functional in the case of a single tracker instance
+        being initialized.
+
+        Output:
+        - list: prediction history. Length = number of frames processed.
+        """
+        assert len(self.trackers) == 1
+        return self.trackers[0]["granular_step_prediction_history"]
 
     def sanitize_str(self, str_: str):
         """
diff --git a/angel_system/global_step_prediction/run_expirement.py b/angel_system/global_step_prediction/run_expirement.py
index 35cd5f7bb..32ca3edfb 100644
--- a/angel_system/global_step_prediction/run_expirement.py
+++ b/angel_system/global_step_prediction/run_expirement.py
@@ -32,11 +32,18 @@ def run_inference_all_vids(
     all_vid_ids = np.unique(np.asarray(coco_test.images().lookup("video_id")))
     avg_probs = None
     preds, gt = [], []
+    avg_smd, avg_smd_normd = np.array([]), np.array([])
+    mean_F1s = np.array([])
     for vid_id in all_vid_ids:
         print(f"vid_id {vid_id}===========================")
 
         act_path = code_dir / "config/activity_labels/medical" / f"{medical_task}.yaml"
 
+        # Get framerate
+        framerate = round(coco_test.videos(video_ids=[vid_id]).peek()['framerate'])
+        # Assuming FR = 15 or 30
+        assert framerate in [15, 30], f"framerate rounded to {framerate}"
+
         step_predictor = GlobalStepPredictor(
             recipe_types=[f"{medical_task}"],
             activity_config_fpath=act_path.as_posix(),
@@ -46,8 +53,8 @@ def run_inference_all_vids(
                 / "config/tasks/medical"
                 / f"{medical_task}.yaml"
             },
-            # threshold_multiplier=0.3,
-            # threshold_frame_count=2
+            threshold_multiplier=0.6,
+            threshold_frame_count=5
         )
 
         if avg_probs is not None:
@@ -76,6 +83,15 @@ def run_inference_all_vids(
         # All N activity confs x each video frame
         activity_confs = test_video_dset.annots().get("prob")
         activity_gts = truth_video_dset.annots().get("category_id")
+        # If framerate is 30, take every other frame.
+        if framerate == 30:
+            # This must be a 30Hz video, not 15Hz. Take every other GT frame.
+            print(f"halve gt for {vid_id}. Len = {len(activity_gts)}")
+            activity_gts = [a for ind, a in enumerate(activity_gts) if ind%2==0]
+            print(f"new len = {len(activity_gts)}")
+        # ...and cut out the first 25 frames.
+        activity_gts = activity_gts[25:]
+
 
         def get_unique(activity_ids):
             """
@@ -106,7 +122,12 @@ def get_unique(activity_ids):
 
         print(f"unique broad steps: {get_unique(broad_step_gts)}")
 
-        _TP, _FP, _FN = step_predictor.save_TP_FP_FN_per_class(granular_step_gts)
+        # activity_gt_maxes[i] set to max step so far at index i.
+        activity_gt_maxes = activity_gts.copy()
+        for ind in range(1, len(activity_gts)):
+            activity_gt_maxes[ind] = max(activity_gts[:ind])
+
+        _TP, _FP, _FN = step_predictor.save_TP_FP_FN_per_class(activity_gt_maxes)
         if 'TP' in locals():
             TP += _TP
             FP += _FP
@@ -118,12 +139,51 @@ def get_unique(activity_ids):
         class_F1s, mean_F1 = compute_class_f1s_and_mean_f1(TP,FP,FN)
         print(f"class-wise F1s: {class_F1s}\nmean F1: {mean_F1}")
 
+        mean_F1s = np.append(mean_F1s, mean_F1)
+
+        pred_history = step_predictor.get_single_tracker_pred_history()
+        smds, smds_normd = get_start_moment_distances(pred_history, activity_gt_maxes)
+
+        avg_smd = np.append(avg_smd, np.mean(smds))
+        avg_smd_normd = np.append(avg_smd_normd, np.mean(smds_normd))
+
+        print(f"smds (# frames): {smds}, normalized:{smds_normd}")
+        try:
+            print(f"avg frame-wise smd:{avg_smd[-1]}, normalized: {avg_smd_normd[-1]}")
+        except:
+            import ipdb; ipdb.set_trace()
+
         _ = step_predictor.plot_gt_vs_predicted_one_recipe(
-            granular_step_gts,
+            activity_gts,
             recipe_type,
             fname_suffix=f"{str(vid_id)}_granular_{extra_output_suffix}",
             granular_or_broad="granular",
         )
+    print("########## OVERALL")
+    print(f"Overall average smd: {np.mean(avg_smd)}. Normalized: {np.mean(avg_smd_normd)}")
+    print(f"overall mean F1: {np.mean(mean_F1s)}")
+
+def get_start_moment_distances(pred_history, activity_gt_maxes):
+    """
+    Get the distance between ground truth & predictions of the starting frame of
+    each step.
+
+    Outputs:
+    - smds: list of length equal to number 
+    """
+    num_classes = max(activity_gt_maxes)
+    vid_length = len(activity_gt_maxes)
+    smds = np.zeros(num_classes)
+    smds_normd = np.zeros(num_classes)
+    for i in range(num_classes):
+        if i+1 in pred_history:
+            smds[i] = abs(np.where(pred_history == i+1)[0][0] - activity_gt_maxes.index(i+1))
+            smds_normd[i] = smds[i] / vid_length
+        else:
+            smds[i] = vid_length
+            smds_normd[i] = 1
+    return smds, smds_normd
+
 
 def compute_class_f1s_and_mean_f1(TP,FP,FN):
     F1s = np.zeros(len(TP))
@@ -131,10 +191,9 @@ def compute_class_f1s_and_mean_f1(TP,FP,FN):
     for i in range(len(TP)):
         F1s[i] = 2*TP[i] / (2*TP[i] + FP[i] + FN[i])
     # mean F1
-    mean_F1 = 2*np.sum(TP) / (2*np.sum(TP) + np.sum(FP) + np.sum(FN))
+    mean_F1 = 2*np.sum(TP[:-1]) / (2*np.sum(TP[:-1]) + np.sum(FP[:-1]) + np.sum(FN[:-1]))
     return F1s, mean_F1
 
-
 @click.command(context_settings={"help_option_names": ["-h", "--help"]})
 @click.argument(
     "medical_task",