Mar 6th

Revision for README and the codes in tc-ssn.
coin-dataset · Mar 6, 2019 · 02026c7 · 02026c7
1 parent 3eda4a6
commit 02026c7
Show file tree

Hide file tree

Showing 38 changed files with 110 additions and 225 deletions.
diff --git a/README.md b/README.md
@@ -17,8 +17,13 @@ Note that, these methods use frame-wise fisher vector as video representation, w
 
 ### References
 [1] Y. Zhao, Y. Xiong, L. Wang, Z. Wu, X. Tang, and D. Lin. Temporal action detection with structured segment networks. In ICCV, pages 2933–2942, 2017.
+
 [2] H. Xu, A. Das, and K. Saenko. R-C3D: region convolutional 3d network for temporal activity detection. In ICCV, pages 5794–5803, 2017.
+
 [3] A. Richard, H. Kuehne, and J. Gall. Action sets: Weakly supervised action segmentation without ordering constraints. In CVPR, pages 5987–5996, 2018.
+
 [4] A. Richard, H. Kuehne, A. Iqbal, and J. Gall. Neuralnetwork-viterbi: A framework for weakly supervised video learning. In CVPR, pages 7386–7395, 2018.
+
 [5] L. Ding and C. Xu. Weakly-supervised action segmentation with iterative soft boundary assignment. In CVPR, pages 6508–6516, 2018.
-[6] J. Donahue, L. A. Hendricks, M. Rohrbach, S. Venugopalan, S. Guadarrama, K. Saenko, and T. Darrell. Long-term recurrent convolutional networks for visual recognition and description. TPAMI, 39(4):677–691, 2017.
+
+[6] J. Donahue, L. A. Hendricks, M. Rohrbach, S. Venugopalan, S. Guadarrama, K. Saenko, and T. Darrell. Long-term recurrent convolutional networks for visual recognition and description. TPAMI, 39(4):677–691, 2017.
diff --git a/tc-ssn/README.md b/tc-ssn/README.md
@@ -9,6 +9,20 @@
   - terminaltables 3.1.0
   - pandas 0.23.4
 
+### The structure of SSN score file
+
+The score file dumped by SSN is in `pkl` format. It is serialised from a Python `dict` in which the paths of the video frames serve as keys and 4-element tuples of numpy arrays serve as values. The meaning of the four arrays is as follows:
+
+* The shape of the 1st array in the tuple is (N,2), where N denotes the number of proposals. The elements in this array indicate the lower and upper bounds of the proposal ranges.
+* The shape of the 2nd array in the tuple is (N,K+1), where K denotes the number of action classes. This array holds the actionness scores.
+* The shape of the 3rd array in the tuple is (N,K). This array holds the completeness scores produced by SSN.
+* The shape of the 4th array in the tuple is (N,K,2). This array holds the regression scores. Each regression score is given as a 2-element array \[`center_regression`, `duration_regression`\]. The regression operation can be formulated as:
+
+```
+regressed_center = range_center+range_duration*center_regression
+regressed_duration = range_duration*exp(duration_regression)
+```
+
 ### Get combined score file
 
 The standalone score file of combined scores is required when refining the combined scores of the RGB and Flow modalities. The program derived from the original evaluation program is used to export the combined scores to a standalone `pkl` file. These programs are `fusion_pkl_generation_eval_detection_results.py` and `fusion_eval_detection_results.py`. Either program exports the same `pkl` file.
@@ -49,4 +63,4 @@ python3 combined_refine.py -c <npy_constrains> -i <src_scores> -o <refined_score
 
 ```sh
 python3 combined_eval_detection_results.py coin_small <combined_score> --externel_score <external_score>
-```
+```
diff --git a/tc-ssn/anet_toolkit/.gitignore b/tc-ssn/anet_toolkit/.gitignore
diff --git a/tc-ssn/anet_toolkit/Evaluation/eval_detection.py b/tc-ssn/anet_toolkit/Evaluation/eval_detection.py
diff --git a/tc-ssn/anet_toolkit/Evaluation/utils.py b/tc-ssn/anet_toolkit/Evaluation/utils.py
diff --git a/tc-ssn/combined_eval_detection_results.py b/tc-ssn/combined_eval_detection_results.py
@@ -43,8 +43,6 @@
 num_class = dataset_configs['num_class']
 test_prop_file = 'data/{}_proposal_list.txt'.format(dataset_configs['test_list'])
 evaluate.number_label = num_class
-# print('hhh')
-# print(test_prop_file)
 
 nms_threshold = args.nms_threshold if args.nms_threshold else dataset_configs['evaluation']['nms_threshold']
 top_k = args.top_k if args.top_k else dataset_configs['evaluation']['top_k']
@@ -125,7 +123,7 @@ def gen_detection_results(video_id, score_tp):
 
 		# load combined scores from external numpys
 		ex_vid = video_id.split("/")[-1]
-		ex_scores = np.load(os.path.join(args.externel_score,ex_vid + ".npy"))
+		ex_scores = np.load(os.path.join(args.externel_score,"proposal_" + ex_vid + ".npy"))
 		combined_scores = ex_scores[:,:,4]
 
 		keep_idx = np.argsort(combined_scores.ravel())[-top_k:]
@@ -245,49 +243,39 @@ def callback(rst):
 	ap_values[rst[0], rst[1]] = rst[2][0]
 	ar_values[rst[0], rst[1]] = rst[2][1]
 
-zdy_miou = np.zeros((num_class,))
+zdy_miou = np.zeros((num_class,)) # used to store the mIoU of each classes
 
-pku_gt_by_class = [[] for i in range(num_class)]
-pku_prediction_by_class = [[] for i in range(num_class)]
-pku_gt = []
-pku_prediction = []
+gt_by_class = [[] for i in range(num_class)]
+prediction_by_class = [[] for i in range(num_class)]
+gt = []
+prediction = []
 for cls in range(num_class):
 	for zdy_record in gt_by_cls[cls].itertuples():
-		pku_gt_by_class[cls].append([cls,zdy_record[2],zdy_record[3],1,zdy_record[1]])
-	pku_gt += pku_gt_by_class[cls]
+		gt_by_class[cls].append([cls,zdy_record[2],zdy_record[3],1,zdy_record[1]])
+	gt += gt_by_class[cls]
 	for zdy_record in plain_detections[cls].itertuples():
-		pku_prediction_by_class[cls].append([zdy_record[2],zdy_record[3],zdy_record[4],zdy_record[5],zdy_record[1]])
-	pku_prediction += pku_prediction_by_class[cls]
+		prediction_by_class[cls].append([zdy_record[2],zdy_record[3],zdy_record[4],zdy_record[5],zdy_record[1]])
+	prediction += prediction_by_class[cls]
 	if cls!=0:
-		zdy_miou[cls] = evaluate.miou(pku_prediction_by_class[cls],pku_gt_by_class[cls])
+		zdy_miou[cls] = evaluate.miou(prediction_by_class[cls],gt_by_class[cls])
 miou = zdy_miou[1:].mean()
 
-print(str(len(pku_gt)))
-print(str(len(pku_prediction)))
+print(str(len(gt)))
+print(str(len(prediction)))
 
 f1_values = np.zeros((len(iou_range),))
 
 pool = Pool(args.ap_workers)
 jobs = []
 for iou_idx, min_overlap in enumerate(iou_range):
-#for iou_idx, min_overlap in enumerate([0.6]):
 	for cls in range(num_class):
-	#for cls in [304]:
-		#jobs.append(pool.apply_async(eval_ap, args=([min_overlap], iou_idx, cls, gt_by_cls[cls], plain_detections[cls],),callback=callback))
-		jobs.append(pool.apply_async(eval_ap, args=([min_overlap], iou_idx, cls, pku_gt_by_class[cls], pku_prediction_by_class[cls],),callback=callback))
-	f1 = evaluate.f1(pku_prediction,min_overlap,pku_gt)
+		jobs.append(pool.apply_async(eval_ap, args=([min_overlap], iou_idx, cls, gt_by_class[cls], prediction_by_class[cls],),callback=callback))
+	f1 = evaluate.f1(prediction,min_overlap,gt)
 	f1_values[iou_idx] = f1
 pool.close()
 pool.join()
 print("Evaluation done.\n\n")
 
-"""for zdy_i,zdy_iou in enumerate(iou_range):
-	with open("accuracy_per_cls/cls_pku{:f}.txt".format(zdy_iou),"w") as zdy_f:
-		for zdy_cls in range(num_class):
-			zdy_f.write("{:d}\t{:.04f}\n".format(zdy_cls,ap_values[zdy_cls][zdy_i]))"""
-
-#map_iou = ap_values[1:,:].mean(axis=0)
-#mar = ar_values[1:,:].mean(axis=0)
 map_iou = ap_values.mean(axis=0)
 mar = ar_values.mean(axis=0)
 display_title = "Detection Performance on {}".format(args.dataset)

diff --git a/tc-ssn/combined_refine.py b/tc-ssn/combined_refine.py
@@ -1,6 +1,11 @@
 #!/usr/bin/python3
 
-#import json
+"""
+Refine the combined scores computed from the actionness and completeness scores output by SSN.
+
+Last revision: Danyang Zhang @THU_IVG @Mar 6th, 2019 CST
+"""
+
 import numpy as np
 import os
 import os.path
@@ -14,7 +19,7 @@
 parser.add_argument("--target","-o",action="store",type=str,default="test_gt_score_combined_refined_fusion")
 args = parser.parse_args()
 
-constraints = np.load(args.constraints)
+constraints = np.load(args.constraints) # constraints matrix
 target_class_count,action_class_count = constraints.shape
 
 numpy_dir = args.src_score
@@ -32,17 +37,18 @@
 	vid = np_file[np_file.find("_")+1:np_file.rfind(".")]
 	premat = np.load(os.path.join(numpy_dir,np_file))
 	combined = premat[:,:,4]
-	#print(str(combined.shape))
 	video_combined = np.sum(combined,axis=0)
 	target_class_combined = np.zeros((target_class_count,))
 	for target_cls in range(target_class_count):
 		for act_cls in range(action_class_count):
 			if constraints[target_cls][act_cls]==1:
-				target_class_combined[target_cls] = video_combined[act_cls]
-	probable_target_class = np.argmax(target_class_combined)
+				target_class_combined[target_cls] += video_combined[act_cls]
+        # aggregate the scores of the action classes under the identical task/target class
+	probable_target_class = np.argmax(target_class_combined) # infer the probable task class
 	mask = np.full(combined.shape,math.exp(-2))
 	mask[:,0] = 1
 	mask[:,np.where(constraints[probable_target_class])[0]] = 1
 	combined *= mask
+        # refine the combined scores
 	premat[:,:,4] = combined
 	np.save(os.path.join(target_dir,np_file),premat)
diff --git a/tc-ssn/data/coin_small_tag_train_proposal_list.txt b/tc-ssn/data/coin_small_tag_train_proposal_list.txt
diff --git a/tc-ssn/data/coin_small_tag_val_proposal_list.txt b/tc-ssn/data/coin_small_tag_val_proposal_list.txt
diff --git a/tc-ssn/data/dataset_cfg.yaml b/tc-ssn/data/dataset_cfg.yaml
diff --git a/tc-ssn/data/reference_models.yaml b/tc-ssn/data/reference_models.yaml
diff --git a/tc-ssn/data_processing.py b/tc-ssn/data_processing.py
@@ -1,5 +1,11 @@
 #!/usr/bin/python3
 
+"""
+Convert the pkl scores to npy.
+
+Last revision: Danyang Zhang @THU_IVG @Mar 6th, 2019 CST
+"""
+
 import numpy as np
 import json
 import os
@@ -23,49 +29,36 @@
 
 for v in scores:
 	vid = v.split("/")[-1]
-	#video_duration = annotations[vid]["end"]-annotations[vid]["start"]
 	video_duration = annotations[vid]["duration"]
 
 	proposals = scores[v][0]
 	actionness = scores[v][1]
 	completeness = scores[v][2]
 	regression = scores[v][3]
 
-	score_max = np.max(actionness[:,1:],axis=-1)
-	exp_score = np.exp(actionness[:,1:]-score_max[...,None])
+	score_max = np.max(actionness,axis=-1)
+	exp_score = np.exp(actionness-score_max[...,None])
 	exp_com = np.exp(completeness)
-	combined_scores = (exp_score/np.sum(exp_score,axis=-1)[...,None])*exp_com
+	combined_scores = (exp_score/np.sum(exp_score,axis=-1)[...,None])[:,1:]*exp_com
+        # combined scores are calculated as softmax(actionness)*exp(completeness) according to the code offered by SSN
 
 	proposal_count = len(proposals)
 	class_count = completeness.shape[1]
 	proposal_npy = np.zeros((proposal_count,class_count,7))
+        # the columns in proposal_npy: 
+        # start of the proposal range, end of the proposal range, exp(actionness), exp(completeness), combined score, actionness, completeness
+
 	for i in range(proposal_count):
 		start = proposals[i][0]*video_duration
 		end = proposals[i][1]*video_duration
 
 		for c in range(class_count):
-			center_proportion = (proposals[i][0]+proposals[i][1])/2.
-			duration_proportion = proposals[i][1]-proposals[i][0]
-			center_proportion += regression[i][c][0]*duration_proportion
-			duration_proportion *= math.exp(regression[i][c][1])
-			start_proportion = center_proportion-duration_proportion/2.
-			end_proportion = center_proportion+duration_proportion/2.
-			start_proportion = max(start_proportion,0.)
-			start_proportion = min(start_proportion,1.)
-			end_proportion = max(end_proportion,0.)
-			end_proportion = min(end_proportion,1.)
-			#pre_cls["regressed_interval"] = (start_proportion*video_duration,end_proportion*video_duration)
-
-			proposal_npy[i][c][0] = start_proportion*video_duration
-			proposal_npy[i][c][1] = end_proportion*video_duration
-			proposal_npy[i][c][2] = exp_score[i][c]
+			proposal_npy[i][c][0] = proposals[i][0]
+			proposal_npy[i][c][1] = proposals[i][1]
+			proposal_npy[i][c][2] = exp_score[i][c+1]
 			proposal_npy[i][c][3] = exp_com[i][c]
 			proposal_npy[i][c][4] = combined_scores[i][c]
 			proposal_npy[i][c][5] = actionness[i][c+1]
 			proposal_npy[i][c][6] = completeness[i][c]
-
 	npy_name = os.path.join(output_prefix,"proposal_" + vid)
 	np.save(npy_name,proposal_npy)
-	np.save(npy_name + "_groundtruth",groundtruth_npy)
-	#prediction_dict[vid]["prediction_numpy"] = npy_name + ".npy"
-	#prediction_dict[vid]["groundtruth_numpy"] = npy_name + "_groundtruth" + ".npy"
diff --git a/tc-ssn/eval_detection_results.py b/tc-ssn/eval_detection_results.py
@@ -40,8 +40,6 @@
 num_class = dataset_configs['num_class']
 test_prop_file = 'data/{}_proposal_list.txt'.format(dataset_configs['test_list'])
 evaluate.number_label = num_class
-# print('hhh')
-# print(test_prop_file)
 
 nms_threshold = args.nms_threshold if args.nms_threshold else dataset_configs['evaluation']['nms_threshold']
 top_k = args.top_k if args.top_k else dataset_configs['evaluation']['top_k']
@@ -239,47 +237,37 @@ def callback(rst):
 
 zdy_miou = np.zeros((num_class,))
 
-pku_gt_by_class = [[] for i in range(num_class)]
-pku_prediction_by_class = [[] for i in range(num_class)]
-pku_gt = []
-pku_prediction = []
+gt_by_class = [[] for i in range(num_class)]
+prediction_by_class = [[] for i in range(num_class)]
+gt = []
+prediction = []
 for cls in range(num_class):
 	for zdy_record in gt_by_cls[cls].itertuples():
-		pku_gt_by_class[cls].append([cls,zdy_record[2],zdy_record[3],1,zdy_record[1]])
-	pku_gt += pku_gt_by_class[cls]
+		gt_by_class[cls].append([cls,zdy_record[2],zdy_record[3],1,zdy_record[1]])
+	gt += gt_by_class[cls]
 	for zdy_record in plain_detections[cls].itertuples():
-		pku_prediction_by_class[cls].append([zdy_record[2],zdy_record[3],zdy_record[4],zdy_record[5],zdy_record[1]])
-	pku_prediction += pku_prediction_by_class[cls]
+		prediction_by_class[cls].append([zdy_record[2],zdy_record[3],zdy_record[4],zdy_record[5],zdy_record[1]])
+	prediction += prediction_by_class[cls]
 	if cls!=0:
-		zdy_miou[cls] = evaluate.miou(pku_prediction_by_class[cls],pku_gt_by_class[cls])
+		zdy_miou[cls] = evaluate.miou(prediction_by_class[cls],gt_by_class[cls])
 miou = zdy_miou[1:].mean()
 
-print(str(len(pku_gt)))
-print(str(len(pku_prediction)))
+print(str(len(gt)))
+print(str(len(prediction)))
 
 f1_values = np.zeros((len(iou_range),))
 
 pool = Pool(args.ap_workers)
 jobs = []
 for iou_idx, min_overlap in enumerate(iou_range):
-#for iou_idx, min_overlap in enumerate([0.6]):
 	for cls in range(num_class):
-	#for cls in [304]:
-		#jobs.append(pool.apply_async(eval_ap, args=([min_overlap], iou_idx, cls, gt_by_cls[cls], plain_detections[cls],),callback=callback))
-		jobs.append(pool.apply_async(eval_ap, args=([min_overlap], iou_idx, cls, pku_gt_by_class[cls], pku_prediction_by_class[cls],),callback=callback))
-	f1 = evaluate.f1(pku_prediction,min_overlap,pku_gt)
+		jobs.append(pool.apply_async(eval_ap, args=([min_overlap], iou_idx, cls, gt_by_class[cls], prediction_by_class[cls],),callback=callback))
+	f1 = evaluate.f1(prediction,min_overlap,gt)
 	f1_values[iou_idx] = f1
 pool.close()
 pool.join()
 print("Evaluation done.\n\n")
 
-"""for zdy_i,zdy_iou in enumerate(iou_range):
-	with open("accuracy_per_cls/cls_pku{:f}.txt".format(zdy_iou),"w") as zdy_f:
-		for zdy_cls in range(num_class):
-			zdy_f.write("{:d}\t{:.04f}\n".format(zdy_cls,ap_values[zdy_cls][zdy_i]))"""
-
-#map_iou = ap_values[1:,:].mean(axis=0)
-#mar = ar_values[1:,:].mean(axis=0)
 map_iou = ap_values.mean(axis=0)
 mar = ar_values.mean(axis=0)
 display_title = "Detection Performance on {}".format(args.dataset)