Skip to content

Commit

Permalink
Mar 6th
Browse files Browse the repository at this point in the history
Revision for README and the codes in tc-ssn.
  • Loading branch information
zdy023 committed Mar 6, 2019
1 parent 3eda4a6 commit 02026c7
Show file tree
Hide file tree
Showing 38 changed files with 110 additions and 225 deletions.
7 changes: 6 additions & 1 deletion README.md
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,13 @@ Note that, these methods use frame-wise fisher vector as video representation, w

### References
[1] Y. Zhao, Y. Xiong, L. Wang, Z. Wu, X. Tang, and D. Lin. Temporal action detection with structured segment networks. In ICCV, pages 2933–2942, 2017.

[2] H. Xu, A. Das, and K. Saenko. R-C3D: region convolutional 3d network for temporal activity detection. In ICCV, pages 5794–5803, 2017.

[3] A. Richard, H. Kuehne, and J. Gall. Action sets: Weakly supervised action segmentation without ordering constraints. In CVPR, pages 5987–5996, 2018.

[4] A. Richard, H. Kuehne, A. Iqbal, and J. Gall. NeuralNetwork-Viterbi: A framework for weakly supervised video learning. In CVPR, pages 7386–7395, 2018.

[5] L. Ding and C. Xu. Weakly-supervised action segmentation with iterative soft boundary assignment. In CVPR, pages 6508–6516, 2018.
[6] J. Donahue, L. A. Hendricks, M. Rohrbach, S. Venugopalan, S. Guadarrama, K. Saenko, and T. Darrell. Long-term recurrent convolutional networks for visual recognition and description. TPAMI, 39(4):677–691, 2017.

[6] J. Donahue, L. A. Hendricks, M. Rohrbach, S. Venugopalan, S. Guadarrama, K. Saenko, and T. Darrell. Long-term recurrent convolutional networks for visual recognition and description. TPAMI, 39(4):677–691, 2017.
16 changes: 15 additions & 1 deletion tc-ssn/README.md
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,20 @@
- terminaltables 3.1.0
- pandas 0.23.4

### The structure of SSN score file

The score file dumped by SSN is in `pkl` format. It is serialised from a Python `dict` in which the paths of the video frames serve as keys and 4-element tuples of numpy arrays serve as values. The meaning of the four arrays is as follows:

* The shape of the 1st array in the tuple is (N,2), where N denotes the number of proposals. The elements of this array indicate the lower and upper bounds of the proposal ranges.
* The shape of the 2nd array in the tuple is (N,K+1), where K denotes the number of action classes. This array holds the actionness scores.
* The shape of the 3rd array in the tuple is (N,K). This array holds the completeness scores produced by SSN.
* The shape of the 4th array in the tuple is (N,K,2). This array holds the regression scores. Each regression score is given as a 2-element array \[`center_regression`, `duration_regression`\]. The regression operation can be formulated as:

```
regressed_center = range_center+range_duration*center_regression
regressed_duration = range_duration*exp(duration_regression)
```

### Get combined score file

A standalone file of combined scores is required when refining the combined scores of the RGB and Flow modalities. Programs derived from the original evaluation program are used to export the combined scores to a standalone `pkl` file. These programs are `fusion_pkl_generation_eval_detection_results.py` and `fusion_eval_detection_results.py`. Either program exports the same `pkl` file.
Expand Down Expand Up @@ -49,4 +63,4 @@ python3 combined_refine.py -c <npy_constrains> -i <src_scores> -o <refined_score

```sh
python3 combined_eval_detection_results.py coin_small <combined_score> --externel_score <external_score>
```
```
Empty file modified tc-ssn/anet_toolkit/.gitignore
100755 → 100644
Empty file.
Empty file modified tc-ssn/anet_toolkit/Evaluation/eval_detection.py
100755 → 100644
Empty file.
Empty file modified tc-ssn/anet_toolkit/Evaluation/utils.py
100755 → 100644
Empty file.
42 changes: 15 additions & 27 deletions tc-ssn/combined_eval_detection_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,6 @@
num_class = dataset_configs['num_class']
test_prop_file = 'data/{}_proposal_list.txt'.format(dataset_configs['test_list'])
evaluate.number_label = num_class
# print('hhh')
# print(test_prop_file)

nms_threshold = args.nms_threshold if args.nms_threshold else dataset_configs['evaluation']['nms_threshold']
top_k = args.top_k if args.top_k else dataset_configs['evaluation']['top_k']
Expand Down Expand Up @@ -125,7 +123,7 @@ def gen_detection_results(video_id, score_tp):

# load combined scores from external numpys
ex_vid = video_id.split("/")[-1]
ex_scores = np.load(os.path.join(args.externel_score,ex_vid + ".npy"))
ex_scores = np.load(os.path.join(args.externel_score,"proposal_" + ex_vid + ".npy"))
combined_scores = ex_scores[:,:,4]

keep_idx = np.argsort(combined_scores.ravel())[-top_k:]
Expand Down Expand Up @@ -245,49 +243,39 @@ def callback(rst):
ap_values[rst[0], rst[1]] = rst[2][0]
ar_values[rst[0], rst[1]] = rst[2][1]

zdy_miou = np.zeros((num_class,))
zdy_miou = np.zeros((num_class,)) # used to store the mIoU of each classes

pku_gt_by_class = [[] for i in range(num_class)]
pku_prediction_by_class = [[] for i in range(num_class)]
pku_gt = []
pku_prediction = []
gt_by_class = [[] for i in range(num_class)]
prediction_by_class = [[] for i in range(num_class)]
gt = []
prediction = []
for cls in range(num_class):
for zdy_record in gt_by_cls[cls].itertuples():
pku_gt_by_class[cls].append([cls,zdy_record[2],zdy_record[3],1,zdy_record[1]])
pku_gt += pku_gt_by_class[cls]
gt_by_class[cls].append([cls,zdy_record[2],zdy_record[3],1,zdy_record[1]])
gt += gt_by_class[cls]
for zdy_record in plain_detections[cls].itertuples():
pku_prediction_by_class[cls].append([zdy_record[2],zdy_record[3],zdy_record[4],zdy_record[5],zdy_record[1]])
pku_prediction += pku_prediction_by_class[cls]
prediction_by_class[cls].append([zdy_record[2],zdy_record[3],zdy_record[4],zdy_record[5],zdy_record[1]])
prediction += prediction_by_class[cls]
if cls!=0:
zdy_miou[cls] = evaluate.miou(pku_prediction_by_class[cls],pku_gt_by_class[cls])
zdy_miou[cls] = evaluate.miou(prediction_by_class[cls],gt_by_class[cls])
miou = zdy_miou[1:].mean()

print(str(len(pku_gt)))
print(str(len(pku_prediction)))
print(str(len(gt)))
print(str(len(prediction)))

f1_values = np.zeros((len(iou_range),))

pool = Pool(args.ap_workers)
jobs = []
for iou_idx, min_overlap in enumerate(iou_range):
#for iou_idx, min_overlap in enumerate([0.6]):
for cls in range(num_class):
#for cls in [304]:
#jobs.append(pool.apply_async(eval_ap, args=([min_overlap], iou_idx, cls, gt_by_cls[cls], plain_detections[cls],),callback=callback))
jobs.append(pool.apply_async(eval_ap, args=([min_overlap], iou_idx, cls, pku_gt_by_class[cls], pku_prediction_by_class[cls],),callback=callback))
f1 = evaluate.f1(pku_prediction,min_overlap,pku_gt)
jobs.append(pool.apply_async(eval_ap, args=([min_overlap], iou_idx, cls, gt_by_class[cls], prediction_by_class[cls],),callback=callback))
f1 = evaluate.f1(prediction,min_overlap,gt)
f1_values[iou_idx] = f1
pool.close()
pool.join()
print("Evaluation done.\n\n")

"""for zdy_i,zdy_iou in enumerate(iou_range):
with open("accuracy_per_cls/cls_pku{:f}.txt".format(zdy_iou),"w") as zdy_f:
for zdy_cls in range(num_class):
zdy_f.write("{:d}\t{:.04f}\n".format(zdy_cls,ap_values[zdy_cls][zdy_i]))"""

#map_iou = ap_values[1:,:].mean(axis=0)
#mar = ar_values[1:,:].mean(axis=0)
map_iou = ap_values.mean(axis=0)
mar = ar_values.mean(axis=0)
display_title = "Detection Performance on {}".format(args.dataset)
Expand Down
16 changes: 11 additions & 5 deletions tc-ssn/combined_refine.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
#!/usr/bin/python3

#import json
"""
Refine the scores combined from the actionness and completeness scores output by SSN.
Last revision: Danyang Zhang @THU_IVG @Mar 6th, 2019 CST
"""

import numpy as np
import os
import os.path
Expand All @@ -14,7 +19,7 @@
parser.add_argument("--target","-o",action="store",type=str,default="test_gt_score_combined_refined_fusion")
args = parser.parse_args()

constraints = np.load(args.constraints)
constraints = np.load(args.constraints) # constraints matrix
target_class_count,action_class_count = constraints.shape

numpy_dir = args.src_score
Expand All @@ -32,17 +37,18 @@
vid = np_file[np_file.find("_")+1:np_file.rfind(".")]
premat = np.load(os.path.join(numpy_dir,np_file))
combined = premat[:,:,4]
#print(str(combined.shape))
video_combined = np.sum(combined,axis=0)
target_class_combined = np.zeros((target_class_count,))
for target_cls in range(target_class_count):
for act_cls in range(action_class_count):
if constraints[target_cls][act_cls]==1:
target_class_combined[target_cls] = video_combined[act_cls]
probable_target_class = np.argmax(target_class_combined)
target_class_combined[target_cls] += video_combined[act_cls]
# aggregate the scores of the action classes under the identical task/target class
probable_target_class = np.argmax(target_class_combined) # infer the probable task class
mask = np.full(combined.shape,math.exp(-2))
mask[:,0] = 1
mask[:,np.where(constraints[probable_target_class])[0]] = 1
combined *= mask
# refine the combined scores
premat[:,:,4] = combined
np.save(os.path.join(target_dir,np_file),premat)
Empty file modified tc-ssn/data/coin_small_tag_train_proposal_list.txt
100755 → 100644
Empty file.
Empty file modified tc-ssn/data/coin_small_tag_val_proposal_list.txt
100755 → 100644
Empty file.
Empty file modified tc-ssn/data/dataset_cfg.yaml
100755 → 100644
Empty file.
Empty file modified tc-ssn/data/reference_models.yaml
100755 → 100644
Empty file.
39 changes: 16 additions & 23 deletions tc-ssn/data_processing.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
#!/usr/bin/python3

"""
Transfer the pkl scores to npy.
Last revision: Danyang Zhang @THU_IVG @Mar 6th, 2019 CST
"""

import numpy as np
import json
import os
Expand All @@ -23,49 +29,36 @@

for v in scores:
vid = v.split("/")[-1]
#video_duration = annotations[vid]["end"]-annotations[vid]["start"]
video_duration = annotations[vid]["duration"]

proposals = scores[v][0]
actionness = scores[v][1]
completeness = scores[v][2]
regression = scores[v][3]

score_max = np.max(actionness[:,1:],axis=-1)
exp_score = np.exp(actionness[:,1:]-score_max[...,None])
score_max = np.max(actionness,axis=-1)
exp_score = np.exp(actionness-score_max[...,None])
exp_com = np.exp(completeness)
combined_scores = (exp_score/np.sum(exp_score,axis=-1)[...,None])*exp_com
combined_scores = (exp_score/np.sum(exp_score,axis=-1)[...,None])[:,1:]*exp_com
# combined scores are calculated as softmax(actionness)*exp(completeness) according to the code offered by SSN

proposal_count = len(proposals)
class_count = completeness.shape[1]
proposal_npy = np.zeros((proposal_count,class_count,7))
# the columns in proposal_npy:
# start of the proposal range, end of the proposal range, exp(actionness), exp(completeness), combined score, actionness, completeness

for i in range(proposal_count):
start = proposals[i][0]*video_duration
end = proposals[i][1]*video_duration

for c in range(class_count):
center_proportion = (proposals[i][0]+proposals[i][1])/2.
duration_proportion = proposals[i][1]-proposals[i][0]
center_proportion += regression[i][c][0]*duration_proportion
duration_proportion *= math.exp(regression[i][c][1])
start_proportion = center_proportion-duration_proportion/2.
end_proportion = center_proportion+duration_proportion/2.
start_proportion = max(start_proportion,0.)
start_proportion = min(start_proportion,1.)
end_proportion = max(end_proportion,0.)
end_proportion = min(end_proportion,1.)
#pre_cls["regressed_interval"] = (start_proportion*video_duration,end_proportion*video_duration)

proposal_npy[i][c][0] = start_proportion*video_duration
proposal_npy[i][c][1] = end_proportion*video_duration
proposal_npy[i][c][2] = exp_score[i][c]
proposal_npy[i][c][0] = proposals[i][0]
proposal_npy[i][c][1] = proposals[i][1]
proposal_npy[i][c][2] = exp_score[i][c+1]
proposal_npy[i][c][3] = exp_com[i][c]
proposal_npy[i][c][4] = combined_scores[i][c]
proposal_npy[i][c][5] = actionness[i][c+1]
proposal_npy[i][c][6] = completeness[i][c]

npy_name = os.path.join(output_prefix,"proposal_" + vid)
np.save(npy_name,proposal_npy)
np.save(npy_name + "_groundtruth",groundtruth_npy)
#prediction_dict[vid]["prediction_numpy"] = npy_name + ".npy"
#prediction_dict[vid]["groundtruth_numpy"] = npy_name + "_groundtruth" + ".npy"
38 changes: 13 additions & 25 deletions tc-ssn/eval_detection_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,6 @@
num_class = dataset_configs['num_class']
test_prop_file = 'data/{}_proposal_list.txt'.format(dataset_configs['test_list'])
evaluate.number_label = num_class
# print('hhh')
# print(test_prop_file)

nms_threshold = args.nms_threshold if args.nms_threshold else dataset_configs['evaluation']['nms_threshold']
top_k = args.top_k if args.top_k else dataset_configs['evaluation']['top_k']
Expand Down Expand Up @@ -239,47 +237,37 @@ def callback(rst):

zdy_miou = np.zeros((num_class,))

pku_gt_by_class = [[] for i in range(num_class)]
pku_prediction_by_class = [[] for i in range(num_class)]
pku_gt = []
pku_prediction = []
gt_by_class = [[] for i in range(num_class)]
prediction_by_class = [[] for i in range(num_class)]
gt = []
prediction = []
for cls in range(num_class):
for zdy_record in gt_by_cls[cls].itertuples():
pku_gt_by_class[cls].append([cls,zdy_record[2],zdy_record[3],1,zdy_record[1]])
pku_gt += pku_gt_by_class[cls]
gt_by_class[cls].append([cls,zdy_record[2],zdy_record[3],1,zdy_record[1]])
gt += gt_by_class[cls]
for zdy_record in plain_detections[cls].itertuples():
pku_prediction_by_class[cls].append([zdy_record[2],zdy_record[3],zdy_record[4],zdy_record[5],zdy_record[1]])
pku_prediction += pku_prediction_by_class[cls]
prediction_by_class[cls].append([zdy_record[2],zdy_record[3],zdy_record[4],zdy_record[5],zdy_record[1]])
prediction += prediction_by_class[cls]
if cls!=0:
zdy_miou[cls] = evaluate.miou(pku_prediction_by_class[cls],pku_gt_by_class[cls])
zdy_miou[cls] = evaluate.miou(prediction_by_class[cls],gt_by_class[cls])
miou = zdy_miou[1:].mean()

print(str(len(pku_gt)))
print(str(len(pku_prediction)))
print(str(len(gt)))
print(str(len(prediction)))

f1_values = np.zeros((len(iou_range),))

pool = Pool(args.ap_workers)
jobs = []
for iou_idx, min_overlap in enumerate(iou_range):
#for iou_idx, min_overlap in enumerate([0.6]):
for cls in range(num_class):
#for cls in [304]:
#jobs.append(pool.apply_async(eval_ap, args=([min_overlap], iou_idx, cls, gt_by_cls[cls], plain_detections[cls],),callback=callback))
jobs.append(pool.apply_async(eval_ap, args=([min_overlap], iou_idx, cls, pku_gt_by_class[cls], pku_prediction_by_class[cls],),callback=callback))
f1 = evaluate.f1(pku_prediction,min_overlap,pku_gt)
jobs.append(pool.apply_async(eval_ap, args=([min_overlap], iou_idx, cls, gt_by_class[cls], prediction_by_class[cls],),callback=callback))
f1 = evaluate.f1(prediction,min_overlap,gt)
f1_values[iou_idx] = f1
pool.close()
pool.join()
print("Evaluation done.\n\n")

"""for zdy_i,zdy_iou in enumerate(iou_range):
with open("accuracy_per_cls/cls_pku{:f}.txt".format(zdy_iou),"w") as zdy_f:
for zdy_cls in range(num_class):
zdy_f.write("{:d}\t{:.04f}\n".format(zdy_cls,ap_values[zdy_cls][zdy_i]))"""

#map_iou = ap_values[1:,:].mean(axis=0)
#mar = ar_values[1:,:].mean(axis=0)
map_iou = ap_values.mean(axis=0)
mar = ar_values.mean(axis=0)
display_title = "Detection Performance on {}".format(args.dataset)
Expand Down
Loading

0 comments on commit 02026c7

Please sign in to comment.