# compute_sigmas.py
import pickle
import numpy as np


def estimate_top_view_sigmas(pkl_file):
    ''' Call this on 'AMT_data/top/AMT15_csv.pkl'.
    We estimate keypoint accuracy in terms of the Object Keypoint Similarity (OKS) of Ronchi and Perona 2017.
    The OKS between a detection theta_hat(p) and the annotation theta(p) of a person p is the average, over
    the parts labeled in the ground truth (v_i), of the Keypoint Similarity between corresponding keypoint
    pairs. From equation 1 in that paper, the Keypoint Similarity (ks) is defined as:
        ks(theta_hat(p), theta(p)) = exp( -||theta_hat(p) - theta(p)||_2^2 / (2*s^2*k^2) )
    where s is the scale of the instance (the square root of its area in pixels) and k is a keypoint-specific
    constant that captures the degree of between-annotator variability in labeling that point.
    In the COCO evaluation API, the value of k for annotations of human poses ranges from 0.025 to 0.107,
    with more reliably placed body parts (such as the eyes) having the lowest values of k.
    Should return:
        array([0.03889304, 0.04519708, 0.04505679, 0.04170943, 0.06655941,
               0.0666152 , 0.04375891, 0.06663153, 0.08359647])
    '''
    with open(pkl_file, 'rb') as fp:
        data = pickle.load(fp)

    # Gather every annotator's (x, y) clicks for each of the 9 top-view keypoints,
    # scaled from normalized coordinates to the top-view camera dimensions (1024 x 570).
    X = []
    Y = []
    for j in range(9):
        x = []
        y = []
        for i in range(len(data)):
            x.append((data[i]['ann_B']['X'][:, j] * 1024).tolist())
            x.append((data[i]['ann_W']['X'][:, j] * 1024).tolist())
            y.append((data[i]['ann_B']['Y'][:, j] * 570).tolist())
            y.append((data[i]['ann_W']['Y'][:, j] * 570).tolist())
        X.append(x)
        Y.append(y)

    # One area per annotated instance ('ann_B' and 'ann_W'), in the same order
    # the coordinates were appended above.
    area_vec = []
    for i in range(len(data)):
        area_vec.append(data[i]['ann_B']['area'])
        area_vec.append(data[i]['ann_W']['area'])
    area_vec = np.asarray(area_vec)

    # For each keypoint j and instance i: mean over x and y of the between-annotator
    # standard deviation, normalized to match the COCO definition of sigma; the
    # per-keypoint sigma is the mean over instances.
    D = np.zeros((len(X), len(X[0])))
    sigma = np.zeros(len(X))
    for j in range(len(X)):
        for i in range(len(X[0])):
            xy = np.asarray([X[j][i], Y[j][i]]).T  # (num annotators) x 2
            D[j][i] = np.mean(np.std(xy, 0))
            D[j][i] /= np.sqrt(area_vec[i]) * np.sqrt(2)  # normalization to match the COCO definition of sigma
        sigma[j] = np.mean(D[j])
    return sigma
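

# The sigmas above plug into the keypoint-similarity term described in the
# docstring. A minimal sketch of that equation, assuming s^2 = instance area
# in pixels; `keypoint_similarity` and its arguments are illustrative names,
# not part of the original file.
def keypoint_similarity(det_xy, gt_xy, area, k):
    # Squared L2 distance between detected and ground-truth keypoint, in pixels^2.
    d2 = np.sum((np.asarray(det_xy) - np.asarray(gt_xy)) ** 2)
    # ks = exp( -d^2 / (2*s^2*k^2) ), with s^2 the instance area.
    return np.exp(-d2 / (2.0 * area * k ** 2))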


def estimate_front_view_sigmas(pkl_file):
    ''' Call this on 'AMT_data/front/AMT15_csv.pkl'.
    We estimate keypoint accuracy in terms of the Object Keypoint Similarity (OKS) of Ronchi and Perona 2017.
    The OKS between a detection theta_hat(p) and the annotation theta(p) of a person p is the average, over
    the parts labeled in the ground truth (v_i), of the Keypoint Similarity between corresponding keypoint
    pairs. From equation 1 in that paper, the Keypoint Similarity (ks) is defined as:
        ks(theta_hat(p), theta(p)) = exp( -||theta_hat(p) - theta(p)||_2^2 / (2*s^2*k^2) )
    where s is the scale of the instance (the square root of its area in pixels) and k is a keypoint-specific
    constant that captures the degree of between-annotator variability in labeling that point.
    In the COCO evaluation API, the value of k for annotations of human poses ranges from 0.025 to 0.107,
    with more reliably placed body parts (such as the eyes) having the lowest values of k.
    Should return:
        array([0.0873309 , 0.08602119, 0.0868102 , 0.09301264, 0.12411955,
               0.12454425, 0.0861696 , 0.1080549 , 0.14485149, 0.1274594 ,
               0.12496106, 0.12034103, 0.12748996])
    '''
    with open(pkl_file, 'rb') as fp:
        data = pickle.load(fp)

    # Gather every annotator's (x, y) clicks for each of the 13 front-view keypoints,
    # scaled from normalized coordinates to the front-view camera dimensions (1280 x 500).
    X = []
    Y = []
    for j in range(13):
        x = []
        y = []
        for i in range(len(data)):
            x.append((data[i]['ann_B']['X'][:, j] * 1280).tolist())
            x.append((data[i]['ann_W']['X'][:, j] * 1280).tolist())
            y.append((data[i]['ann_B']['Y'][:, j] * 500).tolist())
            y.append((data[i]['ann_W']['Y'][:, j] * 500).tolist())
        X.append(x)
        Y.append(y)

    # One area per annotated instance ('ann_B' and 'ann_W'), in the same order
    # the coordinates were appended above.
    area_vec = []
    for i in range(len(data)):
        area_vec.append(data[i]['ann_B']['area'])
        area_vec.append(data[i]['ann_W']['area'])
    area_vec = np.asarray(area_vec)

    # For each keypoint j and instance i: mean over x and y of the between-annotator
    # standard deviation, normalized to match the COCO definition of sigma; the
    # per-keypoint sigma is the mean over instances.
    D = np.zeros((len(X), len(X[0])))
    sigma = np.zeros(len(X))
    for j in range(len(X)):
        for i in range(len(X[0])):
            xy = np.asarray([X[j][i], Y[j][i]]).T  # (num annotators) x 2
            D[j][i] = np.mean(np.std(xy, 0))
            D[j][i] /= np.sqrt(area_vec[i]) * np.sqrt(2)  # normalization to match the COCO definition of sigma
        sigma[j] = np.mean(D[j])
    return sigma
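

# A hedged usage sketch: the pickle paths follow the docstrings above and are
# assumptions about the local data layout, not guaranteed by the original file.
if __name__ == '__main__':
    top_sigmas = estimate_top_view_sigmas('AMT_data/top/AMT15_csv.pkl')
    front_sigmas = estimate_front_view_sigmas('AMT_data/front/AMT15_csv.pkl')
    print('top-view sigmas:', top_sigmas)
    print('front-view sigmas:', front_sigmas)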