utils.py
import pandas as pd
import numpy as np


class AverageMeter(object):
    """Computes and stores the average and current value"""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count
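
# Illustrative usage (a sketch, not part of the original module): AverageMeter
# keeps a sample-weighted running mean, e.g. of a per-batch training loss.
# The numbers below are made up for demonstration only.
#
#     meter = AverageMeter()
#     meter.update(0.9, n=32)   # batch loss 0.9 averaged over 32 samples
#     meter.update(0.7, n=32)   # next batch
#     meter.val                 # 0.7 -> most recent value
#     meter.avg                 # 0.8 -> running mean over all 64 samples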
def select_for_metric(df_answer, df_submission):
    """Align the submission rows with the answer rows by their shared id column.

    Args
        df_answer: pandas DataFrame with the ground-truth rows.
        df_submission: pandas DataFrame with the predicted rows.

    Return
        (df_answer, df_submission): DataFrames re-indexed by the id column,
        with the submission reordered to match the answer's row order.
        Note that the index is set on the passed-in frames in place.
    """
    df_1 = df_answer
    df_2 = df_submission
    # The first column is assumed to hold the sample id shared by both frames.
    id_column = df_1.columns[0]
    df_1.index = df_1[id_column]
    df_2.index = df_2[id_column]
    # Reorder the submission so its rows line up with the answer rows.
    df_2 = df_2.loc[df_1.index]
    return df_1, df_2
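
# Illustrative usage (a sketch): aligning a toy submission to a toy answer
# frame before scoring. The column names 'id', 'landmark_id' and 'conf' follow
# the commented-out conversion inside gap() below and are otherwise assumptions.
#
#     answer = pd.DataFrame({'id': ['a', 'b'], 'landmark_id': [1, 2]})
#     submission = pd.DataFrame({'id': ['b', 'a'],
#                                'landmark_id': [2, 3],
#                                'conf': [0.9, 0.4]})
#     answer, submission = select_for_metric(answer, submission)
#     # submission rows are now ordered ['a', 'b'] to match the answer.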
def gap(y_true, y_pred):
    """
    Compute Global Average Precision score (GAP).

    Parameters
    ----------
    y_true : Dict[Any, Any]
        Dictionary with query ids and true ids for query samples.
        A value of None means the query has no target.
    y_pred : Dict[Any, Tuple[Any, float]]
        Dictionary with query ids and predictions (predicted id, confidence
        level).

    Returns
    -------
    float
        GAP score.

    Notes
    -----
    GAP = (1 / M) * sum_i P(i) * rel(i), where M is the number of queries
    with a target, P(i) is the precision at rank i after sorting the
    predictions by descending confidence, and rel(i) is the relevance of
    prediction i: it's 1 if the i-th prediction is correct, and 0 otherwise.

    Examples
    --------
    >>> y_true = {
    ...     'id_001': 123,
    ...     'id_002': None,
    ...     'id_003': 999,
    ...     'id_004': 123,
    ...     'id_005': 999,
    ...     'id_006': 888,
    ...     'id_007': 666,
    ...     'id_008': 666,
    ...     'id_009': None,
    ...     'id_010': 666,
    ... }
    >>> y_pred = {
    ...     'id_001': (123, 0.15),
    ...     'id_002': (123, 0.10),
    ...     'id_003': (999, 0.30),
    ...     'id_005': (999, 0.40),
    ...     'id_007': (555, 0.60),
    ...     'id_008': (666, 0.70),
    ...     'id_010': (666, 0.99),
    ... }
    >>> gap(y_true, y_pred)
    0.5479166666666666
    """
    # The commented-out block below shows how the dicts can be built from
    # DataFrames aligned with select_for_metric; it is kept for reference.
    # true_df, pred_df = select_for_metric(true_df, pred_df)
    # y_pred = {}
    # for i, value in zip(pred_df['id'], pred_df[['landmark_id', 'conf']].values):
    #     y_pred[i] = tuple(value)
    #
    # y_true = {}
    # for i, value in zip(true_df['id'], true_df[['landmark_id']].values):
    #     y_true[i] = tuple(value)

    # Rank predictions by descending confidence.
    indexes = list(y_pred.keys())
    indexes.sort(
        key=lambda x: -y_pred[x][1],
    )

    # M: the number of queries that actually have a target.
    queries_with_target = len([i for i in y_true.values() if i is not None])

    correct_predictions = 0
    total_score = 0.
    for i, k in enumerate(indexes, 1):
        relevance_of_prediction_i = 0
        if y_true[k] == y_pred[k][0]:
            correct_predictions += 1
            relevance_of_prediction_i = 1
        # Precision at rank i contributes only when prediction i is correct.
        precision_at_rank_i = correct_predictions / i
        total_score += precision_at_rank_i * relevance_of_prediction_i

    return 1 / queries_with_target * total_score
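

# End-to-end sketch (assumptions flagged below): build the y_true / y_pred
# dicts from answer and submission DataFrames, mirroring the commented-out
# conversion inside gap(), and score them. The column names 'id',
# 'landmark_id' and 'conf' and all values here are illustrative assumptions.
if __name__ == '__main__':
    df_answer = pd.DataFrame({
        'id': ['id_001', 'id_002', 'id_003'],
        'landmark_id': [123, 999, 123],
    })
    df_submission = pd.DataFrame({
        'id': ['id_003', 'id_001', 'id_002'],
        'landmark_id': [123, 123, 888],
        'conf': [0.9, 0.5, 0.2],
    })

    true_df, pred_df = select_for_metric(df_answer, df_submission)

    # Use scalar landmark ids (not 1-tuples) so they compare directly against
    # the predicted id inside gap().
    y_true = dict(zip(true_df['id'], true_df['landmark_id']))
    y_pred = {
        i: (landmark, conf)
        for i, landmark, conf in zip(pred_df['id'],
                                     pred_df['landmark_id'],
                                     pred_df['conf'])
    }

    print('GAP:', gap(y_true, y_pred))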