-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patheval.py
144 lines (121 loc) · 5.49 KB
/
eval.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
from tqdm import tqdm
import re
import os
def generate_result(_tokenizer, _model, _prompts: [str], _device='cuda'):
_inputs = _tokenizer(_prompts, return_tensors='pt', padding=True).to(_device)
_model.to(_device)
_outputs = _model.generate(input_ids=_inputs['input_ids'], attention_mask=_inputs['attention_mask'],
do_sample=False, max_new_tokens=128)
return _tokenizer.batch_decode(_outputs, skip_special_tokens=True)
def out_filter(text):
pattern = r'\) '
text = re.sub(pattern, '\n', text)
if text.endswith(')'):
text = text[:-1]
result = []
for item in text.split('\n'):
try:
if ', ' in item:
item = item[1:]
result.append({
item.split(', ')[1]: item.split(', ')[0]
})
except Exception as e:
continue
return result
def evaluation(predictions, labels, entity_type, print_all=False, raw_text=None):
entity_type = set([i[2:] for i in entity_type])
predictions = [out_filter(p[0]) for p in predictions]
# token num
predict_num = {i: 0 for i in entity_type}
correct_num = {i: 0 for i in entity_type}
for i, seq in enumerate(predictions):
for e in seq:
for key in e:
if key in raw_text[i]:
try:
# NER forced callback
predict_num[e[key]] += 1
except KeyError:
continue
temp = []
for i in labels:
tmp = []
for e in i.values():
if {e['word']: e['entity']} not in tmp:
tmp.append({e['word']: e['entity']})
temp.append(tmp)
labels = temp
true_entity = {i: 0 for i in entity_type}
for entities in labels:
for e in entities:
for i in e.values():
true_entity[i] += 1
predict_num = {i: 0 for i in entity_type}
for index in range(len(predictions)):
temp = labels[index][:]
for entity in predictions[index]:
if entity in temp:
correct_num[list(entity.values())[0]] += len(list(entity.keys())[0].split(' '))
predict_num[list(entity.values())[0]] += len(list(entity.keys())[0].split(' '))
true_entity[list(entity.values())[0]] += len(list(entity.keys())[0].split(' ')) - 1
else:
if list(entity.keys())[0] in raw_text[index]:
# NER forced callback
try:
predict_num[list(entity.values())[0]] += len(list(entity.keys())[0].split(' '))
except KeyError:
continue
precision = {k: (correct_num[k] / predict_num[k] if predict_num[k] != 0 else 0) for k in correct_num.keys()}
recall = {k: (correct_num[k] / true_entity[k] if true_entity[k] != 0 else 0) for k in correct_num.keys()}
tp, fp, fn = sum(correct_num.values()), sum(predict_num.values()) - sum(correct_num.values()), sum(
true_entity.values()) - sum(correct_num.values())
p = 0 if tp + fp == 0 else 1. * tp / (tp + fp)
r = 0 if tp + fn == 0 else 1. * tp / (tp + fn)
f = 0 if p + r == 0 else 2 * p * r / (p + r)
macro_f1 = sum([(2 * precision[k] * recall[k] / (recall[k] + precision[k]))
* true_entity[k] / sum(true_entity.values()) if recall[k] + precision[k] != 0 else 0 for k in
correct_num.keys()])
if print_all:
print("-----precision-----")
for i in precision:
print('{}\t{:.4f}'.format(i, precision[i]))
print("------recall-------")
for i in recall:
print('{}\t{:.4f}'.format(i, recall[i]))
return f, macro_f1
def test(_model, _tokenizer, _data, _types, save_output='', print_all=False):
_test_seq = list(list(_data.values())[:])
_sentences = ['{}\n{}'.format(i['instruction'], i['text']) for i in _test_seq]
_outs = []
for seq in tqdm(_sentences):
_outs.append(generate_result(_tokenizer, _model, seq, _device='cuda'))
if save_output:
if os.path.isfile(save_output):
with open(save_output, 'a', encoding='utf-8') as f:
f.write(json.dumps(_outs, ensure_ascii=False) + '\n')
else:
with open(save_output, 'w', encoding='utf-8') as f:
f.write(json.dumps(_outs, ensure_ascii=False) + '\n')
_a, _b = evaluation(_outs, [i['entity'] for i in _test_seq], _types, print_all=print_all,
raw_text=[i['text'] for i in _test_seq])
return _a, _b
if __name__ == '__main__':
from transformers import T5Tokenizer, T5ForConditionalGeneration
from utils.data_processor import *
for name in ['ai',
'literature',
'music',
'politics',
'science']:
# CrossNER Dateset
types = entity_types('./ner_data/{}/test.txt'.format(name))
test_data = ner_data_process('./ner_data/{}/test.txt'.format(name))
# load your model here
model = T5ForConditionalGeneration.from_pretrained('Model_path')
tokenizer = T5Tokenizer.from_pretrained('Model_path')
test_seq = list(list(test_data.values())[:])
sentences = [i['text'] for i in test_seq]
test_result = test(model, tokenizer, test_data, types, save_output='./output/{}.json'.format(name))
e = evaluation(test_result, [i['entity'] for i in test_seq], types, raw_text=sentences)
print('{0}\t{1}\t{2}'.format(name, e[0], e[1]))