forked from vered1986/OKR
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcreate_predicate_entailment_resource.py
68 lines (49 loc) · 1.9 KB
/
create_predicate_entailment_resource.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import bsddb
import codecs
from docopt import docopt
def main():
    """
    Converts the Berant et al. resource to a bsddb dictionary.

    Reads the input and output paths from the command line (via docopt), loads
    all the rules from the input resource file and stores each one in a bsddb
    B-tree file, keyed by 'lhs###rhs', with the score (as a string) as the value.
    """
    # The blank line after the usage pattern matters: docopt treats every
    # indented line that directly follows "Usage:" as part of the pattern.
    args = docopt("""Converts the Berant et al. resource to a bsddb dictionary

    Usage:
        create_predicate_entailment_resource.py <in_res_file> <out_res_file>

        <in_res_file> = the input resource file (reverb_local_clsf_all.txt)
        <out_res_file> = the output bsddb file
    """)

    in_res_file = args['<in_res_file>']
    out_res_file = args['<out_res_file>']

    # 'c' opens the file for reading and writing, creating it if necessary.
    entailment_rules = bsddb.btopen(out_res_file, 'c')

    try:
        for (lhs, rhs, score) in load_resource(in_res_file):
            entailment_rules[lhs + '###' + rhs] = str(score)

        # Flush everything to disk once all the rules have been written.
        entailment_rules.sync()
    finally:
        # Always release the database handle, even if loading or writing failed.
        entailment_rules.close()
def load_resource(res_file):
    """
    Loads the Berant et al. resource and returns a list of rules (lhs, rhs, score).

    Each non-blank line is expected to hold three tab-separated fields:
    the left-hand-side predicate, the right-hand-side predicate and the score.
    Both predicates are converted with format_predicate; the score is kept
    as the string read from the file. Blank lines are skipped.

    :param res_file the resource file
    :return a list of rules (lhs, rhs, score)
    :raise ValueError if a non-blank line does not have exactly three fields
    """
    rules = []

    with open(res_file) as f_in:
        for line_num, line in enumerate(f_in, 1):
            line = line.strip()

            # Tolerate empty lines (e.g. stray newlines at the end of the file)
            # instead of failing on the unpacking below.
            if not line:
                continue

            fields = line.split('\t')
            if len(fields) != 3:
                raise ValueError('%s, line %d: expected 3 tab-separated fields, found %d'
                                 % (res_file, line_num, len(fields)))

            lhs, rhs, score = fields
            rules.append((format_predicate(lhs), format_predicate(rhs), score))

    return rules
def format_predicate(pred):
    """
    Receives a predicate in the original format (from the Berant et al. resource)
    and returns a format suitable for the entailment component in the baseline system.

    A predicate that ends with the '@R' marker is treated as reversed: the marker
    is removed and the result is 'Y <pred> X'. Any other predicate is returned
    as 'X <pred> Y'.

    :param pred the predicate in the original format
    :return the predicate in a format suitable for the entailment component in the baseline system
    """
    reversed_marker = '@R'
    is_reversed = pred.endswith(reversed_marker)

    if is_reversed:
        # Remove the marker from the predicate itself; only the text before it
        # belongs in the formatted output.
        pred = pred[:-len(reversed_marker)]

    # The argument slots are swapped for reversed predicates.
    first_arg, second_arg = ('Y', 'X') if is_reversed else ('X', 'Y')

    return first_arg + ' ' + pred + ' ' + second_arg
# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()