-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathquery.py
184 lines (163 loc) · 6.46 KB
/
query.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
import secret # DB URL
import log # set up logging
import tools # conversions of characters
import psycopg # connect to database
import json # return data as json
import argparse # cli argument parsing
import regex # check for linear b text
# Parse the connection string stored in secret.DB_URI into a dict of keyword
# arguments once, at import time; the query functions in this file pass it to
# psycopg.connect(**connection_dict).
connection_dict = psycopg.conninfo.conninfo_to_dict(secret.DB_URI)
# Reuse the logger object exposed by the project's log module.
logger = log.logger
def get_database_size():
    """Print the row counts of the database tables as a JSON object.

    Runs ``select count(*)`` against ``dict_entry``, ``form`` and
    ``inflection`` (in that order) and prints the results to stdout under the
    keys ``lexicon_size``, ``form_count`` and ``inflection_count``.

    If anything raises, a JSON object with the keys ``error`` and
    ``exception`` is printed instead. Nothing is returned and no exception
    propagates to the caller.
    """
    # (output key, query) pairs, in the order they are executed and reported
    count_queries = (
        ("lexicon_size", "select count(*) from dict_entry"),
        ("form_count", "select count(*) from form"),
        ("inflection_count", "select count(*) from inflection"),
    )
    try:
        with psycopg.connect(**connection_dict) as conn, conn.cursor() as cur:
            sizes = {}
            for key, query in count_queries:
                cur.execute(query)
                # count(*) yields exactly one row with one column
                sizes[key] = cur.fetchone()[0]
            print(json.dumps(sizes, indent=2, ensure_ascii=False))
    except Exception as exc:
        failure = {
            "error": "Could not get database size",
            "exception": str(exc),
        }
        print(json.dumps(failure, indent=2, ensure_ascii=False))
def parse(word):
    """Print every analysis of an inflected form as JSON.

    If ``word`` contains a character in the range U+10000-U+100FA it is first
    converted with ``tools.linear_b_to_latin``. The (possibly converted) word
    is then looked up in a join of the ``inflection`` and ``form`` tables.

    Output on stdout is one of:
      * a JSON array with one element per matching row, each holding
        dict_entry, formdeclension, formcase, formgender, formnumber,
        uncertaingender and formpronunciation, in that order;
      * ``{"error": "Word not found"}`` when no row matches;
      * an object with ``error`` and ``exception`` keys when anything raises.

    Nothing is returned and no exception propagates to the caller.
    """
    query = "select dict_entry, formdeclension, formcase, formgender, formnumber, uncertaingender, formpronunciation from inflection, form where inflection.form=form.formid and inflection = %s"
    try:
        has_linear_b = regex.search(r'[\U00010000-\U000100FA]', word, regex.IGNORECASE)
        if has_linear_b:
            # the database is queried with the transliteration, not the script
            word = tools.linear_b_to_latin(word)
        with psycopg.connect(**connection_dict) as conn, conn.cursor() as cur:
            cur.execute(query, (word,))
            rows = cur.fetchall()
            # an empty result list means the form is unknown
            payload = rows if rows else {"error": "Word not found"}
            print(json.dumps(payload, indent=2, ensure_ascii=False))
    except Exception as exc:
        failure = {
            "error": "Couldn't parse word",
            "exception": str(exc),
        }
        print(json.dumps(failure, indent=2, ensure_ascii=False))
def lookup(entry):
    """Print one dictionary entry as JSON, found by entry id or by word.

    ``entry`` is the raw command-line value. If ``int()`` accepts it, it is
    treated as an ``entryid``; otherwise it is treated as a word, and input
    containing a character in the range U+10000-U+100FA is first converted
    with ``tools.linear_b_to_latin``.

    Output on stdout is one of:
      * an object with the keys ``entry_id``, ``word``, ``definition``,
        ``category`` and ``stem`` for the first matching row;
      * ``{"error": "Word not found"}`` when no row matches;
      * ``{"error": "Couldn't look up entry", "exception": "<message>"}``
        when anything else fails (for example the database is unreachable).

    Nothing is returned and no exception propagates to the caller.
    """
    # Only a failed conversion means "this is a word"; a bare except here
    # would also swallow KeyboardInterrupt and SystemExit.
    try:
        entry = int(entry)
    except (TypeError, ValueError):
        entry = str(entry)

    try:
        with psycopg.connect(**connection_dict) as conn, conn.cursor() as cur:
            if isinstance(entry, int):
                cur.execute("select word, entrydefinition, category, stem from dict_entry where entryid = %s", (entry,))
                row = cur.fetchone()
                if row is not None:
                    # this query does not select entryid, so prepend the id
                    # that was searched for to get the same row shape as below
                    row = (entry, *row)
            else:
                if regex.search(r'[\U00010000-\U000100FA]', entry, regex.IGNORECASE):
                    # linear b characters found, convert to transliteration
                    entry = tools.linear_b_to_latin(entry)
                cur.execute("select entryid, word, entrydefinition, category, stem from dict_entry where word = %s", (entry,))
                row = cur.fetchone()

        if row is None:
            # fetchone() returns None when the query matched nothing; report
            # that directly instead of relying on a TypeError from None[0]
            payload = {"error": "Word not found"}
        else:
            entry_id, word, definition, category, stem = row
            payload = {
                "entry_id": entry_id,
                "word": word,
                "definition": definition,
                "category": category,
                "stem": stem,
            }
        print(json.dumps(payload, indent=2, ensure_ascii=False))
    except Exception as exc:
        # A real failure is not the same thing as "not found"; report it the
        # way the other query functions do, with the message as a string.
        failure = {
            "error": "Couldn't look up entry",
            "exception": str(exc),
        }
        print(json.dumps(failure, indent=2, ensure_ascii=False))
if __name__ == "__main__":
    # get cli arguments to pick function to run
    parser = argparse.ArgumentParser(description="Query the Tiripode database for forms and lexicon entries")
    parser.add_argument("--parse", help="get a word parsed")
    parser.add_argument("--lookup", help="get a dictionary entry")
    parser.add_argument("--size", help="get number of entries in database", action="store_true")
    parser.add_argument("--debug", help="print detailed info for debugging", action="store_true")
    args = parser.parse_args()

    if args.debug:
        logger.setLevel(log.logging.DEBUG)

    # At most one action runs per invocation; when several options are given
    # the priority is --size, then --parse, then --lookup.
    if args.size:
        get_database_size()
    elif args.parse:
        parse(args.parse)
    elif args.lookup:
        lookup(args.lookup)
    else:
        # No action was requested (or an empty word was given): show the
        # usage text instead of exiting silently with no output.
        parser.print_help()