faq.py
from db import *
import re
import sqlite3  # OperationalError is caught below; imported explicitly rather than relying on "from db import *"
from urllib import FancyURLopener
import pdb
import random
import nltk
from nltk.corpus import stopwords
import time
from pattern.en import conjugate
from pattern.en import pluralize
from pattern.en import parse, split
from pattern.search import search
from extractor.extractor import extract
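# The helpers used below but not defined in this file (createTable, addEntity, queryTable, updateEntity,
# modifyTable, grabColumnNames, findTableContainingEntityWithIdentOrName) presumably come in via
# "from db import *"; extractor is the other local module, while nltk and pattern are third-party,
# Python 2-era dependencies.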
CSCI3651_PREREQ = "CSCI 2911, CSCI 2912"
CSCI3651_TEXT = "Artificial Intelligence for Games"
CSCI3651_CRN = "3335"
CSCI3211_TEXT = "Engineering Long Lasting Software"
CSCI3211_CRN = "2802"
START = "Tuesday, September 4th, 2012"
END = "Sunday, December 16th, 2012"

courseCache = {"CSCI3651":
                   {"textbook": CSCI3651_TEXT,
                    "CRN": CSCI3651_CRN,
                    "start date": START,
                    "end date": END},
               "CSCI3211":
                   {"textbook": CSCI3211_TEXT,
                    "CRN": CSCI3211_CRN,
                    "start date": START,
                    "end date": END}
               }

aspectList = {"textbook": set(["textbook", "book", "text", "reading"]),  # could be doing WordNet lookups here ...
              "CRN": set(["crn", "reference", "CRN"]),
              "start date": set(["begin", "start"]),
              "end date": set(["end", "finish", "over", "stop"])}

courseList = {"CSCI3651": ["3651", "game programming", "game"],
              "CSCI3211": ["3211", "systems analysis", "software engineering"]}

courses = courseList.keys()

DATABASE_NAME = "faq.db"
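# courseCache / aspectList / courseList above hard-code a couple of course facts; the main query() path
# below works off the sqlite database in DATABASE_NAME, and these dictionaries only come into play
# through question() and getAspect().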
def query(userSaid, conversationTitle=None, talking=None, database_name=DATABASE_NAME):
    '''Natural language (hopefully) interface to store information and query it too.'''
    userSaid = userSaid.strip()
    raw = userSaid.lower()
    # strip any @-mention that addressed the bot
    if raw.startswith('@bot'):
        userSaid = userSaid[len('@bot'):]
    if raw.startswith('@chatbot'):
        userSaid = userSaid[len('@chatbot'):]
    if raw.startswith('@hpuchatbot'):
        userSaid = userSaid[len('@hpuchatbot'):]
    userSaid = userSaid.strip()
    # anything that doesn't look like a question is first treated as a statement to store
    if not userSaid.lower().startswith("what"):
        statementCheck = process(userSaid, database_name)
        if statementCheck:
            return statementCheck
    if userSaid.lower().startswith("what happens"):
        return processAction(userSaid, database_name)
    userSplit = re.split(r'\W+', userSaid)
    stoppedUserSplit = [w for w in userSplit if w not in stopwords.words('english')]
    #print userSplit
    #courseMatches = list(set(courses).intersection(set(userSplit))) # we could avoid splitting and be doing lookup on the sentence ...
    lowerUserSaid = userSaid.lower()
    courseMatch = None
    aspect = None
    tableMatch = None
    # build unigram/bigram/trigram candidates to look up as entity identifiers
    bigrams = nltk.bigrams(stoppedUserSplit)
    trigrams = nltk.trigrams(stoppedUserSplit)
    searchList = stoppedUserSplit + [bigram[0] + " " + bigram[1] for bigram in bigrams] + [trigram[0] + " " + trigram[1] + " " + trigram[2] for trigram in trigrams]
    searchList = list(set(searchList))
    searchList = [item for item in searchList if item != '']
    for ident in searchList:
        (table, result) = findTableContainingEntityWithIdentOrName(ident, database_name, True)
        if table:
            column_names = grabColumnNames(table, database_name)
            humanized_column_names = [col_name.replace('_', ' ') for col_name in column_names]
            for index, name in enumerate(humanized_column_names):
                #raise Exception(lowerUserSaid + "::" + str(humanized_column_names))
                if lowerUserSaid.find(name) > 0:  # would love to get synsets for names from WordNet
                    if result[index] is None:
                        break
                    return humanizedQuestion(ident, name, result[index])
            return allIKnow(table, ident, result, humanized_column_names)  # "I'm not sure about that aspect of " + ident # here we could return what we know about that thing
    final = "not sure what you mean ..."
    # last resort: punt to Google's "I'm Feeling Lucky" (skipped when running against the test database)
    if database_name != "test.db":
        try:
            myopener = MyOpener()
            page = myopener.open('http://google.com/search?btnI=1&q=' + userSaid)
            page.read()
            time.sleep(1)
            response = page.geturl()
            final = "Does this help? " + response
        except IOError as e:
            final = "I'm sorry but I think I'm not connected to the internet - my subconscious is telling me that '%s'" % e
    #pdb.set_trace()
    return final
    # working with the conversation's title might allow us to answer things like "what's the textbook for this course",
    # but we should check for presence of column name, and things like "this course"
    #lowerConversationTitle = conversationTitle.lower()
    #if not courseMatch:
    #    for synonym in courseList[course]:
    #        if lowerConversationTitle.find(synonym) > 0:
    #            courseMatch = course
    #            break
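# Illustrative round trip through query() (a sketch; exact replies depend on what db.py has stored):
#   query("There is a course CSCI4702 called Mobile Programming")   -> "OK"
#   query("CSCI4702 has a start date of Jan 31st 2013")             -> "OK"
#   query("what is the start date of CSCI4702")                     -> "The start date for CSCI4702 is 'Jan 31st 2013'"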
def allIKnow(table, ident, result, humanized_column_names):
    possessive = "his" if table == "people" else "its"  # would be nice to switch on gender here, or is that something we could learn?
    allIKnow = "All I know about %s is that %s " % (ident, possessive)
    return allIKnow + (", and %s " % possessive).join([name + " is " + result[index] for index, name in enumerate(humanized_column_names) if name != "ident" and result[index] is not None])
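# e.g. allIKnow("courses", "CSCI4702", row, ["ident", "name", "start date"]) would read roughly
# "All I know about CSCI4702 is that its name is Mobile Programming, and its start date is Jan 31st 2013"
# (illustrative values; the real row comes from findTableContainingEntityWithIdentOrName).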
def getAspect(userSplitSet):
    for aspect, aspectSet in aspectList.items():
        if userSplitSet.intersection(aspectSet):
            return aspect
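# e.g. getAspect(set(["book"])) -> "textbook"; returns None when no synonym in aspectList matches.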
def question(course, aspect):
    if not courseCache.get(course):
        return "duh ..."
    #countryCache[course] = json.loads(urlopen(url+country+api_id).read())['geonames'][0]
    return courseCache[course][aspect]
def humanize(camelCase):
    return re.sub(r"([a-z])([A-Z])", r"\g<1> \g<2>", camelCase).lower()
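# e.g. humanize("startDate") -> "start date" (splits camelCase at the lower/upper boundary, then lowercases).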
def humanizedQuestion(course, aspect, answer="Unknown"):
    if not answer:
        # fall back to the hard-coded courseCache lookup when no stored answer was passed in
        answer = question(course, aspect)
    return "The " + aspect + " for " + course + " is '" + answer + "'"
def greetings():
    return random.choice(["sup, dog!", "hello", "hi there", "dude", "zaapp?"])
class MyOpener(FancyURLopener):
    version = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11'
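# MyOpener overrides urllib's default "Python-urllib" User-Agent with a browser string,
# presumably so the Google "I'm Feeling Lucky" request in query() isn't rejected.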
def process(statement, database_name=DATABASE_NAME):
    '''Allows us to create entities via statements like "There is a course CSCI4702 called Mobile Programming"
       and modify entities with statements like "CSCI4702 has a start date of Jan 31st 2013".

       A statement like "There is a game engine Unity3d" already gives us trouble:
       it seems like we need named entity recognition to extract types like that ... or perhaps rely on
       capitalization, which doesn't really work for things like CTO as a category of items, hmm

       >>> sent = "There is a game engine Unreal Engine".split()
       >>> print nltk.ne_chunk(nltk.pos_tag(sent))
    '''
    # this runs real fast, but it doesn't quite get the NN/NNP combination I hoped for from "There is a game engine Unity3D"
    # although it does now with light=True, but now it doesn't get the NNP in "There is a game engine Source"
    s = parse(statement, relations=True, lemmata=True, light=True)
    s = split(s)
    #result = search('There be DT NN+ (DT) (RB) (JJ) NNP+ (call) (DT) (RB) (JJ) (NNPS|NNP)+', s)
    s, result = extract(statement)
    if result:
        #try:
        noun = search('(NN)+', s)[0].string
        table = pluralize(noun.replace(' ', '_'))
        result = search('(JJ|NNPS|NNP)+', s)  # this pulls in adjectives, but there's supposed to be a better fix coming
        ident = result[0].string
        name = result[1].string if len(result) > 1 else ident
        #raise Exception(table+"; "+ident+"; "+name)
        return newTable(table, ident, name, database_name)
        #except:
        #    return regexMatch(statement, database_name)
    else:
        return regexMatch(statement, database_name)
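# Statement handling falls through in stages: the extract()-based parse here, then the
# "There is a ... called ..." regex in regexMatch(), then the "X has a Y of Z" pattern in
# processNewAspect(); each path returns "OK" on success or None so query() can keep looking.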
def regexMatch(statement, database_name=DATABASE_NAME):
    match = re.search(r'There is an? ([\w]+) ([\s\w]+) called ([\s\w]+)\.?', statement)
    if match:
        table = pluralize(match.group(1))
        ident = match.group(2)
        name = match.group(3)
        return newTable(table, ident, name, database_name)
    return processNewAspect(statement, database_name)
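# e.g. "There is a course CSCI4702 called Mobile Programming" -> table "courses", ident "CSCI4702",
# name "Mobile Programming" (group(1) is pluralized into the table name).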
def newTable(table, ident, name, database_name=DATABASE_NAME):
    try:
        createTable(table, ["ident"], database_name)
    except sqlite3.OperationalError as e:
        # an existing table is fine; anything else is a real problem
        if str(e) == "table " + table + " already exists":
            pass
        else:
            raise e
    addEntity(table, {"ident": ident, "name": name}, database_name)
    return "OK"
def processAction(statement, database_name=DATABASE_NAME):
    #raise Exception(statement)
    match = re.search(r"what happens (?:(?:if)|(?:when)) (?:the)? ([\s\w]+) ([\s\w]+?) ([\s\w]+)\??", statement)
    #raise Exception(match.group(0))
    if match:
        # search the actions table for a matching subject/verb/object triple
        subj = match.group(1)
        verb = match.group(2)
        verb = conjugate(verb, tense='infinitive')
        obj = match.group(3)
        result = queryTable("actions", {"origin": subj, "ident": verb, "target": obj}, database_name)
        if result is None:
            return "Sorry, I don't know what happens when " + subj + " " + verb + " " + obj
        result = queryTable("reactions", {"origin": obj, "action": verb}, database_name)
        (table, thing) = findTableContainingEntityWithIdentOrName(obj, database_name)
        return thing[0] + " says " + result['name']
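# Expects "actions" (origin / ident / target) and "reactions" (origin / action / name) tables to already
# exist in the database; e.g. "what happens if the user deletes everything" would look up
# origin="user", ident="delete", target="everything" (hypothetical data, shown only to illustrate the lookup).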
def processNewAspect(statement, database_name=DATABASE_NAME):
    #raise Exception(statement)
    match = re.search(r"([\s\w]+?)(?:(?: has an?)|(?:\'s)) ([\s\w]+) (?:(?:of)|(?:called)|(?:is(?: called)?)) ([\s\w:/\.]+)\.?", statement)
    #raise Exception(match.group(0))
    if match:
        # the named entity could live in any table, so search them all
        ident = match.group(1)
        (table, result) = findTableContainingEntityWithIdentOrName(ident, database_name)
        if table is None:
            return "Sorry, I don't know about " + ident
        new_column = match.group(2).lower()
        try:
            modifyTable(table, new_column, database_name)
        except sqlite3.OperationalError as e:
            # adding a column that already exists is fine; anything else is a real problem
            if str(e).startswith("duplicate column name: "):
                pass
            else:
                raise e
        updateEntity(table, {"ident": ident, new_column: match.group(3)}, database_name)
        return "OK"
    return None
if __name__ == "__main__":
    n = ""
    print greetings()
    while True:
        n = raw_input("> ")
        if n in ["quit", "exit", "stop"]:
            break
        print query(n)
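# Interactive use (Python 2, given the print statements and raw_input): run "python faq.py",
# type statements or questions at the "> " prompt; "quit", "exit", or "stop" ends the session.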