Skip to content

Commit

Permalink
using the new extractor module have gotten all the tests to pass, yay
Browse files Browse the repository at this point in the history
  • Loading branch information
Sam Joseph committed Sep 27, 2012
1 parent cb624b8 commit 14f6f10
Show file tree
Hide file tree
Showing 10 changed files with 7,383 additions and 29 deletions.
2 changes: 2 additions & 0 deletions README.markdown
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ Note we now have some other chatbot related initiatives in the [faq](faq) and [w

2. Apply patch to allow svm_predict to produce quiet output `cp svmutil.patch LIBSVM_HOME/python && cd LIBSVM_HOME/python && patch < svmutil.patch`

[N.B. here's how I add libsvm to my PYTHONPATH: export PYTHONPATH="/Users/samueljoseph/Code/libsvm-3.12/python/:$PYTHONPATH"]

3. Download TWSS source data into data directory in current project

4. You can run some limited unit tests like so `python testTokeniseContents.py`
Expand Down
4 changes: 2 additions & 2 deletions faq/README.markdown
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@ For development run [sniffer](http://pypi.python.org/pypi/sniffer) in the faq di

Through the faqbot you can create new tables like so:

> "There is a Course Probabilistic Graphical Models called PGM"
> "There is a course Probabilistic Graphical Models called PGM"
Which would create a table "courses" with columns "ident" and "name" and a single entry with ident "Probabilistic Graphical Models" and name "PGM". Further entries can be added using similar statements:

> "There is a Course Machine Learning called ML"
> "There is a course Machine Learning called ML" <-- although at the moment the parser is not recognizing two letter acronyms ...
Which will add another row to the same table, with the expected contents. Additional columns can be added to tables like so:

Expand Down
7 changes: 4 additions & 3 deletions faq/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,15 @@ def addEntity(name, hashtable, database_name):
# We can also close the cursor if we are done with it
c.close()

def findTableContainingEntityWithIdent(ident, database_name,flag=False):
def findTableContainingEntityWithIdentOrName(ident, database_name,flag=False):
conn = sqlite3.connect(database_name)
c = conn.cursor()
c.execute("SELECT name FROM sqlite_master WHERE type = 'table'")
results = c.fetchall()
#if flag: raise Exception(database_name)
for result in results:
result = result[0]
sql = "SELECT * FROM %s WHERE ident = '%s'" % (result,ident)
sql = "SELECT * FROM %s WHERE ident = '%s' OR name = '%s'" % (result,ident,ident)
# TODO would like case insensitive match here - not sure how to do that in sqlite
#raise Exception(sql)
#if flag:
Expand Down Expand Up @@ -112,7 +112,8 @@ def updateEntity(table, hashtable ,database_name):
c = conn.cursor()
table = scrub(table)
update = ', '.join([key.replace(' ','_')+" = '"+value+"'" for (key,value) in hashtable.items()])
sql = "UPDATE %s SET %s WHERE ident = '%s'" % (table,update,hashtable["ident"])
# TODO should search that thing we are trying to update exists, or else we effectively fail silently ...
sql = "UPDATE %s SET %s WHERE ident = '%s' OR name = '%s'" % (table,update,hashtable["ident"],hashtable["ident"])
#raise Exception(sql)
c.execute(sql)
c.close()
Expand Down
7 changes: 5 additions & 2 deletions faq/extractor/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
from pattern.en import Sentence, parse
#from patten.en.tree import Word

MATCH_STRING = "There be DT {JJ? NN+} {NNP+} (call DT? {JJ? NNP+})"
MATCH_STRING = "There be DT {JJ? NN+} {NNP+}"
MATCH_STRING_EXT = "There be DT {JJ? NN+} {NNP+} call DT? {JJ? NNP+}"

def extract(statement):

Expand All @@ -21,7 +22,9 @@ def extract(statement):
s = find_entities(s)

# not sure about this "be" thing - happy to match plural (is/are) but not sure about past tense ...
match = search(MATCH_STRING, s)
match = search(MATCH_STRING_EXT, s)
if not match:
match = search(MATCH_STRING, s)
#raise Exception(match)
return s, match

Expand Down
20 changes: 16 additions & 4 deletions faq/extractor/test_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,31 +10,43 @@ def checkExtraction(sentence,type,name):
def testSource(self):
sentence = "There is a game engine Source"
s, match = extract(sentence)
self.assertNotEqual(len(match),0,"no match found: '" + str(s)+"' against '"+ MATCH_STRING + "'")
self.assertNotEqual(len(match),0,"no match found: '" + str(s)+"' against '"+ MATCH_STRING + "' or '"+ MATCH_STRING_EXT + "'")
self.assertEqual(match[0].group(1).string,"game engine","from '" + str(s)+"'")
self.assertEqual(match[0].group(2).string,"Source","from '" + str(s)+"'")

def testUnrealEngine(self):
sentence = "There is a game engine Unreal Engine"
s, match = extract(sentence)
self.assertNotEqual(len(match),0,"no match found: '" + str(s)+"' against '"+ MATCH_STRING + "'")
self.assertNotEqual(len(match),0,"no match found: '" + str(s)+"' against '"+ MATCH_STRING + "' or '"+ MATCH_STRING_EXT + "'")
self.assertEqual(match[0].group(1).string,"game engine","game engine is not "+match[0].group(1).string+" from'" + str(s)+"'")
self.assertEqual(match[0].group(2).string,"Unreal Engine","Unreal Engine is not "+match[0].group(2).string+" from '" + str(s)+"'")
# not sure how we make sure we greedy grab two NNPs here ...

def testUnity3D(self):
sentence = "There is a game engine Unity3D called Unity3D"
s, match = extract(sentence)
self.assertNotEqual(len(match),0,"no match found: '" + str(s)+"' against '"+ MATCH_STRING + "'")
self.assertNotEqual(len(match),0,"no match found: '" + str(s)+"' against '"+ MATCH_STRING + "' or '"+ MATCH_STRING_EXT + "'")
firstHit = match[0].group(1).string
self.assertEqual(match[0].pattern.groups,3, "incorrect number of groups: '" + str(s)+"' against '"+ MATCH_STRING + "'")
self.assertEqual(len(match[0].pattern.groups),3, "incorrect number of groups: '" +str(len(match[0].pattern.groups))+"; "+ str(s)+"' against '"+ "' or '"+ MATCH_STRING_EXT + "'")
self.assertEqual(match[0].group(1).string,"game engine", "found '" +firstHit+ "' instead of 'game engine' in '" + sentence+"'")
self.assertEqual(match[0].group(2).string,"Unity3D","'" + str(s)+"'")
#raise Exception(str(s))
# annoying - no way to get the number of groups ... aha ... match[0].pattern.groups
#seems like () and {} do not play well together ...
self.assertEqual(match[0].group(3).string,"Unity3D","Unity3D is not '" + match[0].group(3).string+"' from " + str(s))

# Check extraction of a "There is a <type> <ident> called <name>" sentence
# whose trailing name is a two-letter acronym ("ML").
def testMachineLearning(self):
sentence = "There is a course Machine Learning called ML"
# extract() returns the parsed sentence together with the pattern matches.
s, match = extract(sentence)
self.assertNotEqual(len(match),0,"no match found: '" + str(s)+"' against '"+ MATCH_STRING + "' or '"+ MATCH_STRING_EXT + "'")
# Captured up front so it can be quoted in the group-1 failure message below.
firstHit = match[0].group(1).string
# Three groups are expected: the entity type, its ident and its name.
self.assertEqual(len(match[0].pattern.groups),3, "incorrect number of groups: '" +str(len(match[0].pattern.groups))+"; "+ str(s)+"' against '"+ "' or '"+ MATCH_STRING_EXT + "'")
self.assertEqual(match[0].group(1).string,"course", "found '" +firstHit+ "' instead of 'course' in '" + sentence+"'")
self.assertEqual(match[0].group(2).string,"Machine Learning","'" + str(s)+"'")

# The third group must hold the acronym that follows "called".
self.assertEqual(match[0].group(3).string,"ML","ML is not '" + match[0].group(3).string+"' from " + str(s))



def testBasicExtract(self):
match = basicExtract('There is a red ball')
Expand Down
29 changes: 17 additions & 12 deletions faq/faq.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from pattern.en import pluralize
from pattern.en import parse, split
from pattern.search import search
from extractor.extractor import extract

CSCI3651_PREREQ = "CSCI 2911, CSCI 2912"
CSCI3651_TEXT = "Artificial Intelligence for Games"
Expand Down Expand Up @@ -77,7 +78,7 @@ def query(userSaid,conversationTitle=None,talking=None,database_name = DATABASE_
searchList = [item for item in searchList if item != '']

for ident in searchList:
(table,result) = findTableContainingEntityWithIdent(ident, database_name, True)
(table,result) = findTableContainingEntityWithIdentOrName(ident, database_name, True)
if table:
column_names = grabColumnNames(table, database_name)
humanized_column_names = [col_name.replace('_',' ') for col_name in column_names]
Expand All @@ -92,12 +93,15 @@ def query(userSaid,conversationTitle=None,talking=None,database_name = DATABASE_
final = "not sure what you mean ..."

if database_name != "test.db":
myopener = MyOpener()
page = myopener.open('http://google.com/search?btnI=1&q='+userSaid)
page.read()
time.sleep(1)
response = page.geturl()
final = "Does this help? "+ response
try:
myopener = MyOpener()
page = myopener.open('http://google.com/search?btnI=1&q='+userSaid)
page.read()
time.sleep(1)
response = page.geturl()
final = "Does this help? "+ response
except IOError as e:
final = "I'm sorry but I think I'm not connected to the internet - my subconcious is telling me that '%s'" % e

#pdb.set_trace()
return final
Expand Down Expand Up @@ -160,12 +164,13 @@ def process(statement,database_name = DATABASE_NAME):
s = parse(statement, relations=True, lemmata=True, light=True)
s = split(s)

result = search('There be DT NN+ (DT) (RB) (JJ) NNP+ (call) (DT) (RB) (JJ) (NNPS|NNP)+', s)
#result = search('There be DT NN+ (DT) (RB) (JJ) NNP+ (call) (DT) (RB) (JJ) (NNPS|NNP)+', s)
s, result = extract(statement)
if result:
#try:
noun = search('(NN)+', s)[0].string
table = pluralize(noun.replace(' ','_'))
result = search('(NNPS|NNP)+', s) # at the moment I'm unclear here about pulling in adjectives etc ...
result = search('(JJ|NNPS|NNP)+', s) # this pulls in adjectives, but there's supposed to be a better fix coming
ident = result[0].string
name = result[1].string if len(result) > 1 else ident
#raise Exception(table+"; "+ident+"; "+name)
Expand Down Expand Up @@ -209,7 +214,7 @@ def processAction(statement,database_name = DATABASE_NAME):
if result == None:
return "Sorry, I don't what happens when " + subj + " " + verb + " " + obj
result = queryTable("reactions",{"origin":obj,"action":verb},database_name)
(table,thing) = findTableContainingEntityWithIdent(obj, database_name)
(table,thing) = findTableContainingEntityWithIdentOrName(obj, database_name)
return thing[0] + " says " + result['name']


Expand All @@ -220,7 +225,7 @@ def processNewAspect(statement,database_name = DATABASE_NAME):
if match:
# need to search all tables
ident = match.group(1)
(table,result) = findTableContainingEntityWithIdent(ident, database_name)
(table,result) = findTableContainingEntityWithIdentOrName(ident, database_name)
if table == None:
return "Sorry, I don't know about " + ident
new_column = match.group(2).lower()
Expand All @@ -240,7 +245,7 @@ def processNewAspect(statement,database_name = DATABASE_NAME):
print greetings()
while True:
n = raw_input("> ")
if n == "quit":
if n in ["quit","exit","stop"]:
break;
print query(n)

Expand Down
21 changes: 17 additions & 4 deletions faq/test_faq.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,13 @@ def setUp(self):
pass
except IOError as e:
None
# ideally we'd also be testing all this against a copy of the production database and then copy that back over to production if all tests pass

# "there is a game engine Scirra Construct called Construct" - need a log of all the statements/sayings and convert that to tests

# Assert that an entity exists and carries the expected attribute values.
# The entity is looked up with grabEntity(table, ident, database); every
# key/value pair in attributeValues is then compared against entity[key].
def checkEntity(self, table, ident, attributeValues, database):
entity = grabEntity(table, ident, database)
# Fail with a readable message when the lookup returns None, before the
# loop below tries to index into the result.
self.assertIsNotNone(entity,("No entity of id '%s' in table '%s'"%(ident,table)))
for key, value in attributeValues.items():
self.assertEquals(entity[key],value)

Expand All @@ -32,6 +36,9 @@ def testCreateGameEngine(self):
self.sayAndCheckEntity("Unity3D has a type of 3D","OK","game_engines", "Unity3D", {"type":"3D"})
self.sayAndCheck("What type of game engine is Unity3D?","The type for Unity3D is '3D'")

self.sayAndCheckEntity("There is a game engine Crysis", "OK", "game_engines", "Crysis", {"name":"Crysis","ident":"Crysis"})
self.sayAndCheckEntity("There is a game engine Source", "OK", "game_engines", "Source", {"name":"Source","ident":"Source"})
self.sayAndCheckEntity("Source has a URL of http://source.valvesoftware.com/sourcesdk/sourceu.php", "OK", "game_engines", "Source", {"url":"http://source.valvesoftware.com/sourcesdk/sourceu.php"})
#Crysis - said no to use in an online class
#Unity3d - http://www.studica.com/unity
#Source http://source.valvesoftware.com/sourcesdk/sourceu.php
Expand All @@ -41,6 +48,10 @@ def testCreateGameEngine(self):
#Scirra
#Torque 3d

# might make better progress if we worked to support phrases like
#"Unreal Engine is a game engine"
#"There is a game engine called Unreal Engine"

def testActions(self):
''' test we can handle actions '''
self.sayAndCheckEntity("There is a person evil wizard called Sam", "OK", "people", "evil wizard", {"name":"Sam","ident":"evil wizard"})
Expand Down Expand Up @@ -110,7 +121,9 @@ def testCreateGame(self):
# TODO self.sayAndCheck("do you know any games?","I know about The Graveyard")
# this would require further work still?


# NOTE, this way round doesn't really match how I have been doing the HPU courses ...
# where I put the acronym as the ident/id and the full name as the "name"
# really I want to be able to say something like "Probabilistic Graphical Models is a Coursera course, also called PGM"
def testCreateCourseraCourses(self):
self.sayAndCheckEntity("There is a course Probabilistic Graphical Models called PGM", "OK", "courses", "Probabilistic Graphical Models", {"name":"PGM","ident":"Probabilistic Graphical Models"})
self.sayAndCheckEntity("There is a course Machine Learning called ML", "OK", "courses", "Machine Learning", {"name":"ML","ident":"Machine Learning"})
Expand All @@ -127,13 +140,13 @@ def testCreatePerson(self):
self.sayAndCheckEntity("There is a person Henry Garner called Henry", "OK", "people", "Henry Garner", {"name":"Henry","ident":"Henry Garner"})
self.sayAndCheckEntity("Henry Garner has a favourite colour of red","OK","people", "Henry Garner", {"favourite_colour":"red"})
self.sayAndCheckEntity("Henry Garner has a favourite colour of teal","OK","people", "Henry Garner", {"favourite_colour":"teal"})
self.sayAndCheck("do you know about Henry Garner?","All I know about Henry Garner is that his name is Henry, and his favourite colour is teal")
self.sayAndCheck("do you know about Henry Garner?","All I know about Henry is that his name is Henry, and his favourite colour is teal")

def testCreateCTO(self):
self.sayAndCheckEntity("There is a CTO Henry Garner called Henry", "OK", "CTOs", "Henry Garner", {"name":"Henry","ident":"Henry Garner"})
self.sayAndCheckEntity("Henry Garner has a favourite colour of red","OK","CTOs", "Henry Garner", {"favourite_colour":"red"})
self.sayAndCheckEntity("Henry Garner has a favourite colour of teal","OK","CTOs", "Henry Garner", {"favourite_colour":"teal"})
self.sayAndCheck("do you know about Henry Garner?","All I know about Henry Garner is that its name is Henry, and its favourite colour is teal")
self.sayAndCheck("do you know about Henry Garner?","All I know about Henry is that its name is Henry, and its favourite colour is teal")

def testCreation(self):
''' test we can create and modify arbitrary entities '''
Expand All @@ -146,4 +159,4 @@ def testOtherCreation(self):
''' test we can create and modify arbitrary entities '''
self.sayAndCheckEntity("There is a professor Sam Joseph called Sam","OK", "professors", "Sam Joseph", {"name":"Sam","ident":"Sam Joseph"})
self.sayAndCheckEntity("Sam Joseph has a birth date of May 13th 1972","OK", "professors", "Sam Joseph", {"name":"Sam","ident":"Sam Joseph",'birth_date':"May 13th 1972"})
self.sayAndCheck("What's Sam Joseph's birth date?","The birth date for Sam Joseph is 'May 13th 1972'")
self.sayAndCheck("What's Sam Joseph's birth date?","The birth date for Sam is 'May 13th 1972'")
Binary file modified nao/.DS_Store
Binary file not shown.
Loading

0 comments on commit 14f6f10

Please sign in to comment.