Skip to content

Commit

Permalink
first version of writable natural language faq system - at present we…
Browse files Browse the repository at this point in the history
… can create stuff (tests in place), but need to hook up to query
  • Loading branch information
Sam Joseph committed Jul 3, 2012
1 parent d94a6fb commit 78c83ee
Show file tree
Hide file tree
Showing 6 changed files with 351 additions and 1 deletion.
100 changes: 100 additions & 0 deletions faq/db.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
import sqlite3
import pdb

def createTable(name, columns, database_name):
    '''Create table `name`, if it does not exist yet, with text columns.

    Every table starts with a "name text" column, followed by one text
    column per entry of `columns`.  Identifiers cannot be bound as SQL
    parameters, so the table and column names are passed through scrub()
    before being interpolated into the statement.

    name          -- table name (scrubbed before use)
    columns       -- iterable of column names (each scrubbed before use)
    database_name -- path of the SQLite database file

    Raises sqlite3.OperationalError if SQLite rejects the statement.
    '''
    table = scrub(name)

    # "name" is always present.  Skip blank and repeated column names so an
    # empty or overlapping column list cannot produce invalid SQL.
    seen = set(['name'])
    definitions = ['name text']
    for column in columns:
        column = scrub(column)
        if column and column.lower() not in seen:
            seen.add(column.lower())
            definitions.append(column + ' text')

    # http://stackoverflow.com/questions/3247183/variable-table-name-in-sqlite
    sql = 'CREATE TABLE IF NOT EXISTS %s ( %s )' % (table, ', '.join(definitions))

    conn = sqlite3.connect(database_name)
    try:
        conn.execute(sql)
        conn.commit()
    finally:
        # Release the database file even when the statement fails.
        conn.close()

def scrub(name):
    '''Return `name` with every character that is not alphanumeric or "_" removed.'''
    kept = [symbol for symbol in name if symbol == '_' or symbol.isalnum()]
    return ''.join(kept)

def scrubQuoted(name):
    '''Return `name` keeping only alphanumerics, underscores and spaces.'''
    kept = []
    for symbol in name:
        if symbol in ('_', ' ') or symbol.isalnum():
            kept.append(symbol)
    return ''.join(kept)

def dictValuePad(key):
    '''Return str(key) wrapped in a pair of double quotes.'''
    quote = '"'
    return ''.join([quote, str(key), quote])

def addEntity(name, hashtable, database_name):
    '''Insert one row into table `name`.

    name          -- table name (scrubbed before use)
    hashtable     -- dict mapping column name to the value to store
    database_name -- path of the SQLite database file

    Values are bound as SQL parameters, so quotes or SQL fragments inside
    them are stored verbatim instead of being interpreted.  Column names
    cannot be bound; spaces in them become underscores and the result is
    scrubbed before interpolation.

    Raises Exception, with the failing SQL appended to the message, when
    SQLite reports an OperationalError.
    '''
    table = scrub(name)
    columns = []
    values = []
    for key, value in hashtable.items():
        columns.append(scrub(key.replace(' ', '_')))
        values.append(value)

    placeholders = ', '.join('?' for _ in values)
    sql = "INSERT INTO %s (%s) VALUES (%s)" % (table, ','.join(columns), placeholders)

    conn = sqlite3.connect(database_name)
    try:
        try:
            conn.execute(sql, values)
        except sqlite3.OperationalError as e:
            # Keep the historical contract: a plain Exception carrying the SQL.
            raise Exception(str(e) + ":" + sql)
        conn.commit()
    finally:
        # Release the database file on success and on failure alike.
        conn.close()

def findTableContainingEntityWithIdent(ident, database_name):
    '''Return the name of the first table holding a row whose ident matches.

    ident         -- identifier value to look for (compared exactly)
    database_name -- path of the SQLite database file

    Tables are visited in the order sqlite_master lists them.  Tables
    without an "ident" column are skipped.  Returns None when no table
    contains the identifier.
    '''
    conn = sqlite3.connect(database_name)
    try:
        c = conn.cursor()
        c.execute("SELECT name FROM sqlite_master WHERE type = 'table'")
        tables = [row[0] for row in c.fetchall()]
        for table in tables:
            # Quote the table name so unusual names cannot break the SQL.
            quoted = '"%s"' % table.replace('"', '""')
            # table_info rows are (cid, name, type, ...); index 1 is the
            # column name.
            c.execute('PRAGMA table_info(%s)' % quoted)
            column_names = [info[1].lower() for info in c.fetchall()]
            if 'ident' not in column_names:
                continue
            # Bind ident as a parameter: no quoting problems, no injection.
            c.execute('SELECT 1 FROM %s WHERE ident = ? LIMIT 1' % quoted, (ident,))
            if c.fetchone():
                return table
        return None
    finally:
        conn.close()

def modifyTable(table, new_column, database_name):
    '''Add a TEXT column to an existing table.

    table         -- table name (scrubbed before use)
    new_column    -- column to add; spaces become underscores and the
                     result is scrubbed before use
    database_name -- path of the SQLite database file

    Raises sqlite3.OperationalError when SQLite rejects the statement, for
    example because the table is missing or the column already exists.
    '''
    # Formatting first keeps a non-string table (e.g. None) on the old path:
    # SQLite rejects the statement rather than Python failing beforehand.
    table = scrub("%s" % (table,))
    column = scrub(new_column.replace(' ', '_'))
    sql = "ALTER TABLE %s ADD COLUMN %s TEXT" % (table, column)

    conn = sqlite3.connect(database_name)
    try:
        conn.execute(sql)
        conn.commit()
    finally:
        # Release the database file even when the ALTER fails.
        conn.close()

def grabEntity(name, ident, database_name):
    '''Fetch the row of table `name` whose ident column equals `ident`.

    name          -- table name (scrubbed before use)
    ident         -- identifier value, bound as an SQL parameter and
                     compared exactly as given
    database_name -- path of the SQLite database file

    Returns a sqlite3.Row (columns addressable by name), or None when no
    row matches.
    '''
    conn = sqlite3.connect(database_name)
    conn.row_factory = sqlite3.Row
    try:
        sql = "SELECT * FROM %s WHERE ident = ?" % scrub(name)
        # Binding ident avoids both injection and the lossy scrubbing that
        # made identifiers containing punctuation impossible to find.
        return conn.execute(sql, (ident,)).fetchone()
    finally:
        conn.close()

def updateEntity(table, hashtable ,database_name):
    '''Update the row of `table` identified by hashtable["ident"].

    table         -- table name (scrubbed before use)
    hashtable     -- dict mapping column name to new value; it must contain
                     an "ident" entry, which selects the row to update
    database_name -- path of the SQLite database file

    Every entry of `hashtable` is written, including "ident" itself.
    Values are bound as SQL parameters; column names have spaces turned
    into underscores and are scrubbed before interpolation.

    Raises KeyError when `hashtable` has no "ident" entry and
    sqlite3.OperationalError when SQLite rejects the statement.
    '''
    ident = hashtable["ident"]
    table = scrub(table)

    assignments = []
    values = []
    for key, value in hashtable.items():
        assignments.append(scrub(key.replace(' ', '_')) + " = ?")
        values.append(value)
    # The final placeholder belongs to the WHERE clause.
    values.append(ident)
    sql = "UPDATE %s SET %s WHERE ident = ?" % (table, ', '.join(assignments))

    conn = sqlite3.connect(database_name)
    try:
        conn.execute(sql, values)
        conn.commit()
    finally:
        conn.close()



151 changes: 151 additions & 0 deletions faq/faq.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
from test_faq import *
from db import *
import re
from urllib import FancyURLopener
import pdb
import random
import inflect
p = inflect.engine()

# Raw facts about the two courses the bot knows out of the box.
CSCI3651_PREREQ = "CSCI 2911, CSCI 2912"
CSCI3651_TEXT = "Artificial Intelligence for Games"
CSCI3651_CRN = "3335"
CSCI3211_TEXT = "Engineering Long Lasting Software"
CSCI3211_CRN = "2802"
START = "Tuesday, September 4th, 2012"
END = "Sunday, December 16th, 2012"

# course id -> {aspect name -> answer}
courseCache = {
    "CSCI3651": {
        "textbook": CSCI3651_TEXT,
        "CRN": CSCI3651_CRN,
        "start date": START,
        "end date": END,
    },
    "CSCI3211": {
        "textbook": CSCI3211_TEXT,
        "CRN": CSCI3211_CRN,
        "start date": START,
        "end date": END,
    },
}

# aspect name -> words that signal the user is asking about that aspect
# (could be doing wordnet lookup here ...)
aspectList = {
    "textbook": set(["textbook", "book", "text", "reading"]),
    "CRN": set(["crn", "reference", "CRN"]),
    "start date": set(["begin", "start"]),
    "end date": set(["end", "finish", "over", "stop"]),
}

# course id -> lower-case phrases that refer to the course
courseList = {
    "CSCI3651": ["3651", "game programming", "game"],
    "CSCI3211": ["3211", "systems analysis", "software engineering"],
}
courses = courseList.keys()

def query(userSaid,conversationTitle=None,talking=None):
    '''Natural language (hopefully) interface to store information and query it too.

    userSaid          -- the text typed by the user
    conversationTitle -- accepted for interface compatibility; currently unused
    talking           -- accepted for interface compatibility; currently unused

    The text is first offered to process(); if that recognises a statement
    its reply is returned.  Otherwise the text is scanned for a known
    course synonym and a known aspect word, and the stored answer is
    returned.  When no course is mentioned the text is handed to a web
    search and the URL that search lands on is returned.
    '''
    statementCheck = process(userSaid)
    if statementCheck:
        return statementCheck

    # Look the synonyms up in the whole sentence rather than in the split
    # words, because several of them span more than one word.
    lowerUserSaid = userSaid.lower()
    courseMatch = None
    for course in courses:
        for synonym in courseList[course]:
            # `in` also accepts a synonym at the very start of the text,
            # which the previous find(...) > 0 test rejected.
            if synonym in lowerUserSaid:
                courseMatch = course
                break

    # TODO: fall back to conversationTitle when the sentence names no
    # course, so "what's the textbook for this course" can be answered.

    if courseMatch:
        aspect = getAspect(set(re.split(r'\W+', userSaid)))
        if aspect:
            return humanizedQuestion(courseMatch, aspect)
        # could do hpu.edu site specific IFL search here
        return "I'm not sure about that aspect of " + courseMatch

    # Imported here so the block works on either major Python version.
    try:
        from urllib import quote_plus
    except ImportError:
        from urllib.parse import quote_plus

    # Escape the user's text so characters such as "&" or "#" stay inside
    # the q= parameter instead of changing the URL's structure.
    page = MyOpener().open('http://google.com/search?btnI=1&q=' + quote_plus(userSaid))
    try:
        page.read()
        response = page.geturl()
    finally:
        page.close()
    return "Does this help? " + response

def getAspect(userSplitSet):
    '''Return the first aspect whose synonym set shares a word with
    userSplitSet, or None when no aspect matches.'''
    candidates = (
        aspect
        for aspect, synonyms in aspectList.items()
        if userSplitSet.intersection(synonyms)
    )
    return next(candidates, None)

def question(course, aspect):
    '''Look up the stored answer for `aspect` of `course`.

    Returns "duh ..." when the course is unknown (or has no details);
    raises KeyError when the course is known but the aspect is not.
    '''
    details = courseCache.get(course)
    if details:
        return details[aspect]
    return "duh ..."

def humanize(camelCase):
    '''Turn a camelCase identifier into lower-case, space-separated words.

    A space is inserted wherever a lower-case letter is directly followed
    by an upper-case one; the whole result is then lower-cased.
    '''
    # Raw strings keep the \g<n> group references away from Python's own
    # string-escape processing.
    return re.sub(r"([a-z])([A-Z])", r"\g<1> \g<2>", camelCase).lower()

def humanizedQuestion(course, aspect):
    '''Phrase the stored answer for `aspect` of `course` as a sentence.'''
    pieces = ["The ", aspect, " for ", course, " is '", question(course, aspect), "'"]
    return "".join(pieces)

def greetings():
    '''Return one of the canned opening lines, picked at random.'''
    openers = ["sup, dog!", "hello", "hi there", "dude", "zaapp?"]
    return random.choice(openers)

class MyOpener(FancyURLopener):
    '''FancyURLopener that announces itself with a Firefox user-agent string.'''

    # `version` is the attribute the urllib opener classes use as their
    # user-agent string.
    version = (
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) '
        'Gecko/20071127 Firefox/2.0.0.11'
    )

def process(statement,database_name = "faq.db"):
    ''' Allows us to create entities via statements like "There is a course CSCI4702 called Mobile Programming"
    and modify entities with statements like "CSCI4702 has a start date of Jan 31st 2013"

    statement     -- the sentence to interpret
    database_name -- path of the SQLite database file to read and write

    Returns "OK" when the statement was understood and stored.  Returns
    None when the statement matches neither form, or when it names an
    entity that no table contains.
    '''
    # [\w\s]+ accepts the same text as the earlier (?:\s|\w+)+ groups, but
    # has no nested quantifier and so cannot backtrack exponentially on
    # long input that fails to match.
    match = re.search(r'There is a (\w+) ([\w\s]+) called ([\w\s]+)', statement)
    if match:
        table = p.plural(match.group(1))
        # createTable issues CREATE TABLE IF NOT EXISTS, so a table that is
        # already there needs no special handling.
        createTable(table, ["ident"], database_name)
        addEntity(table, {"ident":match.group(2),"name":match.group(3)},database_name)
        return "OK"

    match = re.search(r'([\w\s]+?) has a ([\w\s]+) of ([\w\s]+)', statement)
    if match:
        # need to search all tables
        ident = match.group(1)
        table = findTableContainingEntityWithIdent(ident, database_name)
        if table is None:
            # Nothing is stored under this identifier, so there is nothing
            # to modify; report the statement as not handled.
            return None
        new_column = match.group(2)
        try:
            modifyTable(table, new_column, database_name)
        except sqlite3.OperationalError as e:
            # SQLite reports an existing column as
            # "duplicate column name: <column>"; that case is fine because
            # the value is written by the update below.
            if not str(e).startswith("duplicate column name"):
                raise
        updateEntity(table, {"ident":ident,new_column:match.group(3)},database_name)
        return "OK"
    return None

if __name__ == "__main__":
    # Interactive loop: greet, then answer each line until the user types "quit".
    print(greetings())
    for line in iter(lambda: raw_input("> "), "quit"):
        print(query(line))





Binary file added faq/test.db
Binary file not shown.
42 changes: 42 additions & 0 deletions faq/test_db.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
from db import *
import unittest
import os

# Scratch database file used by the tests.
TEST_DATABASE = "test.db"

# Expected field values for the sample course.
CSCI3651_PREREQ = "CSCI 2911, CSCI 2912"
CSCI3651_TEXT = "Artificial Intelligence for Games"
CSCI3651_CRN = "3335"
CSCI3211_TEXT = "Engineering Long Lasting Software"
CSCI3211_CRN = "2802"
START = "Tuesday, September 4th, 2012"
END = "Sunday, December 16th, 2012"

# A complete course row, keyed by column name.
CSCI3651 = {
    "name": "Game Programming",
    "ident": "CSCI3651",
    "textbook": CSCI3651_TEXT,
    "CRN": CSCI3651_CRN,
    "start_date": START,
    "end_date": END,
}

class TestDb(unittest.TestCase):
    '''Tests for the SQLite helper functions.'''

    def setUp(self):
        '''Delete any database file left over from an earlier test.'''
        # Remove the file without opening it first: deleting a file that
        # is still open fails on Windows.
        if os.path.exists(TEST_DATABASE):
            os.remove(TEST_DATABASE)

    def test_create_table(self):
        '''A row added to a freshly created table can be read back.'''
        createTable("courses", ["ident","crn","textbook","start_date","end_date"], TEST_DATABASE)
        addEntity("courses",CSCI3651, TEST_DATABASE)
        entity = grabEntity("courses",CSCI3651["ident"], TEST_DATABASE)
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(entity['crn'],CSCI3651_CRN)
        self.assertEqual(entity['textbook'],CSCI3651_TEXT)
        self.assertEqual(entity['start_date'],START)

    def test_scrub(self):
        '''scrub() drops spaces and punctuation but keeps underscores.'''
        self.assertEqual("DROPTABLES",scrub("DROP TABLES --"))
        self.assertEqual("column_name",scrub("column_name"))






57 changes: 57 additions & 0 deletions faq/test_faq.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
from faq import *
import unittest
from test_db import *

class TestFaq(unittest.TestCase):
    '''Tests for the question answering and statement processing functions.'''

    def setUp(self):
        '''Delete any database file left over from an earlier test.'''
        # Remove the file without opening it first: deleting a file that
        # is still open fails on Windows.
        if os.path.exists(TEST_DATABASE):
            os.remove(TEST_DATABASE)

    def test(self):
        '''Questions about the built-in courses get the stored answers.'''
        # Smoke check: every course/aspect pair can be phrased without error.
        for course in courses:
            for aspect in aspectList:
                humanizedQuestion(course,aspect)
        self.assertEqual(query("What's the CRN of CSCI3651"),"The CRN for CSCI3651 is '3335'")
        self.assertEqual(query("What's the textbook of Systems Analysis?"),"The textbook for CSCI3211 is 'Engineering Long Lasting Software'")
        self.assertEqual(query("what's the CRN for Systems Analysis"),"The CRN for CSCI3211 is '2802'")
        self.assertEqual(query("What's the start date of 3651"),"The start date for CSCI3651 is 'Tuesday, September 4th, 2012'")
        # NOTE(review): the web-search fallback has no assertion here;
        # presumably because its result depends on a live search page.

    def testCreation(self):
        ''' test we can create and modify arbitrary entities '''
        process("There is a course CSCI4702 called Mobile Programming", TEST_DATABASE)
        entity = grabEntity("courses", "CSCI4702", TEST_DATABASE)
        self.assertEqual(entity['name'],"Mobile Programming")
        self.assertEqual(entity['ident'],"CSCI4702")
        process("CSCI4702 has a start date of Jan 31st 2013", TEST_DATABASE)
        entity = grabEntity("courses", "CSCI4702", TEST_DATABASE)
        self.assertEqual(entity['start_date'],"Jan 31st 2013")
        self.assertEqual(entity['name'],"Mobile Programming")
        self.assertEqual(entity['ident'],"CSCI4702")

    def testOtherCreation(self):
        ''' test the same flow with a multi-word identifier and another entity type '''
        process("There is a professor Sam Joseph called Sam", TEST_DATABASE)
        entity = grabEntity("professors", "Sam Joseph", TEST_DATABASE)
        self.assertEqual(entity['name'],"Sam")
        self.assertEqual(entity['ident'],"Sam Joseph")
        process("Sam Joseph has a birth date of May 13th 1972", TEST_DATABASE)
        entity = grabEntity("professors", "Sam Joseph", TEST_DATABASE)
        self.assertEqual(entity['birth_date'],"May 13th 1972")
        self.assertEqual(entity['name'],"Sam")
        self.assertEqual(entity['ident'],"Sam Joseph")









2 changes: 1 addition & 1 deletion twss.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def twss(sentence,vocabList,model):
x = processSentence(sentence, vocabList)
#print [x]
p_label, p_acc, p_val = svm_predict([1], [x], model, '-b 1 -q')
#print p_label, p_acc, p_val
print p_label, p_acc, p_val
if p_label[0] == 1:
return "That's what she said!\n"
else:
Expand Down

0 comments on commit 78c83ee

Please sign in to comment.