-
Notifications
You must be signed in to change notification settings - Fork 16
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
First version of a writable natural-language FAQ system - at present we can create stuff (tests in place), but still need to hook it up to query
- Loading branch information
Sam Joseph
committed
Jul 3, 2012
1 parent
d94a6fb
commit 78c83ee
Showing
6 changed files
with
351 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
import sqlite3 | ||
import pdb | ||
|
||
def createTable(name, columns, database_name):
    '''Create table `name` in the SQLite file `database_name` if it is missing.

    Every table starts with a ``name text`` column, followed by one ``text``
    column for each entry of `columns`.  Table and column names are run
    through scrub() because SQLite cannot bind identifiers as parameters.

    Entries of `columns` that scrub down to nothing, or that would repeat the
    built-in ``name`` column, are skipped so the statement stays valid; an
    empty `columns` list therefore yields a table with just ``name``.
    '''
    # http://stackoverflow.com/questions/3247183/variable-table-name-in-sqlite
    name = scrub(name)
    definitions = ['name text']
    for column in columns:
        column = scrub(column)
        if column and column.lower() != 'name':
            definitions.append(column + " text")
    sql = 'CREATE TABLE IF NOT EXISTS %s ( %s )' % (name, ', '.join(definitions))

    conn = sqlite3.connect(database_name)
    try:
        c = conn.cursor()
        c.execute(sql)
        # Save (commit) the changes
        conn.commit()
        c.close()
    finally:
        # Release the database handle even when the statement fails.
        conn.close()
|
||
def scrub(name):
    '''Return `name` reduced to its alphanumeric and underscore characters.

    Used to make table and column names safe to splice into SQL text.
    '''
    kept = []
    for character in name:
        if character == '_' or character.isalnum():
            kept.append(character)
    return ''.join(kept)
|
||
def scrubQuoted(name):
    '''Return `name` keeping only alphanumerics, underscores and spaces.

    Same filter as scrub(), except that spaces survive.
    '''
    kept = []
    for character in name:
        if character in ('_', ' ') or character.isalnum():
            kept.append(character)
    return ''.join(kept)
|
||
def dictValuePad(key):
    '''Return the string form of `key` wrapped in double quotes.'''
    return ''.join(['"', str(key), '"'])
|
||
def addEntity(name, hashtable, database_name):
    '''Insert one row into table `name` of the SQLite file `database_name`.

    `hashtable` maps column names to the values to store.  Column names are
    scrubbed like the table name (identifiers cannot be bound), while the
    values are passed as bound parameters, so quotes inside a value are
    stored literally and a value can never be read as SQL or as a column
    reference.

    Raises Exception carrying the SQLite message and the statement text when
    SQLite rejects the insert with an OperationalError.
    '''
    name = scrub(name)
    items = list(hashtable.items())
    columns = [scrub(key) for (key, value) in items]
    values = [value for (key, value) in items]
    if columns:
        sql = "INSERT INTO %s (%s) VALUES (%s)" % (
            name, ','.join(columns), ', '.join(['?'] * len(values)))
    else:
        # "INSERT INTO t () VALUES ()" is not valid SQLite syntax.
        sql = "INSERT INTO %s DEFAULT VALUES" % name

    conn = sqlite3.connect(database_name)
    try:
        c = conn.cursor()
        try:
            c.execute(sql, values)
        except sqlite3.OperationalError as e:
            raise Exception(str(e) + ":" + sql)
        conn.commit()
        c.close()
    finally:
        # Release the database handle even when the insert fails.
        conn.close()
|
||
def findTableContainingEntityWithIdent(ident, database_name):
    '''Return the name of the first table holding a row whose ident is `ident`.

    Walks every table listed in sqlite_master.  Tables that have no ``ident``
    column are skipped instead of aborting the search with an
    OperationalError.  `ident` is passed as a bound parameter, so it is
    compared literally whatever characters it contains.

    Returns None when no table contains a matching row.
    '''
    conn = sqlite3.connect(database_name)
    try:
        c = conn.cursor()
        c.execute("SELECT name FROM sqlite_master WHERE type = 'table'")
        tables = [row[0] for row in c.fetchall()]
        for table in tables:
            # Identifiers cannot be bound; quote the name so unusual table
            # names still parse.
            quoted = '"' + table.replace('"', '""') + '"'
            c.execute("PRAGMA table_info(%s)" % quoted)
            # Row layout of table_info: (cid, name, type, notnull, dflt, pk)
            columns = [info[1].lower() for info in c.fetchall()]
            if "ident" not in columns:
                continue
            c.execute("SELECT 1 FROM %s WHERE ident = ? LIMIT 1" % quoted,
                      (ident,))
            if c.fetchone():
                return table
        return None
    finally:
        # Closing the connection also releases the cursor on every exit path.
        conn.close()
|
||
def modifyTable(table, new_column, database_name):
    '''Add a TEXT column called `new_column` to `table`.

    Spaces in `new_column` become underscores ("start date" -> start_date);
    both names are then scrubbed, because identifiers cannot be bound as
    parameters and would otherwise be spliced into the statement verbatim.

    Raises sqlite3.OperationalError when SQLite rejects the statement, for
    example because the column already exists.
    '''
    table = scrub(table)
    new_column = scrub(new_column.replace(' ', '_'))
    sql = "ALTER TABLE %s ADD COLUMN %s TEXT" % (table, new_column)

    conn = sqlite3.connect(database_name)
    try:
        c = conn.cursor()
        c.execute(sql)
        c.close()
        conn.commit()
    finally:
        # Release the database handle even when the ALTER fails.
        conn.close()
|
||
def grabEntity(name, ident, database_name):
    '''Fetch the row of table `name` whose ident column equals `ident`.

    `ident` is bound as a parameter and matched exactly, so idents that
    contain punctuation can be retrieved.  For backward compatibility, when
    the exact value is not found the lookup is retried with the
    scrubQuoted() form of `ident`, which is what earlier versions searched
    for.

    Returns a sqlite3.Row (columns addressable by name), or None when there
    is no matching row.
    '''
    name = scrub(name)
    sql = "SELECT * FROM %s WHERE ident = ?" % name

    conn = sqlite3.connect(database_name)
    try:
        conn.row_factory = sqlite3.Row
        c = conn.cursor()
        c.execute(sql, (ident,))
        result = c.fetchone()
        if result is None:
            legacyIdent = scrubQuoted(ident)
            if legacyIdent != ident:
                c.execute(sql, (legacyIdent,))
                result = c.fetchone()
        c.close()
    finally:
        # Release the database handle even when the query fails.
        conn.close()
    return result
|
||
def updateEntity(table, hashtable ,database_name):
    '''Update the row of `table` identified by hashtable["ident"].

    Every key of `hashtable` (including "ident") names a column to set;
    spaces in keys become underscores and the result is scrubbed, matching
    how modifyTable() names new columns.  Values and the ident used in the
    WHERE clause are bound as parameters, so apostrophes and other special
    characters are stored literally.

    Raises KeyError when `hashtable` has no "ident" entry.
    '''
    table = scrub(table)
    ident = hashtable["ident"]
    items = list(hashtable.items())
    assignments = ', '.join(
        [scrub(key.replace(' ', '_')) + " = ?" for (key, value) in items])
    values = [value for (key, value) in items]
    sql = "UPDATE %s SET %s WHERE ident = ?" % (table, assignments)

    conn = sqlite3.connect(database_name)
    try:
        conn.row_factory = sqlite3.Row
        c = conn.cursor()
        c.execute(sql, values + [ident])
        c.close()
        conn.commit()
    finally:
        # Release the database handle even when the update fails.
        conn.close()
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,151 @@ | ||
from test_faq import * | ||
from db import * | ||
import re | ||
from urllib import FancyURLopener | ||
import pdb | ||
import random | ||
import inflect | ||
p = inflect.engine() | ||
|
||
# Hard-coded facts about the courses the FAQ can answer questions on.
CSCI3651_PREREQ = "CSCI 2911, CSCI 2912"
CSCI3651_TEXT = "Artificial Intelligence for Games"
CSCI3651_CRN = "3335"
CSCI3211_TEXT = "Engineering Long Lasting Software"
CSCI3211_CRN = "2802"
START = "Tuesday, September 4th, 2012"
END = "Sunday, December 16th, 2012"

# course code -> {aspect name -> answer text}
courseCache = {
    "CSCI3651": {
        "textbook": CSCI3651_TEXT,
        "CRN": CSCI3651_CRN,
        "start date": START,
        "end date": END,
    },
    "CSCI3211": {
        "textbook": CSCI3211_TEXT,
        "CRN": CSCI3211_CRN,
        "start date": START,
        "end date": END,
    },
}

# aspect name -> words in a question that refer to that aspect
# (could be doing wordnet lookup here ...)
aspectList = {
    "textbook": set(["textbook", "book", "text", "reading"]),
    "CRN": set(["crn", "reference", "CRN"]),
    "start date": set(["begin", "start"]),
    "end date": set(["end", "finish", "over", "stop"]),
}

# course code -> lower-case phrases that identify the course in a question
courseList = {
    "CSCI3651": ["3651", "game programming", "game"],
    "CSCI3211": ["3211", "systems analysis", "software engineering"],
}
courses = courseList.keys()
|
||
def query(userSaid,conversationTitle=None,talking=None):
    '''natural language (hopefully) interface to store information and query it too

    `userSaid` is first handed to process(); if that recognises a statement
    its reply is returned unchanged.  Otherwise the text is treated as a
    question: when it mentions a known course the requested aspect is looked
    up, and when it does not, the text is sent to a Google "I'm feeling
    lucky" search and the URL that search lands on is returned.

    `conversationTitle` and `talking` are accepted but not used yet.
    '''
    statementCheck = process(userSaid)
    if statementCheck:
        return statementCheck
    userSplit = re.split(r'\W+',userSaid)

    courseMatch = None
    lowerUserSaid = userSaid.lower()
    for course in courses:
        for synonym in courseList[course]:
            # Substring test rather than find() > 0, which ignored a synonym
            # sitting at the very start of the sentence (index 0).
            if synonym in lowerUserSaid:
                courseMatch = course
                break

    # The conversation title could allow us to answer things like "what's the
    # textbook for this course", but we should check for presence of aspect,
    # and things like "this course" - really should get set up with sniffer
    # or something to start managing all these things ...

    if courseMatch:
        aspect = getAspect(set(userSplit))
        if aspect:
            return humanizedQuestion(courseMatch,aspect)
        # could do hpu.edu site specific IFL search here
        return "I'm not sure about that aspect of " + courseMatch

    # Imported here so the block stays self-contained on Python 2 and 3.
    try:
        from urllib import quote_plus
    except ImportError:
        from urllib.parse import quote_plus
    # Encode the user's text so characters such as '&' or '#' cannot cut the
    # search query short.
    page = MyOpener().open('http://google.com/search?btnI=1&q=' + quote_plus(userSaid))
    try:
        page.read()
        response = page.geturl()
    finally:
        page.close()
    return "Does this help? "+ response
|
||
def getAspect(userSplitSet):
    '''Return the first aspect whose trigger words overlap `userSplitSet`.

    `userSplitSet` is the set of words taken from the user's sentence.
    Returns None when no aspect in aspectList shares a word with it.
    '''
    for aspect in aspectList:
        sharedWords = userSplitSet.intersection(aspectList[aspect])
        if sharedWords:
            return aspect
    return None
|
||
def question(course, aspect):
    '''Look up `aspect` of `course` in courseCache.

    Returns "duh ..." when the course is unknown (or has no details);
    raises KeyError when the course is known but lacks that aspect.
    '''
    details = courseCache.get(course)
    if details:
        return details[aspect]
    return "duh ..."
|
||
def humanize(camelCase):
    '''Turn a camelCase name into lower-case words separated by spaces.

    A space is inserted wherever a lower-case letter is directly followed
    by an upper-case one, then the whole string is lower-cased.
    '''
    spaced = re.sub(r"([a-z])([A-Z])", r"\g<1> \g<2>", camelCase)
    return spaced.lower()
|
||
def humanizedQuestion(course, aspect):
    '''Build the sentence that answers "what is the `aspect` of `course`?".'''
    answer = question(course, aspect)
    return "".join(["The ", aspect, " for ", course, " is '", answer, "'"])
|
||
def greetings():
    '''Return one of the canned greetings, picked at random.'''
    options = ["sup, dog!", "hello", "hi there", "dude", "zaapp?"]
    return random.choice(options)
|
||
class MyOpener(FancyURLopener):
    '''URL opener that announces itself as a desktop Firefox browser.'''

    # FancyURLopener uses `version` as the user agent of its requests.
    version = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11'
|
||
def process(statement,database_name = "faq.db"):
    ''' Allows us to create entities via statements like "There is a course CSCI4702 called Mobile Programming"
    and modify entities with statements like "CSCI4702 has a start date of Jan 31st 2013"

    Returns "OK" when `statement` was recognised and stored.  Returns None
    when it matches neither form, or when it names an entity that is not in
    any table, so the caller can go on to treat the text as a question.
    '''
    # [\w\s]+ accepts the same text as the former (?:\s|\w+)+ but has no
    # nested quantifier, so a non-matching sentence cannot trigger
    # exponential backtracking.
    match = re.search(r'There is a (\w+) ([\w\s]+) called ([\w\s]+)',statement)
    if match:
        table = p.plural(match.group(1))
        try:
            createTable(table, ["ident"], database_name)
        except sqlite3.OperationalError as e:
            # An existing table is fine; anything else is a real failure.
            if "already exists" not in str(e):
                raise
        addEntity(table, {"ident":match.group(2).strip(),"name":match.group(3).strip()},database_name)
        return "OK"
    match = re.search(r'([\w\s]+?) has a ([\w\s]+) of ([\w\s]+)',statement)
    if match:
        # need to search all tables
        # The match can begin on the blank before the ident, so trim it.
        ident = match.group(1).strip()
        table = findTableContainingEntityWithIdent(ident, database_name)
        if table is None:
            # Unknown entity: there is nothing to modify.
            return None
        new_column = match.group(2).strip()
        try:
            modifyTable(table, new_column, database_name)
        except sqlite3.OperationalError as e:
            # SQLite reports an existing column as "duplicate column name: x";
            # in that case the column is simply reused for the update below.
            if "duplicate column name" not in str(e):
                raise
        updateEntity(table, {"ident":ident,new_column:match.group(3).strip()},database_name)
        return "OK"
    return None
|
||
if __name__ == "__main__":
    # Minimal console chat loop: greet, then answer each line until "quit".
    print(greetings())
    n = raw_input("> ")
    while n != "quit":
        print(query(n))
        n = raw_input("> ")
|
||
|
||
|
||
|
||
|
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
from db import * | ||
import unittest | ||
import os | ||
|
||
# SQLite file the tests create and delete.
TEST_DATABASE = "test.db"

# Expected field values for the sample course.
CSCI3651_PREREQ = "CSCI 2911, CSCI 2912"
CSCI3651_TEXT = "Artificial Intelligence for Games"
CSCI3651_CRN = "3335"
CSCI3211_TEXT = "Engineering Long Lasting Software"
CSCI3211_CRN = "2802"
START = "Tuesday, September 4th, 2012"
END = "Sunday, December 16th, 2012"

# A complete course row, keyed by column name.
CSCI3651 = {
    "name": "Game Programming",
    "ident": "CSCI3651",
    "textbook": CSCI3651_TEXT,
    "CRN": CSCI3651_CRN,
    "start_date": START,
    "end_date": END,
}
|
||
class TestDb(unittest.TestCase):
    '''Exercises the db helpers against a throw-away SQLite file.'''

    def setUp(self):
        # Start every test from an empty database.  The file is removed
        # directly instead of being opened first: deleting a file that is
        # still open fails on some platforms.
        if os.path.exists(TEST_DATABASE):
            os.remove(TEST_DATABASE)

    def test_create_table(self):
        '''A row added to a freshly created table can be read back by ident.'''
        createTable("courses", ["ident","crn","textbook","start_date","end_date"], TEST_DATABASE)
        addEntity("courses",CSCI3651, TEST_DATABASE)
        entity = grabEntity("courses",CSCI3651["ident"], TEST_DATABASE)
        self.assertEqual(entity['crn'],CSCI3651_CRN)
        self.assertEqual(entity['textbook'],CSCI3651_TEXT)
        self.assertEqual(entity['start_date'],START)

    def test_scrub(self):
        '''scrub() drops everything except alphanumerics and underscores.'''
        self.assertEqual("DROPTABLES",scrub("DROP TABLES --"))
        self.assertEqual("column_name",scrub("column_name"))
|
||
|
||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
from faq import * | ||
import unittest | ||
from test_db import * | ||
|
||
class TestFaq(unittest.TestCase):
    '''Exercises the question answering and the statement processing of faq.'''

    def setUp(self):
        # Start every test from an empty database.  The file is removed
        # directly instead of being opened first: deleting a file that is
        # still open fails on some platforms.
        if os.path.exists(TEST_DATABASE):
            os.remove(TEST_DATABASE)

    def test(self):
        ''' test the canned answers for the hard-coded courses '''
        # Smoke check: every course/aspect pair must produce a sentence
        # without raising.
        for course in courses:
            for aspect in aspectList.keys():
                humanizedQuestion(course,aspect)
        self.assertEqual(query("What's the CRN of CSCI3651"),"The CRN for CSCI3651 is '3335'")
        self.assertEqual(query("What's the textbook of Systems Analysis?"),"The textbook for CSCI3211 is 'Engineering Long Lasting Software'")
        self.assertEqual(query("what's the CRN for Systems Analysis"),"The CRN for CSCI3211 is '2802'")
        self.assertEqual(query("What's the start date of 3651"),"The start date for CSCI3651 is 'Tuesday, September 4th, 2012'")
        #self.assertEqual(query("So, what are you wearing?"),"Does this help? http://uk.gamespot.com/the-elder-scrolls-v-skyrim/forum/so-what-are-you-wearing-63261933/")

    def testCreation(self):
        ''' test we can create and modify arbitrary entities '''
        process("There is a course CSCI4702 called Mobile Programming", TEST_DATABASE)
        entity = grabEntity("courses", "CSCI4702", TEST_DATABASE)
        self.assertEqual(entity['name'],"Mobile Programming")
        self.assertEqual(entity['ident'],"CSCI4702")
        process("CSCI4702 has a start date of Jan 31st 2013", TEST_DATABASE)
        entity = grabEntity("courses", "CSCI4702", TEST_DATABASE)
        self.assertEqual(entity['start_date'],"Jan 31st 2013")
        self.assertEqual(entity['name'],"Mobile Programming")
        self.assertEqual(entity['ident'],"CSCI4702")

    def testOtherCreation(self):
        ''' test creation and modification when the ident contains a space '''
        process("There is a professor Sam Joseph called Sam", TEST_DATABASE)
        entity = grabEntity("professors", "Sam Joseph", TEST_DATABASE)
        self.assertEqual(entity['name'],"Sam")
        self.assertEqual(entity['ident'],"Sam Joseph")
        process("Sam Joseph has a birth date of May 13th 1972", TEST_DATABASE)
        entity = grabEntity("professors", "Sam Joseph", TEST_DATABASE)
        self.assertEqual(entity['birth_date'],"May 13th 1972")
        self.assertEqual(entity['name'],"Sam")
        self.assertEqual(entity['ident'],"Sam Joseph")
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters