Skip to content

Commit

Permalink
trying to get compound Pattern constraints to work
Browse files Browse the repository at this point in the history
  • Loading branch information
Sam Joseph committed Jul 27, 2012
1 parent 50590a8 commit ca4d008
Show file tree
Hide file tree
Showing 5 changed files with 110 additions and 15 deletions.
46 changes: 46 additions & 0 deletions faq/extractor/extractor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from pattern.search import *
from pattern.en import *

def extract(statement):
    """Search a statement for a compound "There is a <noun> <Name> ..." phrase.

    The statement is parsed with ``relations=True, lemmata=True, light=True``
    and wrapped in a ``Sentence``. A ``Pattern`` is assembled from six
    ``Constraint`` objects and searched against that parsed sentence.

    :param statement: the raw text to analyse.
    :returns: the result of ``Pattern.search`` on the parsed sentence
        (callers take ``len()`` of it to check whether anything matched).
    """
    sentence = Sentence(parse(statement, relations=True, lemmata=True, light=True))

    # NOTE(review): several of these strings contain more than one
    # space-separated token. Confirm that Constraint.fromstring interprets
    # them as intended and not as a single multi-word constraint.
    constraint_specs = (
        "There be DT",
        "NN+",
        "(DT)",
        "(RB) (JJ) NNP+",
        "(call) (DT)",
        "(RB) (JJ) (NNPS|NNP)+",
    )
    constraints = [Constraint.fromstring(spec) for spec in constraint_specs]
    pattern = Pattern(sequence=constraints)

    # Search the parsed sentence (as the sibling extractors do) rather than
    # the raw string, so the part-of-speech constraints have tags to match.
    return pattern.search(sentence)


def basicExtract(statement):
    """Return the matches of ``'(DT) (RB) (JJ) NN+'`` in the parsed statement.

    :param statement: the raw text to analyse.
    :returns: the result of ``Pattern.search`` on the parsed sentence.
    """
    tagged = parse(statement, relations=True, lemmata=True, light=True)
    sentence = Sentence(tagged)
    return Pattern.fromstring('(DT) (RB) (JJ) NN+').search(sentence)

def myExtract(statement):
    """Return the matches of ``'There be DT NN+'`` in the parsed statement.

    :param statement: the raw text to analyse.
    :returns: the result of ``Pattern.search`` on the parsed sentence.
    """
    tagged = parse(statement, relations=True, lemmata=True, light=True)
    sentence = Sentence(tagged)
    return Pattern.fromstring('There be DT NN+').search(sentence)


def constraintSequenceExtract(statement):
    """Search the parsed statement with a Pattern built from two Constraints.

    The constraints are created from the strings ``"There be DT"`` and
    ``"NN+"`` and passed to ``Pattern`` as its ``sequence``.

    :param statement: the raw text to analyse.
    :returns: the result of ``Pattern.search`` on the parsed sentence.
    """
    tagged = parse(statement, relations=True, lemmata=True, light=True)
    sentence = Sentence(tagged)

    # NOTE(review): "There be DT" holds three space-separated tokens in one
    # Constraint; confirm Constraint.fromstring handles that as intended.
    constraints = [
        Constraint.fromstring(spec) for spec in ("There be DT", "NN+")
    ]
    return Pattern(sequence=constraints).search(sentence)

34 changes: 34 additions & 0 deletions faq/extractor/test_extractor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import unittest
import os
from extractor import *

class TestExtractor(unittest.TestCase):
    """Checks that each extractor function returns at least one match."""

    def _assertFound(self, matches):
        """Fail with "no match found" when *matches* has length zero."""
        self.assertNotEqual(len(matches), 0, "no match found")

    def testUnrealEngine(self):
        # Presumably the eventual target entity (not asserted here):
        # "game_engines", "Unreal Engine", {"name":"Unreal Engine","ident":"Unreal Engine"}
        self._assertFound(extract("There is a game engine Unreal Engine"))

    def testUnity3D(self):
        # Presumably the eventual target entity (not asserted here):
        # "game_engines", "Unity3D", {"name":"Unity3D","ident":"Unity3D"}
        self._assertFound(extract("There is a game engine Unity3D called Unity3D"))

    def testBasicExtract(self):
        self._assertFound(basicExtract('tasty cat food'))

    def testMyExtract(self):
        self._assertFound(myExtract("There is a game engine"))

    def testConstraintSequenceExtract(self):
        self._assertFound(constraintSequenceExtract("There is a game engine"))

#Crysis - said no to use in an online class
#Unity3d - http://www.studica.com/unity
#Source http://source.valvesoftware.com/sourcesdk/sourceu.php
#Unreal engine
#Game Maker
#Game Salad
#Scirra
#Torque 3d
21 changes: 13 additions & 8 deletions faq/faq.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import time
from pattern.en import conjugate
from pattern.en import pluralize
from pattern.en import parse, split
from pattern.search import search

CSCI3651_PREREQ = "CSCI 2911, CSCI 2912"
CSCI3651_TEXT = "Artificial Intelligence for Games"
Expand Down Expand Up @@ -153,20 +155,23 @@ def process(statement,database_name = DATABASE_NAME):
>>> print nltk.ne_chunk(nltk.pos_tag(sent))
'''
# this runs real fast, but it doesn't quite get the NN/NNP combination I hoped for from "There is a game engine Unity3D"
from pattern.en import parse, split
from pattern.search import search
# although it does now with light=True setting, but now it doesn't get the NNP in "There is a game engine Source"

s = parse(statement, relations=True, lemmata=True, light=True)
s = split(s)
result = search('There be DT (NN)+ (DT) (RB) (JJ) (NNP)+ call (DT) (RB) (JJ) (NNPS|NNP)+', s)

result = search('There be DT NN+ (DT) (RB) (JJ) NNP+ (call) (DT) (RB) (JJ) (NNPS|NNP)+', s)
if result:
try:
#try:
noun = search('(NN)+', s)[0].string
table = pluralize(noun.replace(' ','_'))
ident = search('(NNPS|NNP)+', s)[0].string
name = search('(NNPS|NNP)+', s)[1].string
result = search('(NNPS|NNP)+', s) # at the moment I'm unclear here about pulling in adjectives etc ...
ident = result[0].string
name = result[1].string if len(result) > 1 else ident
#raise Exception(table+"; "+ident+"; "+name)
return newTable(table,ident,name,database_name)
except:
return regexMatch(statement,database_name)
#except:
#return regexMatch(statement,database_name)
else:
return regexMatch(statement,database_name)

Expand Down
Binary file modified faq/test.db
Binary file not shown.
24 changes: 17 additions & 7 deletions faq/test_faq.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,23 @@ def sayAndCheckEntity(self,sentence, response, table, ident, attributeValues, da
def sayAndCheck(self, sentence, response, database=TEST_DATABASE):
    """Send *sentence* to ``query`` and assert that it answers *response*.

    :param sentence: the text passed to ``query``.
    :param response: the exact reply ``query`` is expected to return.
    :param database: forwarded to ``query`` as ``database_name``;
        defaults to ``TEST_DATABASE``.
    """
    actual = query(sentence, database_name=database)
    # assertEqual is the supported spelling; assertEquals is a deprecated
    # alias that newer unittest versions no longer provide.
    self.assertEqual(actual, response)

def testCreateGameEngine(self):
    """Create game engines, set attributes on Unity3D, then ask for its type."""
    table = "game_engines"
    # (sentence, ident, expected attribute values); every reply must be "OK".
    entity_checks = [
        ("There is a game engine Unreal Engine", "Unreal Engine", {"name":"Unreal Engine","ident":"Unreal Engine"}),
        ("There is a game engine Unity3D called Unity3D", "Unity3D", {"name":"Unity3D","ident":"Unity3D"}),
        ("Unity3D has a URL of http://www.studica.com/unity", "Unity3D", {"url":"http://www.studica.com/unity"}),
        ("Unity3D has a type of integrated", "Unity3D", {"type":"integrated"}),
        ("Unity3D has a type of 3D", "Unity3D", {"type":"3D"}),
    ]
    for sentence, ident, attributes in entity_checks:
        self.sayAndCheckEntity(sentence, "OK", table, ident, attributes)

    self.sayAndCheck("What type of game engine is Unity3D?","The type for Unity3D is '3D'")

#Crysis - said no to use in an online class
#Unity3d - http://www.studica.com/unity
#Source http://source.valvesoftware.com/sourcesdk/sourceu.php
#Unreal engine
#Game Maker
#Game Salad
#Scirra
#Torque 3d

def testActions(self):
''' test we can handle actions '''
self.sayAndCheckEntity("There is a person evil wizard called Sam", "OK", "people", "evil wizard", {"name":"Sam","ident":"evil wizard"})
Expand Down Expand Up @@ -93,13 +110,6 @@ def testCreateGame(self):
# TODO self.sayAndCheck("do you know any games?","I know about The Graveyard")
# this would require further work still?

def testCreateGameEngine(self):
    """Create the Unity3D game engine, set its attributes, then ask for its type."""
    ident = "Unity3D"
    # (sentence, expected attribute values); every reply must be "OK".
    steps = (
        ("There is a game engine Unity3D called Unity3D", {"name":"Unity3D","ident":"Unity3D"}),
        ("Unity3D has a URL of http://www.studica.com/unity", {"url":"http://www.studica.com/unity"}),
        ("Unity3D has a type of integrated", {"type":"integrated"}),
        ("Unity3D has a type of 3D", {"type":"3D"}),
    )
    for sentence, expected in steps:
        self.sayAndCheckEntity(sentence, "OK", "game_engines", ident, expected)

    self.sayAndCheck("What type of game engine is Unity3D?","The type for Unity3D is '3D'")


def testCreateCourseraCourses(self):
self.sayAndCheckEntity("There is a course Probabilistic Graphical Models called PGM", "OK", "courses", "Probabilistic Graphical Models", {"name":"PGM","ident":"Probabilistic Graphical Models"})
Expand Down

0 comments on commit ca4d008

Please sign in to comment.