From ca4d0083ce9a2e446a10fa789ec561fc8aef9f18 Mon Sep 17 00:00:00 2001 From: Sam Joseph Date: Sat, 28 Jul 2012 06:24:36 +0900 Subject: [PATCH] trying to get compound Pattern constraints to work --- faq/extractor/extractor.py | 46 ++++++++++++++++++++++++++++++++ faq/extractor/test_extractor.py | 34 +++++++++++++++++++++++ faq/faq.py | 21 +++++++++------ faq/test.db | Bin 2048 -> 2048 bytes faq/test_faq.py | 24 ++++++++++++----- 5 files changed, 110 insertions(+), 15 deletions(-) create mode 100644 faq/extractor/extractor.py create mode 100644 faq/extractor/test_extractor.py diff --git a/faq/extractor/extractor.py b/faq/extractor/extractor.py new file mode 100644 index 0000000..65c8468 --- /dev/null +++ b/faq/extractor/extractor.py @@ -0,0 +1,46 @@ +from pattern.search import * +from pattern.en import * + +def extract(statement): + + s = Sentence(parse(statement, relations=True, lemmata=True, light=True)) + + c1 = Constraint.fromstring("There be DT") + c2 = Constraint.fromstring("NN+") + c3 = Constraint.fromstring("(DT)") + c4 = Constraint.fromstring("(RB) (JJ) NNP+") + c5 = Constraint.fromstring("(call) (DT)") + c6 = Constraint.fromstring("(RB) (JJ) (NNPS|NNP)+") + p = Pattern(sequence=[c1, c2, c3, c4, c5, c6]) + result = p.search(statement) + #raise Exception(result) + return result + + +def basicExtract(statement): + + s = Sentence(parse(statement, relations=True, lemmata=True, light=True)) + p = Pattern.fromstring('(DT) (RB) (JJ) NN+') + result = p.search(s) + return result + +def myExtract(statement): + + s = Sentence(parse(statement, relations=True, lemmata=True, light=True)) + p = Pattern.fromstring('There be DT NN+') + result = p.search(s) + #raise Exception(result) + return result + + +def constraintSequenceExtract(statement): + + s = Sentence(parse(statement, relations=True, lemmata=True, light=True)) + + c1 = Constraint.fromstring("There be DT") + c2 = Constraint.fromstring("NN+") + p = Pattern(sequence=[c1, c2]) + result = p.search(s) + return result + #raise Exception(result) + diff --git a/faq/extractor/test_extractor.py b/faq/extractor/test_extractor.py new file mode 100644 index 0000000..88339ca --- /dev/null +++ b/faq/extractor/test_extractor.py @@ -0,0 +1,34 @@ +import unittest +import os +from extractor import * + +class TestExtractor(unittest.TestCase): + + def testUnrealEngine(self): + match = extract("There is a game engine Unreal Engine") + self.assertNotEqual(len(match),0,"no match found") # "game_engines", "Unreal Engine", {"name":"Unreal Engine","ident":"Unreal Engine"}) + + def testUnity3D(self): + match = extract("There is a game engine Unity3D called Unity3D") + self.assertNotEqual(len(match),0,"no match found") # "game_engines", "Unity3D", {"name":"Unity3D","ident":"Unity3D"}) + + def testBasicExtract(self): + match = basicExtract('tasty cat food') + self.assertNotEqual(len(match),0,"no match found") + + def testMyExtract(self): + match = myExtract("There is a game engine") + self.assertNotEqual(len(match),0,"no match found") + + def testConstraintSequenceExtract(self): + match = constraintSequenceExtract("There is a game engine") + self.assertNotEqual(len(match),0,"no match found") + + #Crysis - said no to use in an online class + #Unity3d - http://www.studica.com/unity + #Source http://source.valvesoftware.com/sourcesdk/sourceu.php + #Unreal engine + #Game Maker + #Game Salad + #Scirra + #Torque 3d \ No newline at end of file diff --git a/faq/faq.py b/faq/faq.py index 4a66329..6aa13a9 100644 --- a/faq/faq.py +++ b/faq/faq.py @@ -8,6 +8,8 @@ import time from pattern.en import conjugate from pattern.en import pluralize +from pattern.en import parse, split +from pattern.search import search CSCI3651_PREREQ = "CSCI 2911, CSCI 2912" CSCI3651_TEXT = "Artificial Intelligence for Games" @@ -153,20 +155,23 @@ def process(statement,database_name = DATABASE_NAME): >>> print nltk.ne_chunk(nltk.pos_tag(sent)) ''' # this runs real fast, but it doesn't quite get the NN/NNP combination I hoped for from "There is a game engine Unity3D" - from pattern.en import parse, split - from pattern.search import search + # although it does now with light=True setting, but now it doesn't get the NNP in "There is a game engine Source" + s = parse(statement, relations=True, lemmata=True, light=True) s = split(s) - result = search('There be DT (NN)+ (DT) (RB) (JJ) (NNP)+ call (DT) (RB) (JJ) (NNPS|NNP)+', s) + + result = search('There be DT NN+ (DT) (RB) (JJ) NNP+ (call) (DT) (RB) (JJ) (NNPS|NNP)+', s) if result: - try: + #try: noun = search('(NN)+', s)[0].string table = pluralize(noun.replace(' ','_')) - ident = search('(NNPS|NNP)+', s)[0].string - name = search('(NNPS|NNP)+', s)[1].string + result = search('(NNPS|NNP)+', s) # at the moment I'm unclear here about pulling in adjectives etc ... + ident = result[0].string + name = result[1].string if len(result) > 1 else ident + #raise Exception(table+"; "+ident+"; "+name) return newTable(table,ident,name,database_name) - except: - return regexMatch(statement,database_name) + #except: + #return regexMatch(statement,database_name) else: return regexMatch(statement,database_name) diff --git a/faq/test.db b/faq/test.db index ac058beb4e2b9df76b0f6ee22d140c5eaf217cb9..77995f5f3a99a180c45a57c8977136ee8545e886 100644 GIT binary patch delta 28 jcmZn=Xb_l?%q_#qz`(%Byp4f*+h#!)59ZC=n7vp4TjK_Z delta 28 jcmZn=Xb_l?%q`8!z`(%Byp4f*+h#!)59ZC=n7vp4Th0cD diff --git a/faq/test_faq.py b/faq/test_faq.py index 668190e..3a1bd46 100644 --- a/faq/test_faq.py +++ b/faq/test_faq.py @@ -24,6 +24,23 @@ def sayAndCheckEntity(self,sentence, response, table, ident, attributeValues, da def sayAndCheck(self,sentence, response, database = TEST_DATABASE): self.assertEquals(query(sentence, database_name = database), response) + def testCreateGameEngine(self): + self.sayAndCheckEntity("There is a game engine Unreal Engine", "OK", "game_engines", "Unreal Engine", {"name":"Unreal Engine","ident":"Unreal Engine"}) + self.sayAndCheckEntity("There is a game engine Unity3D called Unity3D", "OK", "game_engines", "Unity3D", {"name":"Unity3D","ident":"Unity3D"}) + self.sayAndCheckEntity("Unity3D has a URL of http://www.studica.com/unity", "OK", "game_engines", "Unity3D", {"url":"http://www.studica.com/unity"}) + self.sayAndCheckEntity("Unity3D has a type of integrated","OK","game_engines", "Unity3D", {"type":"integrated"}) + self.sayAndCheckEntity("Unity3D has a type of 3D","OK","game_engines", "Unity3D", {"type":"3D"}) + self.sayAndCheck("What type of game engine is Unity3D?","The type for Unity3D is '3D'") + + #Crysis - said no to use in an online class + #Unity3d - http://www.studica.com/unity + #Source http://source.valvesoftware.com/sourcesdk/sourceu.php + #Unreal engine + #Game Maker + #Game Salad + #Scirra + #Torque 3d + def testActions(self): ''' test we can handle actions ''' self.sayAndCheckEntity("There is a person evil wizard called Sam", "OK", "people", "evil wizard", {"name":"Sam","ident":"evil wizard"}) @@ -93,13 +110,6 @@ def testCreateGame(self): # TODO self.sayAndCheck("do you know any games?","I know about The Graveyard") # this would require further work still? - def testCreateGameEngine(self): - self.sayAndCheckEntity("There is a game engine Unity3D called Unity3D", "OK", "game_engines", "Unity3D", {"name":"Unity3D","ident":"Unity3D"}) - self.sayAndCheckEntity("Unity3D has a URL of http://www.studica.com/unity", "OK", "game_engines", "Unity3D", {"url":"http://www.studica.com/unity"}) - self.sayAndCheckEntity("Unity3D has a type of integrated","OK","game_engines", "Unity3D", {"type":"integrated"}) - self.sayAndCheckEntity("Unity3D has a type of 3D","OK","game_engines", "Unity3D", {"type":"3D"}) - self.sayAndCheck("What type of game engine is Unity3D?","The type for Unity3D is '3D'") - def testCreateCourseraCourses(self): self.sayAndCheckEntity("There is a course Probabilistic Graphical Models called PGM", "OK", "courses", "Probabilistic Graphical Models", {"name":"PGM","ident":"Probabilistic Graphical Models"})