Skip to content

Commit

Permalink
Adding the HW04 files
Browse files Browse the repository at this point in the history
  • Loading branch information
Ravi Agrawal committed Dec 6, 2017
1 parent ad1e454 commit c742395
Show file tree
Hide file tree
Showing 8 changed files with 12,699 additions and 0 deletions.
Binary file added .DS_Store
Binary file not shown.
Binary file added hw4.zip
Binary file not shown.
98 changes: 98 additions & 0 deletions hw4/cky.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
from pprint import pprint

# The productions rules have to be binarized.

grammar_text = """
S -> NP VP
NP -> Det Noun
VP -> Verb NP
PP -> Prep NP
NP -> NP PP
VP -> VP PP
"""

lexicon = {
'Noun': set(['cat', 'dog', 'table', 'food']),
'Verb': set(['attacked', 'saw', 'loved', 'hated']),
'Prep': set(['in', 'of', 'on', 'with']),
'Det': set(['the', 'a']),
}

# Process the grammar rules. You should not have to change this.
grammar_rules = []
for line in grammar_text.strip().split("\n"):
if not line.strip(): continue
left, right = line.split("->")
left = left.strip()
children = right.split()
rule = (left, tuple(children))
grammar_rules.append(rule)
possible_parents_for_children = {}
for parent, (leftchild, rightchild) in grammar_rules:
if (leftchild, rightchild) not in possible_parents_for_children:
possible_parents_for_children[leftchild, rightchild] = []
possible_parents_for_children[leftchild, rightchild].append(parent)
# Error checking
all_parents = set(x[0] for x in grammar_rules) | set(lexicon.keys())
for par, (leftchild, rightchild) in grammar_rules:
if leftchild not in all_parents:
assert False, "Nonterminal %s does not appear as parent of prod rule, nor in lexicon." % leftchild
if rightchild not in all_parents:
assert False, "Nonterminal %s does not appear as parent of prod rule, nor in lexicon." % rightchild

# print "Grammar rules in tuple form:"
# pprint(grammar_rules)
# print "Rule parents indexed by children:"
# pprint(possible_parents_for_children)


def cky_acceptance(sentence):
# return True or False depending whether the sentence is parseable by the grammar.
global grammar_rules, lexicon

# Set up the cells data structure.
# It is intended that the cell indexed by (i,j)
# refers to the span, in python notation, sentence[i:j],
# which is start-inclusive, end-exclusive, which means it includes tokens
# at indexes i, i+1, ... j-1.
# So sentence[3:4] is the 3rd word, and sentence[3:6] is a 3-length phrase,
# at indexes 3, 4, and 5.
# Each cell would then contain a list of possible nonterminal symbols for that span.
# If you want, feel free to use a totally different data structure.
N = len(sentence)
cells = {}
for i in range(N):
for j in range(i + 1, N + 1):
cells[(i, j)] = []

# TODO replace the below with an implementation
pprint(cells)
return False


def cky_parse(sentence):
# Return one of the legal parses for the sentence.
# If nothing is legal, return None.
# This will be similar to cky_acceptance(), except with backpointers.
global grammar_rules, lexicon

N = len(sentence)
cells = {}
for i in range(N):
for j in range(i + 1, N + 1):
cells[(i, j)] = []

# TODO replace the below with an implementation
return None

## some examples of calling these things...
## you probably want to call only one sentence at a time to help debug more easily.

# print cky_acceptance(['the','cat','attacked','the','food'])
# pprint( cky_parse(['the','cat','attacked','the','food']))
# pprint( cky_acceptance(['the','the']))
# pprint( cky_parse(['the','the']))
# print cky_acceptance(['the','cat','attacked','the','food','with','a','dog'])
# pprint( cky_parse(['the','cat','attacked','the','food','with','a','dog']) )
# pprint( cky_parse(['the','cat','with','a','table','attacked','the','food']) )
#
Loading

0 comments on commit c742395

Please sign in to comment.