Merge pull request #46 from clusterfudge/context
Context
Steve Penrod authored Nov 22, 2016
2 parents e9a693e + b89702f commit 01bf0e9
Showing 10 changed files with 324 additions and 34 deletions.
91 changes: 91 additions & 0 deletions adapt/context.py
@@ -0,0 +1,91 @@
from six.moves import xrange

__author__ = "seanfitz"


class ContextManagerFrame(object):
"""
Manages entities and context for a single frame of conversation.
Provides simple equality querying.
"""
def __init__(self, entities=[], metadata={}):
self.entities = entities
self.metadata = metadata

def metadata_matches(self, query={}):
result = len(query.keys()) > 0
for key in query.keys():
result = result and query[key] == self.metadata.get(key)

return result

def merge_context(self, tag, metadata):
self.entities.append(tag)
for k in metadata.keys():
if k not in self.metadata:
self.metadata[k] = k


class ContextManager(object):
"""
ContextManager
Use to track context throughout the course of a conversational session. How to manage a session's
lifecycle is not captured here.
"""
def __init__(self):
self.frame_stack = []

def inject_context(self, entity, metadata={}):
"""
:param entity:
format {'data': 'Entity tag as <str>', 'key': 'entity proper name as <str>', 'confidence': <float>'}
:param metadata: dict, arbitrary metadata about the entity being added
:return:
"""
top_frame = self.frame_stack[0] if len(self.frame_stack) > 0 else None
if top_frame and top_frame.metadata_matches(metadata):
top_frame.merge_context(entity, metadata)
else:
frame = ContextManagerFrame(entities=[entity], metadata=metadata.copy())
self.frame_stack.insert(0, frame)

def get_context(self, max_frames=None, missing_entities=[]):
"""
Constructs a list of entities from the context.
:param max_frames: integer, max number of frames to look back
:param missing_entities: a list or set of tag names, as strings
:return: a list of entities
"""
if not max_frames:
max_frames = len(self.frame_stack)

missing_entities = list(missing_entities)
context = []
for i in xrange(max_frames):
frame_entities = [entity.copy() for entity in self.frame_stack[i].entities]
for entity in frame_entities:
entity['confidence'] = entity.get('confidence', 1.0) / (2.0 + i)
context += frame_entities

result = []
if len(missing_entities) > 0:
for entity in context:
if entity.get('data') in missing_entities:
result.append(entity)
# NOTE: this implies that we will only ever get one
# of an entity kind from context, unless specified
# multiple times in missing_entities. Cannot get
# an arbitrary number of an entity kind.
missing_entities.remove(entity.get('data'))
else:
result = context

return result
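
For reference, a minimal usage sketch of the new ContextManager (not part of this commit; entity values are illustrative, and 'data' is shown as the list of (value, type) pairs that adapt/parser.py later unpacks, even though the inject_context docstring describes it as a string):

    from adapt.context import ContextManager

    cm = ContextManager()
    cm.inject_context({'data': [('dallas', 'Location')],
                       'key': 'dallas',
                       'confidence': 1.0},
                      metadata={'domain': 'weather'})

    # Frames are consulted newest-first; an entity injected at confidence 1.0
    # comes back from the most recent frame at 1.0 / (2.0 + 0) = 0.5.
    for entity in cm.get_context():
        print(entity['key'], entity['confidence'])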



37 changes: 29 additions & 8 deletions adapt/engine.py
@@ -30,34 +30,55 @@ def __init__(self, tokenizer=None, trie=None):
self.tagger = EntityTagger(self.trie, self.tokenizer, self.regular_expressions_entities)
self.intent_parsers = []

def __best_intent(self, parse_result):
def __best_intent(self, parse_result, context=[]):
best_intent = None
best_tags = None
context_as_entities = [{'entities': [c]} for c in context]
for intent in self.intent_parsers:
i = intent.validate(parse_result.get('tags'), parse_result.get('confidence'))
i, tags = intent.validate_with_tags(parse_result.get('tags') + context_as_entities, parse_result.get('confidence'))
if not best_intent or (i and i.get('confidence') > best_intent.get('confidence')):
best_intent = i
best_tags = tags

return best_intent
return best_intent, best_tags

def determine_intent(self, utterance, num_results=1):
def __get_unused_context(self, parse_result, context):
tags_keys = set([t['key'] for t in parse_result['tags'] if t['from_context']])
result_context = [c for c in context if c['key'] not in tags_keys]
return result_context

def determine_intent(self, utterance, num_results=1, include_tags=False, context_manager=None):
"""
Given an utterance, provide a valid intent.
:param utterance: an ascii or unicode string representing natural language speech
:param include_tags: includes the parsed tags (including position and confidence)
as part of result
:param context_manager: a context manager to provide context to the utterance
:param num_results: a maximum number of results to be returned.
:return: A generator the yields dictionaries.
:return: A generator that yields dictionaries.
"""
parser = Parser(self.tokenizer, self.tagger)
parser.on('tagged_entities',
(lambda result:
self.emit("tagged_entities", result)))

for result in parser.parse(utterance, N=num_results):
context = []
if context_manager:
context = context_manager.get_context()

for result in parser.parse(utterance, N=num_results, context=context):
self.emit("parse_result", result)
best_intent = self.__best_intent(result)
# create a context without entities used in result
remaining_context = self.__get_unused_context(result, context)
best_intent, tags = self.__best_intent(result, remaining_context)
if best_intent and best_intent.get('confidence', 0.0) > 0:
if include_tags:
best_intent['__tags__'] = tags
yield best_intent

def register_entity(self, entity_value, entity_type, alias_of=None):
@@ -71,7 +92,7 @@ def register_entity(self, entity_value, entity_type, alias_of=None):
:return: None
"""
if alias_of:
self.trie.insert(entity_value, data=(alias_of, entity_type))
self.trie.insert(entity_value.lower(), data=(alias_of, entity_type))
else:
self.trie.insert(entity_value.lower(), data=(entity_value, entity_type))
self.trie.insert(entity_type.lower(), data=(entity_type, 'Concept'))
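
A hedged sketch of the new determine_intent keyword arguments (entity and intent registration elided; assumes the engine class is adapt's IntentDeterminationEngine):

    from adapt.context import ContextManager
    from adapt.engine import IntentDeterminationEngine

    engine = IntentDeterminationEngine()
    # ... register_entity() / register_intent_parser() calls as usual ...
    cm = ContextManager()

    for intent in engine.determine_intent('what about tomorrow',
                                          include_tags=True,
                                          context_manager=cm):
        # '__tags__' lists the tags (from the utterance and from context)
        # that the winning intent actually consumed.
        print(intent.get('intent_type'), intent.get('__tags__'))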
26 changes: 23 additions & 3 deletions adapt/entity_tagger.py
@@ -32,12 +32,15 @@ def _sort_and_merge_tags(self, tags):
decorated.sort(key=lambda x: (x[0], x[1]))
return [tag for start_token, end_token, tag in decorated]

def tag(self, utterance):
def tag(self, utterance, context_trie=None):
"""
Tag known entities within the utterance.
:param utterance: a string of natural language text
:param context_trie: optional, a trie containing only entities from context
for this request
:return: dictionary, with the following keys
match: str - the proper entity matched
@@ -70,6 +73,7 @@ def tag(self, utterance):
entities.append(sub_entity)
additional_sort = len(entities) > 0

context_entities = []
for i in xrange(len(tokens)):
part = ' '.join(tokens[i:])

@@ -80,10 +84,26 @@
'key': new_entity.get('key'),
'start_token': i,
'entities': [new_entity],
'end_token': i + len(self.tokenizer.tokenize(new_entity.get('match'))) - 1
'end_token': i + len(self.tokenizer.tokenize(new_entity.get('match'))) - 1,
'from_context': False
})

if context_trie:
for new_entity in context_trie.gather(part):
new_entity['data'] = list(new_entity['data'])
new_entity['confidence'] *= 2.0 # context entities get double the weight!
context_entities.append({
'match': new_entity.get('match'),
'key': new_entity.get('key'),
'start_token': i,
'entities': [new_entity],
'end_token': i + len(self.tokenizer.tokenize(new_entity.get('match'))) - 1,
'from_context': True
})

additional_sort = additional_sort or len(entities) > 0

if additional_sort:
entities = self._sort_and_merge_tags(entities)
entities = self._sort_and_merge_tags(entities + context_entities)

return entities
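
Roughly how the new context_trie parameter behaves, as a standalone sketch (the tagger is normally built by the engine; values and entity names are made up, and the weight= argument on Trie.insert is the one this change introduces):

    from adapt.entity_tagger import EntityTagger
    from adapt.tools.text.tokenizer import EnglishTokenizer
    from adapt.tools.text.trie import Trie

    tokenizer = EnglishTokenizer()
    trie = Trie()
    trie.insert('weather', data=('weather', 'WeatherKeyword'))

    context_trie = Trie()
    context_trie.insert('dallas', data=('dallas', 'Location'), weight=0.5)

    tagger = EntityTagger(trie, tokenizer)
    tags = tagger.tag('what is the weather like in dallas',
                      context_trie=context_trie)
    # Tags sourced from context_trie carry 'from_context': True and have their
    # entity confidence doubled (0.5 * 2.0 = 1.0 here); tags from the main
    # trie carry 'from_context': False.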
3 changes: 2 additions & 1 deletion adapt/expander.py
@@ -139,7 +139,8 @@ def _sub_expand(self, tags):
'confidence': entities.get(entity_name)[1] * old_tag.get('confidence', 1.0),
'end_token': old_tag.get('end_token'),
'match': old_tag.get('entities')[0].get('match'),
'key': old_tag.get('entities')[0].get('key')
'key': old_tag.get('entities')[0].get('key'),
'from_context': old_tag.get('from_context', False)
}
result.append(tag)
result = sorted(result, key=lambda e: e.get('start_token'))
39 changes: 26 additions & 13 deletions adapt/intent.py
@@ -15,10 +15,10 @@ def find_first_tag(tags, entity_type, after_index=-1):
for tag in tags:
for entity in tag.get('entities'):
for v, t in entity.get('data'):
if t.lower() == entity_type.lower() and tag.get('start_token') > after_index:
return tag, v
if t.lower() == entity_type.lower() and tag.get('start_token', 0) > after_index:
return tag, v, entity.get('confidence')

return None, None
return None, None, None


def find_next_tag(tags, end_index=0):
@@ -47,7 +47,7 @@ def resolve_one_of(tags, at_least_one):
last_end_index = -1
if entity_type in resolution:
last_end_index = resolution.get[entity_type][-1].get('end_token')
tag, value = find_first_tag(tags, entity_type, after_index=last_end_index)
tag, value, c = find_first_tag(tags, entity_type, after_index=last_end_index)
if not tag:
break
else:
@@ -68,46 +68,59 @@ def __init__(self, name, requires, at_least_one, optional):
self.optional = optional

def validate(self, tags, confidence):
intent, tags = self.validate_with_tags(tags, confidence)
return intent

def validate_with_tags(self, tags, confidence):
result = {'intent_type': self.name}
intent_confidence = 0.0
local_tags = tags[:]
used_tags = []

for require_type, attribute_name in self.requires:
required_tag, canonical_form = find_first_tag(local_tags, require_type)
required_tag, canonical_form, confidence = find_first_tag(local_tags, require_type)
if not required_tag:
result['confidence'] = 0.0
return result
return result, []

result[attribute_name] = canonical_form
local_tags.remove(required_tag)
if required_tag in local_tags:
local_tags.remove(required_tag)
used_tags.append(required_tag)
# TODO: use confidence based on edit distance and context
intent_confidence += 1.0
intent_confidence += confidence

if len(self.at_least_one) > 0:
best_resolution = resolve_one_of(tags, self.at_least_one)
if not best_resolution:
result['confidence'] = 0.0
return result
return result, []
else:
for key in best_resolution:
result[key] = best_resolution[key][0].get('key') # TODO: at least one must support aliases
intent_confidence += 1.0
used_tags.append(best_resolution)
if best_resolution in local_tags:
local_tags.remove(best_resolution)

for optional_type, attribute_name in self.optional:
optional_tag, canonical_form = find_first_tag(local_tags, optional_type)
optional_tag, canonical_form, conf = find_first_tag(local_tags, optional_type)
if not optional_tag or attribute_name in result:
continue
result[attribute_name] = canonical_form
local_tags.remove(optional_tag)
if optional_tag in local_tags:
local_tags.remove(optional_tag)
used_tags.append(optional_tag)
intent_confidence += 1.0

total_confidence = intent_confidence / len(tags) * confidence

target_client, canonical_form = find_first_tag(local_tags, CLIENT_ENTITY_NAME)
target_client, canonical_form, confidence = find_first_tag(local_tags, CLIENT_ENTITY_NAME)

result['target'] = target_client.get('key') if target_client else None
result['confidence'] = total_confidence

return result
return result, used_tags


class IntentBuilder(object):
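
An illustrative call to the new validate_with_tags (the tag dictionaries are hand-built here in the shape the tagger and expander produce; names and numbers are made up):

    from adapt.intent import IntentBuilder

    weather_intent = IntentBuilder('WeatherIntent')\
        .require('WeatherKeyword')\
        .optionally('Location')\
        .build()

    tags = [
        {'key': 'weather', 'match': 'weather', 'start_token': 3, 'end_token': 3,
         'from_context': False,
         'entities': [{'key': 'weather', 'match': 'weather', 'confidence': 1.0,
                       'data': [('weather', 'WeatherKeyword')]}]},
        {'key': 'dallas', 'match': 'dallas', 'start_token': 6, 'end_token': 6,
         'from_context': True,
         'entities': [{'key': 'dallas', 'match': 'dallas', 'confidence': 0.5,
                       'data': [('dallas', 'Location')]}]},
    ]

    intent, used_tags = weather_intent.validate_with_tags(tags, confidence=1.0)
    # validate() still works and simply discards used_tags; required entities
    # now add their own confidence to the intent score rather than a flat 1.0.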
27 changes: 24 additions & 3 deletions adapt/parser.py
@@ -1,7 +1,7 @@
import pyee
import time
from adapt.expander import BronKerboschExpander

from adapt.tools.text.trie import Trie

__author__ = 'seanfitz'

@@ -15,9 +15,30 @@ def __init__(self, tokenizer, tagger):
self._tokenizer = tokenizer
self._tagger = tagger

def parse(self, utterance, relevance_store=None, N=1):
def parse(self, utterance, context=None, N=1):
"""
:param utterance:
:param context: a list of entities
:param N:
:return:
"""
start = time.time()
tagged = self._tagger.tag(utterance.lower())
context_trie = None
if context and isinstance(context, list):
# sort by confidence in ascending order, so
# highest confidence for an entity is last.
# see comment on TrieNode ctor
context.sort(key=lambda x: x.get('confidence'))

context_trie = Trie()
for entity in context:
entity_value, entity_type = entity.get('data')[0]
context_trie.insert(entity_value.lower(),
data=(entity_value, entity_type),
weight=entity.get('confidence'))

tagged = self._tagger.tag(utterance.lower(), context_trie=context_trie)
self.emit("tagged_entities",
{
'utterance': utterance,
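
The shape parse() now expects for each context entity, shown as a standalone sketch of the trie construction it performs internally (values illustrative):

    from adapt.tools.text.trie import Trie

    context = [
        {'data': [('seattle', 'Location')], 'key': 'seattle', 'confidence': 0.25},
        {'data': [('dallas', 'Location')], 'key': 'dallas', 'confidence': 0.5},
    ]
    # ascending sort, so the highest-confidence duplicate is inserted last
    context.sort(key=lambda x: x.get('confidence'))

    context_trie = Trie()
    for entity in context:
        entity_value, entity_type = entity.get('data')[0]
        context_trie.insert(entity_value.lower(),
                            data=(entity_value, entity_type),
                            weight=entity.get('confidence'))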