From a455803d49e55071364cbe86c5b6a20b283e60e0 Mon Sep 17 00:00:00 2001
From: Saleh Bakhit
Date: Fri, 27 Nov 2020 14:02:59 -0500
Subject: [PATCH] add appending words to wordslist support

---
 wordninja.py | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/wordninja.py b/wordninja.py
index 52c0681..7281a78 100644
--- a/wordninja.py
+++ b/wordninja.py
@@ -27,12 +27,30 @@
 
 class LanguageModel(object):
   def __init__(self, word_file):
+    self.word_file = word_file
     # Build a cost dictionary, assuming Zipf's law and cost = -math.log(probability).
     with gzip.open(word_file) as f:
       words = f.read().decode().split()
     self._wordcost = dict((k, log((i+1)*log(len(words)))) for i,k in enumerate(words))
     self._maxword = max(len(x) for x in words)
-    
+
+
+  def add_word(self, w):
+    """Add `w` to the in-memory model and append it to the gzipped word file."""
+    if w in self._wordcost:
+      # Already known: re-adding would demote it to the rarest rank.
+      return
+
+    # gzip 'a' mode appends a new member; surround the word with newlines so
+    # it stays a separate token when __init__ re-reads the file with split().
+    with gzip.open(self.word_file, 'a') as f:
+      f.write(('\n%s\n' % w).encode('utf-8'))
+
+    # New words are ranked last (rarest) under the same Zipf-style cost.
+    num_words = len(self._wordcost) + 1
+    self._wordcost[w] = log(num_words * log(num_words))
+    self._maxword = max(self._maxword, len(w))
+
 
   def split(self, s):
     """Uses dynamic programming to infer the location of spaces in a string without spaces."""
@@ -83,4 +101,8 @@ def best_match(i):
 def split(s):
   return DEFAULT_LANGUAGE_MODEL.split(s)
 
+def add_word(w):
+  """Add `w` to the default language model (see LanguageModel.add_word)."""
+  return DEFAULT_LANGUAGE_MODEL.add_word(w)
+
 