-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
declension table correction facility. See #11
- Loading branch information
1 parent
4c32f00
commit 6fa44dd
Showing
5 changed files
with
168 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
corrections/corrections_j.txt |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
""" corrections.py | ||
""" | ||
import sys,re,codecs | ||
import tableread | ||
|
||
def add_manual_tables(d, filein):
    """Load the correction tables found in *filein* into *d*.

    Each record returned by ``tableread.init_table(filein)`` is stored in
    ``d`` under the key ``(rec.model, rec.key2)``.  ``d`` is modified in
    place and nothing is returned.

    When a key is already present in ``d`` a 'manual duplicate key' message
    is printed and the earlier record is replaced, i.e. the last record read
    for a given key wins.
    """
    for rec in tableread.init_table(filein):
        key = (rec.model, rec.key2)
        if key in d:
            # Report the clash; the newer record overrides the older one.
            print('manual duplicate key', key, filein)
        d[key] = rec
|
||
def init_manual_tables(filein):
    """Build the dictionary of manual corrections from an inventory file.

    *filein* is a utf-8 text file naming one correction file per line.
    Lines starting with ';' are comments; blank lines are ignored.  Every
    named file is loaded with ``add_manual_tables``.

    Returns the dictionary filled in by ``add_manual_tables``.
    """
    with codecs.open(filein, "r", "utf-8") as f:
        filenames = [x.rstrip() for x in f if not x.startswith(';')]
    d = {}
    for filename in filenames:
        if not filename:
            # A blank inventory line is not a file name; skip it rather
            # than asking the table reader to open ''.
            continue
        add_manual_tables(d, filename)
    return d
|
||
def process(line, d, iline):
    """Return *line*, with its inflection replaced if *d* has a correction.

    line  -- one record, assumed to be in the format written by decline_file:
             '%s\t%s\t%s\t%s' % (rec.model, rec.key2, rec.refs, rec.inflection)
    d     -- corrections keyed by (model, key2), as built by
             add_manual_tables; each value provides ``tabstring`` and ``nused``
    iline -- 0-based index of the line, used only in the error message

    When (model, key2) is in *d*, the returned line carries the correction's
    ``tabstring`` as its fourth field, the correction's ``nused`` counter is
    incremented and a message is printed.  Otherwise *line* is returned
    unchanged.

    A line that does not have exactly four tab-separated fields is reported
    and the program exits with status 1.
    """
    fields = line.split('\t')
    if len(fields) != 4:
        print('process ERROR. wrong format of old line #', iline + 1)
        print(line)
        sys.exit(1)
    model, key2, refs, _old_inflection = fields
    dkey = (model, key2)  # consistent with add_manual_tables
    if dkey not in d:
        return line
    rec = d[dkey]
    newline = '%s\t%s\t%s\t%s' % (model, key2, refs, rec.tabstring)
    rec.nused = rec.nused + 1  # lets the caller report unused corrections
    print('new inflection for', dkey)
    return newline
if __name__ == "__main__":
    # Apply manual corrections to a file of declension tables.
    if len(sys.argv) != 4:
        print('usage: python3 corrections.py <tables_in> <correction_inventory> <tables_out>')
        sys.exit(1)
    filein = sys.argv[1]   # tables produced by decline_file
    filein1 = sys.argv[2]  # inventory of correction files
    fileout = sys.argv[3]  # output tables, same format as filein

    d = init_manual_tables(filein1)
    with codecs.open(filein, "r", "utf-8") as f, \
            codecs.open(fileout, "w", "utf-8") as fout:
        for iline, line in enumerate(f):
            line = line.rstrip()
            lineout = process(line, d, iline)
            fout.write(lineout + '\n')
    # Corrections whose key matched no input line were never applied.
    unused = [k for k in d if d[k].nused == 0]
    print(len(d), ' = # of correction records')
    print(len(unused), '= # of unused corrections')
|
40 changes: 40 additions & 0 deletions
40
nominals/pysanskritv2/tables/corrections/corrections_j.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
; Corrections for declensions of words ending in 'j' | ||
;Declension of f_1_j sfj C=sfk, H=sfw | ||
;Declension of m_1_j sfj C=sfk, H=sfw | ||
;Declension of n_1_j sfj C=sfk=H | ||
; MW viSva—sfj (nom. -sfk or incorrectly -sfw) | ||
; Goldman, p. 378 -sfw | ||
; Follow Goldman 10-24-2020 | ||
Declension of m_1_j viSva-sfj | ||
Case 1: viSvasfw viSvasfjO viSvasfjaH | ||
Case 2: viSvasfjam viSvasfjO viSvasfjaH | ||
Case 3: viSvasfjA viSvasfqByAm viSvasfqBiH | ||
Case 4: viSvasfje viSvasfqByAm viSvasfqByaH | ||
Case 5: viSvasfjaH viSvasfqByAm viSvasfqByaH | ||
Case 6: viSvasfjaH viSvasfjoH viSvasfjAm | ||
Case 7: viSvasfji viSvasfjoH viSvasfwsu | ||
Case 8: viSvasfk viSvasfjO viSvasfjaH | ||
; Declension of m_1_j aDi-rAj [H 3 stems] | ||
; Declension of m_1_j vEdya-rAj [Huet 2 stems] | ||
; Declension of f_1_j asfj asfk | ||
; Declension of m_1_j asfj asfk | ||
; Declension of n_1_j asfj asfk | ||
; Declension of f_1_j KaYj ejf error? (MW,PWG Kan) | ||
; Declension of m_1_j KaYj | ||
; Declension of n_1_j KaYj | ||
; Declension of f_1_j naBrAj (C=naBrAk, H=naBrAw=PW) | ||
; Declension of m_1_j naBrAj | ||
; Declension of n_1_j naBrAj | ||
; Declension of m_1_j mftaBraj C=mftaBrak, H=mftaBraw | ||
; Declension of f_1_j mftaBraj C=mftaBrak=H ? | ||
; Declension of f_1_j parivrAj ?? PW vrAw = H | ||
; Declension of m_1_j parivrAj H has two stems | ||
; Declension of n_1_j pravrAj k/w | ||
; Declension of f_1_j yuj [Huet 2 stems in 1,2,8] | ||
; Declension of m_1_j yuj [Huet 2 stems in 1,2,8] | ||
; Declension of n_1_j yuj [Huet 2 stems in 1,2,8] | ||
; sraj MW ifc. nom. sraw; sraj.f. nom. srak | ||
; there are 30 compounds ending in -sraj in MW | ||
; Declension of f_1_j sraj Note H=sragByAm | ||
; Declension of m_1_j sraj C=sragByAm, H=sraqByAm etc. | ||
; Declension of n_1_j sraj H=sragByAm |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,6 @@ | ||
|
||
python3 decline_file.py ../stems/calc_stems.txt calc_tables.txt | ||
echo "REDO DECLENSION TABLES BEGINS" | ||
echo "calc_tables0" | ||
python3 decline_file.py ../stems/calc_stems.txt calc_tables0.txt | ||
echo "calc_tables (after corrections)" | ||
python3 corrections.py calc_tables0.txt correction_inventory.txt calc_tables.txt | ||
echo "REDO DECLENSION TABLES ENDS" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
"""table.py | ||
Read declension tables and construct a list of Table records | ||
""" | ||
class Table(object):
    """One declension table for a (model, key2) pair.

    Attributes:
      model     -- declension model name, as given to the constructor
      key2      -- stem key, as given to the constructor
      table     -- list of 24 forms (8 cases * 3 forms per case)
      nused     -- usage counter, starts at 0
      tabstring -- the 24 forms joined with ':'
    """
    NFORMS = 24  # declension table is 8*3 = 24

    def __init__(self, model, key2, table):
        # Validate with an exception rather than 'assert', which is
        # removed when Python runs with -O.
        if len(table) != self.NFORMS:
            raise ValueError(
                'Table %s %s: expected %d forms, got %d'
                % (model, key2, self.NFORMS, len(table)))
        self.model = model
        self.key2 = key2
        self.table = table
        self.nused = 0
        self.tabstring = ':'.join(self.table)
def init_table(filein0):
    """Read a file of declension tables and return a list of Table records.

    *filein0* is resolved relative to the directory containing this module.
    Lines starting with ';' are comments and blank lines are ignored.  The
    remaining lines must come in groups of nine:

        Declension of <model> <key2>
        Case 1: x y z
        ...
        Case 8: x y z

    Any '_' or '?' characters in a form are removed, so a form written as
    just '_' or '?' (a missing value) becomes the empty string.

    A malformed or incomplete group is reported and the program exits with
    status 1.
    """
    from os.path import dirname, abspath
    import codecs
    import os
    import re
    curdir = dirname(abspath(__file__))
    filein = os.path.join(curdir, filein0)
    with codecs.open(filein, "r", "utf-8") as f:
        lines = [line.rstrip() for line in f if not line.startswith(';')]
    # Blank lines carry no data and would shift the 9-line grouping below.
    lines = [line for line in lines if line]
    header_re = re.compile(r'^Declension of (.*?) (.*?)$')
    recs = []
    nlines = len(lines)
    for iline in range(0, nlines, 9):
        # 1st line of form 'Declension of <model> <key2>'
        header = lines[iline]
        m = header_re.search(header)
        if not m:
            print('tableread ERROR1 @ line:', header)
            raise SystemExit(1)
        model = m.group(1)
        key2 = m.group(2)
        if iline + 9 > nlines:
            # The file ends before all eight case lines of this table.
            print('tableread ERROR2 @ line:', header, '(incomplete table)')
            raise SystemExit(1)
        # initialize declension table
        tab = []
        for icase in range(1, 9):
            # 2nd-9th line of form 'Case c: x y z'
            caseline = lines[iline + icase]
            m = re.search(r'^Case %s: (.*?) (.*?) (.*?)$' % icase, caseline)
            if not m:
                print('tableread ERROR2 @ line:', caseline)
                raise SystemExit(1)
            tab.extend(m.groups())
        # replace missing values ('_' or '?') with empty string
        tab = [x.replace('_', '').replace('?', '') for x in tab]
        # generate a Table record
        recs.append(Table(model, key2, tab))
    return recs