Skip to content

Commit

Permalink
declension table correction facility. See #11
Browse files Browse the repository at this point in the history
  • Loading branch information
funderburkjim committed Oct 25, 2020
1 parent 4c32f00 commit 6fa44dd
Show file tree
Hide file tree
Showing 5 changed files with 168 additions and 2 deletions.
1 change: 1 addition & 0 deletions nominals/pysanskritv2/tables/correction_inventory.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
corrections/corrections_j.txt
72 changes: 72 additions & 0 deletions nominals/pysanskritv2/tables/corrections.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
""" corrections.py
"""
import sys,re,codecs
import tableread

def add_manual_tables(d,filein):
    """Load the correction tables of one file into d (d is modified in place).

    Every record returned by tableread.init_table(filein) is stored in d
    under the key (record.model, record.key2).  When a key is already
    present, a message is printed and the later record replaces the
    earlier one.
    """
    for table in tableread.init_table(filein):
        key = (table.model, table.key2)
        if key in d:
            # duplicates are allowed, but reported; the last one wins
            print('manual duplicate key',key,filein)
        d[key] = table

def init_manual_tables(filein):
    """Build the dictionary of manual corrections from an inventory file.

    filein is a utf-8 text file naming one correction file per line.
    Lines starting with ';' are comments.  Blank (or whitespace-only)
    lines are ignored, so a trailing empty line in the inventory is not
    mistaken for an empty file name.

    Returns a dict filled by add_manual_tables, one call per listed file.
    """
    with codecs.open(filein,"r","utf-8") as f:
        filenames = [x.rstrip() for x in f if not x.startswith(';')]
    d = {}
    for filename in filenames:
        if not filename:
            # blank inventory line: nothing to load
            continue
        add_manual_tables(d,filename)
    return d

def process(line,d,iline):
    """Return line with its inflection replaced when d holds a correction.

    line is assumed to be in the format written by decline_file:
      '%s\t%s\t%s\t%s' %(rec.model,rec.key2,rec.refs,rec.inflection)
    d maps (model,key2) to a record providing 'tabstring' and 'nused'.
    iline is the 0-based index of line, used only in the error message.

    If (model,key2) is not in d, line is returned unchanged.  Otherwise
    the record's 'nused' counter is incremented, a message is printed,
    and a new line carrying the record's 'tabstring' is returned.

    If line does not consist of exactly four tab-separated fields, an
    error is printed and the program exits with status 1.
    """
    try:
        model,key2,refs,_old_inflection = line.split('\t')
    except ValueError:
        # only a wrong field count is expected here; anything else
        # (e.g. KeyboardInterrupt) must not be swallowed
        print('process ERROR. wrong format of old line #',iline+1)
        print(line)
        sys.exit(1)
    dkey = (model,key2)  # consistent with add_manual_tables
    if dkey not in d:
        return line
    rec = d[dkey]
    newline = '%s\t%s\t%s\t%s' % (model,key2,refs,rec.tabstring)
    rec.nused = rec.nused + 1
    print('new inflection for',dkey)
    return newline
if __name__ == "__main__":
    # Usage: corrections.py <tables> <inventory> <corrected tables>
    filein = sys.argv[1]   # tables produced by decline_file
    filein1 = sys.argv[2]  # inventory of correction files
    fileout = sys.argv[3]  # output tables, same format as filein

    corrections = init_manual_tables(filein1)
    # copy filein to fileout, substituting corrected inflections
    with codecs.open(filein,"r","utf-8") as fin, \
         codecs.open(fileout,"w","utf-8") as fout:
        for lnum,raw in enumerate(fin):
            fout.write(process(raw.rstrip(),corrections,lnum) + '\n')
    # report corrections that matched no line of filein
    unused = [key for key,rec in corrections.items() if rec.nused == 0]
    print(len(corrections),' = # of correction records')
    print(len(unused),'= # of unused corrections')

40 changes: 40 additions & 0 deletions nominals/pysanskritv2/tables/corrections/corrections_j.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
; Corrections for declensions of words ending in 'j'
;Declension of f_1_j sfj C=sfk, H=sfw
;Declension of m_1_j sfj C=sfk, H=sfw
;Declension of n_1_j sfj C=sfk=H
; MW viSva—sfj (nom. -sfk or incorrectly -sfw)
; Goldman, p. 378 -sfw
; Follow Goldman 10-24-2020
Declension of m_1_j viSva-sfj
Case 1: viSvasfw viSvasfjO viSvasfjaH
Case 2: viSvasfjam viSvasfjO viSvasfjaH
Case 3: viSvasfjA viSvasfqByAm viSvasfqBiH
Case 4: viSvasfje viSvasfqByAm viSvasfqByaH
Case 5: viSvasfjaH viSvasfqByAm viSvasfqByaH
Case 6: viSvasfjaH viSvasfjoH viSvasfjAm
Case 7: viSvasfji viSvasfjoH viSvasfwsu
Case 8: viSvasfk viSvasfjO viSvasfjaH
; Declension of m_1_j aDi-rAj [H 3 stems]
; Declension of m_1_j vEdya-rAj [Huet 2 stems]
; Declension of f_1_j asfj asfk
; Declension of m_1_j asfj asfk
; Declension of n_1_j asfj asfk
; Declension of f_1_j KaYj ejf error? (MW,PWG Kan)
; Declension of m_1_j KaYj
; Declension of n_1_j KaYj
; Declension of f_1_j naBrAj (C=naBrAk, H=naBrAw=PW
; Declension of m_1_j naBrAj
; Declension of n_1_j naBrAj
; Declension of m_1_j mftaBraj C=mftaBrak, H=mftaBraw
; Declension of f_1_j mftaBraj C=mftaBrak=H ?
; Declension of f_1_j parivrAj ?? PW vrAw = H
; Declension of m_1_j parivrAj H has two stems
; Declension of n_1_j pravrAj k/w
; Declension of f_1_j yuj [Huet 2 stems in 1,2,8]
; Declension of m_1_j yuj [Huet 2 stems in 1,2,8]
; Declension of n_1_j yuj [Huet 2 stems in 1,2,8]
; sraj MW ifc. nom. sraw; sraj.f. nom. srak
; there are 30 compounds ending in -sraj in MW
; Declension of f_1_j sraj Note H=sragByAm
; Declension of m_1_j sraj C=sragByAm, H=sraqByAm etc.
; Declension of n_1_j sraj H=sragByAm
8 changes: 6 additions & 2 deletions nominals/pysanskritv2/tables/redo.sh
Original file line number Diff line number Diff line change
@@ -1,2 +1,6 @@

# NOTE(review): the diff markers were lost in this view; the next line looks
# like the pre-change command (it writes calc_tables.txt directly) — confirm
# it is no longer part of the script.
python3 decline_file.py ../stems/calc_stems.txt calc_tables.txt
echo "REDO DECLENSION TABLES BEGINS"
echo "calc_tables0"
# Step 1: write the uncorrected declension tables to calc_tables0.txt
python3 decline_file.py ../stems/calc_stems.txt calc_tables0.txt
echo "calc_tables (after corrections)"
# Step 2: apply the correction files listed in correction_inventory.txt
# to calc_tables0.txt, writing the result to calc_tables.txt
python3 corrections.py calc_tables0.txt correction_inventory.txt calc_tables.txt
echo "REDO DECLENSION TABLES ENDS"
49 changes: 49 additions & 0 deletions nominals/pysanskritv2/tables/tableread.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
"""table.py
Read declension tables and construct a list of Table records
"""
class Table(object):
    """A declension table for one (model, key2) pair.

    Attributes:
      model, key2 : stored as given
      table       : sequence of 24 strings (8*3 forms)
      nused       : counter, initialised to 0
      tabstring   : the entries of table joined with ':'
    """
    def __init__(self,model,key2,table):
        self.model, self.key2 = model, key2
        self.table = table
        self.nused = 0
        # declension table is 8*3 = 24
        ncells = len(self.table)
        assert ncells == 24
        self.tabstring = ':'.join(self.table)
def init_table(filein0):
    """Read a file of declension tables and return a list of Table records.

    filein0 is resolved relative to the directory containing this module
    (an absolute path is used as is).  The file is utf-8 text in which:
      * lines starting with ';' are comments and are ignored;
      * blank lines are ignored;
      * each table is 9 consecutive lines:
          Declension of <model> <key2>
          Case 1: x y z
          ...
          Case 8: x y z
    In the 24 forms, the characters '_' and '?' (missing values) are
    removed.

    On a malformed or incomplete table an error is printed and the
    program exits with status 1.
    """
    from os.path import dirname, abspath
    import os,codecs,re,sys
    curdir = dirname(abspath(__file__))
    filein = os.path.join(curdir,filein0)
    with codecs.open(filein,"r","utf-8") as f:
        lines = [line.rstrip() for line in f if not line.startswith(';')]
    # blank lines would shift the fixed 9-line stride used below
    lines = [line for line in lines if line]
    recs = []
    nlines = len(lines)
    for iline in range(0,nlines,9):
        # 1st line of form 'Declension of <model> <key2>'
        header = lines[iline]
        m = re.search(r'^Declension of (.*?) (.*?)$',header)
        if not m:
            print('tableread ERROR1 @ line:',header)
            sys.exit(1)
        if iline + 9 > nlines:
            # the last table has fewer than 8 'Case' lines
            print('tableread ERROR3 incomplete table @ line:',header)
            sys.exit(1)
        model = m.group(1)
        key2 = m.group(2)
        # initialize declension table
        tab = []
        for icase in range(1,9):
            # 2nd-9th line of form 'Case c: x y z'
            caseline = lines[iline + icase]
            m = re.search(r'^Case %s: (.*?) (.*?) (.*?)$'%icase,caseline)
            if not m:
                print('tableread ERROR2 @ line:',caseline)
                sys.exit(1)
            tab.extend(m.groups())
        # replace missing values ('_' or '?') with empty string
        tab = [x.replace('_','').replace('?','') for x in tab]
        # generate a Table record
        recs.append(Table(model,key2,tab))
    return recs

0 comments on commit 6fa44dd

Please sign in to comment.