Skip to content

Commit

Permalink
#4 version tagcount_ls_jim_29.12.24.txt
Browse files Browse the repository at this point in the history
  • Loading branch information
funderburkjim committed Dec 30, 2024
1 parent 8d9ff5a commit feb00c9
Show file tree
Hide file tree
Showing 29 changed files with 23,429 additions and 0 deletions.
404 changes: 404 additions & 0 deletions issues/issue4/BHS.Grammar_Front.pages.txt

Large diffs are not rendered by default.

93 changes: 93 additions & 0 deletions issues/issue4/adjust_tooltip.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# coding=utf-8
""" adjust_tooltip.py
"""
from __future__ import print_function
import sys, re,codecs

def read_lines(filein):
    """Return the lines of the UTF-8 text file `filein` as a list of strings.

    Trailing carriage-return / newline characters are removed from each
    line; any other whitespace (including tabs) is preserved.
    """
    with codecs.open(filein, mode='r', encoding='utf-8') as handle:
        return [raw.rstrip('\r\n') for raw in handle]

def write_recs(fileout,recs):
    """Write one line per record to the UTF-8 file `fileout`.

    A record whose `newtooltip` is None is written as its original `line`.
    Otherwise the record's raw tab-separated fields (`parts0`) are written
    with the fourth field replaced by `newtooltip`.

    The substitution is made on a copy of `parts0`, so the records passed
    in are not modified by writing them.

    Prints the number of records written.
    """
    outlines = []
    for rec in recs:
        if rec.newtooltip is None:
            outlines.append(rec.line)  # no change
        else:
            fields = list(rec.parts0)  # copy: do not mutate the record
            fields[3] = rec.newtooltip
            outlines.append('\t'.join(fields))
    with codecs.open(fileout, "w", "utf-8") as f:
        for out in outlines:
            f.write(out + '\n')
    print(len(recs), "records written to", fileout)


class Tagcount:
    """One tab-separated record: count, ls-type, ls, tooltip.

    Attributes set by the constructor:
      line       -- the input line, unchanged
      parts0     -- the raw tab-separated fields
      parts      -- the fields with surrounding whitespace stripped
      status     -- True when the line has exactly four fields
      countstr, lsstr, ls, tooltip -- the four stripped fields
      newtooltip -- replacement tooltip; None until one is assigned
    """
    def __init__(self,line):
        raw_fields = line.split('\t')
        stripped = [field.strip() for field in raw_fields]
        self.line = line
        self.parts0 = raw_fields
        self.status = (len(raw_fields) == 4)
        self.parts = stripped
        # Unpacking requires exactly four fields.
        (self.countstr, self.lsstr, self.ls, self.tooltip) = stripped
        assert self.lsstr in ('ls','lsfm','lsfm?')
        self.newtooltip = None

def init_tagcount(filein):
    """Read `filein` and return one Tagcount per line, in file order."""
    return [Tagcount(text) for text in read_lines(filein)]

def generate_changes(lines):
    """Yield (old, new) tooltip pairs parsed from 'old: ' / 'new: ' lines.

    Each line is classified by the leftmost 'old: ' or 'new: ' marker it
    contains; the text after that marker (to end of line) is the value.
    Lines containing neither marker are ignored.  An 'old:' line sets the
    pending old text (a later 'old:' replaces an earlier unpaired one);
    a 'new:' line completes the pair and yields it.

    Using the leftmost marker means a 'new:' line whose text happens to
    contain 'old: ' (e.g. 'new: ed. Arnold: ...') is still read as 'new:'.

    Raises:
      ValueError -- a 'new:' line appears with no pending 'old:' line.
    """
    marker = re.compile(r'(old|new): (.*)$')
    old = None
    for iline, line in enumerate(lines):
        m = marker.search(line)
        if m is None:
            continue
        kind, text = m.groups()
        if kind == 'old':
            old = text
            continue
        if old is None:
            raise ValueError(
                "line %s: 'new:' without a preceding 'old:'" % (iline + 1))
        yield (old, text)
        old = None  # the pair is consumed

def init_changes(filein):
    """Return the list of (old, new) changes parsed from the file `filein`.

    Prints how many changes were read.
    """
    changes = []
    changes.extend(generate_changes(read_lines(filein)))
    print(len(changes),"changes read from",filein)
    return changes

def apply_changes(recs,changes):
    """Set `newtooltip` on every record whose tooltip has a replacement.

    `changes` is a sequence of (old, new) pairs.  For each record in `recs`
    whose `tooltip` equals some `old`, `rec.newtooltip` is set to the
    matching `new`; other records are left alone.  Prints the number of
    records changed.

    If the same `old` text occurs twice in `changes`, a message is printed
    and the program exits with status 1 (SystemExit).
    """
    lookup = {}  # old tooltip -> new tooltip
    for old, new in changes:
        if old in lookup:
            print('duplicate change found')
            # sys.exit, not the site-provided exit(), which is intended for
            # the interactive shell and may be absent.
            sys.exit(1)
        lookup[old] = new
    n = 0
    for rec in recs:
        if rec.tooltip in lookup:
            rec.newtooltip = lookup[rec.tooltip]
            n += 1
    print(n,"records with tooltip change")

if __name__=="__main__":
    # Three positional arguments are required; extra ones are ignored.
    if len(sys.argv) < 4:
        print('usage: python adjust_tooltip.py '
              '<tagcount_in> <tooltip_changes> <tagcount_out>')
        sys.exit(1)
    filein = sys.argv[1]   # tagcount file to adjust, e.g. tagcount_ls_1.txt
    filein1 = sys.argv[2]  # tooltip changes ('old: ' / 'new: ' lines)
    fileout = sys.argv[3]  # tagcount file written with the changes applied

    recs = init_tagcount(filein)
    changes = init_changes(filein1)
    apply_changes(recs,changes)  # newtooltip attribute computed
    write_recs(fileout,recs)
8 changes: 8 additions & 0 deletions issues/issue4/adjust_tooltip_anna.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
old: ;; Śikṣāsamuccaya, A Compendium of Buddhist Doctrine / transl. C.Bendall and W.H.D.Rouse. London: John Murray, 1922.
new: Śikṣāsamuccaya, ed. Bendall, St. Petersburg, 1897—1902; transl. Bendall and Rouse, London, 1922.
old: ;; Avadāna-śataka, ed. Speyer J. S., 2 vols., St. Petersburg, 1902, 1906.
new: Avadāna-śataka, ed. Speyer, 2 vols., St. Petersburg, 1902, 1906; transl. Feer, Annales du Musée Guimet 18 (1891).
old: ;; The Divyāvadāna: A Collection of Early Buddhist Legends, ed. E.B.Cowell and R.A.Neil, Cambridge, 1886.
new: Divyāvadāna, ed. Cowell and Neil, Cambridge, 1886.
old: ;; W.KirfelDie, Kosmographie der Inder: nach den Quellen dargestellt / Bonn–Leipzig: Kurt Schroeder, 1920.
new: Kosmographie der Inder.
360 changes: 360 additions & 0 deletions issues/issue4/bhsfm_abbr.txt

Large diffs are not rendered by default.

350 changes: 350 additions & 0 deletions issues/issue4/bhsfm_abbr_0.txt

Large diffs are not rendered by default.

133 changes: 133 additions & 0 deletions issues/issue4/compare_tagcount_ls.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
# coding=utf-8
""" compare_tagcount_ls.py
"""
from __future__ import print_function
import sys, re,codecs

def read_lines(filein):
    """Read the UTF-8 file `filein` and return its lines as a list.

    Line terminators (carriage return and newline) are removed from the
    end of each line; no other characters are stripped.
    """
    result = []
    with codecs.open(filein, mode='r', encoding='utf-8') as stream:
        for text in stream:
            result.append(text.rstrip('\r\n'))
    return result

def write_diffs_check3(fileout,diffs):
    """Write a report of the record pairs in `diffs` to the file `fileout`.

    Each element of `diffs` is a tuple (rec1, rec2, irec).  For each one a
    six-line group is written: a header giving the 1-based diff number and
    1-based line number, a '; cdsl' label followed by rec1's line, a
    '; anna' label followed by rec2's line, and a separator line.

    Prints the number of difference records written.
    """
    groups = [
        [
            '; diff %s at line %s' % (number, irec + 1),
            '; cdsl',
            rec1.line,
            '; anna',
            rec2.line,
            '; ------------------------------------------------------------',
        ]
        for number, (rec1, rec2, irec) in enumerate(diffs, 1)
    ]
    with codecs.open(fileout, "w", "utf-8") as f:
        for group in groups:
            for text in group:
                f.write(text + '\n')
    print(len(diffs),"difference records written to",fileout)

def _tooltip_status(cdsl_tip, anna_tip):
    """Return the status code describing how two tooltips relate.

    '=='  the tooltips are equal
    ';;'  anna's tooltip starts with ';;'  (';;?' if cdsl's starts with '?')
    '_?'  only cdsl's tooltip starts with '?'
    ''    none of the above
    """
    if cdsl_tip == anna_tip:
        return '=='
    questioned = cdsl_tip.startswith('?')
    if anna_tip.startswith(';;'):
        return ';;?' if questioned else ';;'
    return '_?' if questioned else ''


def write_compare(fileout,recs1,recs2):
    """Write a record-by-record tooltip comparison to the file `fileout`.

    `recs1` (cdsl) and `recs2` (anna) are compared position by position.
    For each pair the output holds:
      * 'count:lsstr:ls:status' built from the cdsl record,
      * 'anna: <tooltip>',
      * when the tooltips differ, a ';' line and 'cdsl: <tooltip>',
      * a separator line.

    Asserts that each pair agrees on all fields but the last and that
    neither `ls` contains ':' (the output field separator).
    Prints the number of records written.
    """
    groups = []
    for irec, rec1 in enumerate(recs1):
        rec2 = recs2[irec]
        assert rec1.parts[0:-1] == rec2.parts[0:-1]
        assert ':' not in rec1.ls
        assert ':' not in rec2.ls

        status = _tooltip_status(rec1.tooltip, rec2.tooltip)
        group = [
            ':'.join([rec1.countstr, rec1.lsstr, rec1.ls, status]),
            'anna: %s' % rec2.tooltip,
        ]
        if rec1.tooltip != rec2.tooltip:
            group.extend([';', 'cdsl: %s' % rec1.tooltip])
        group.append('; ------------------------------------------------------------')
        groups.append(group)
    with codecs.open(fileout, "w", "utf-8") as f:
        for group in groups:
            for text in group:
                f.write(text + '\n')
    print(len(recs1),"records written to",fileout)

class Tagcount:
    """One tab-separated record: count, ls-type, ls, tooltip.

    Attributes set by the constructor:
      line     -- the input line, unchanged
      parts0   -- the raw tab-separated fields
      parts    -- the fields with surrounding whitespace stripped
      status   -- True when the line has exactly four fields
      countstr, lsstr, ls, tooltip -- the four stripped fields

    A line with the wrong number of fields no longer raises ValueError in
    the constructor: `status` is False, `parts` holds the fields actually
    found, and the four named attributes are taken from `parts` padded
    with '' (or truncated) to four entries.  This lets a caller inspect
    `status` and report the malformed line.
    """
    def __init__(self,line):
        self.line = line
        parts = line.split('\t')  # tab-separated values
        self.parts0 = parts
        self.status = len(parts) == 4
        self.parts = [p.strip() for p in parts]
        if self.status:
            self.countstr, self.lsstr, self.ls, self.tooltip = self.parts
            # Only well-formed records are checked for a known ls-type.
            assert self.lsstr in ('ls','lsfm','lsfm?')
        else:
            padded = (self.parts + [''] * 4)[:4]
            self.countstr, self.lsstr, self.ls, self.tooltip = padded

def init_tagcount(filein):
    """Return a list with one Tagcount for each line of the file `filein`."""
    records = []
    for text in read_lines(filein):
        records.append(Tagcount(text))
    return records

def check1(recs):
    """Report the records whose `status` is false (wrong number of fields).

    Prints the number of such records, then for each one its 1-based
    position in `recs`, how many fields it has, and every field with a
    1-based index.
    """
    # Collect once; the total must be printed before the details.
    bad = [(irec, rec) for irec, rec in enumerate(recs) if not rec.status]
    print(len(bad),"records with wrong number of fields")
    for irec, rec in bad:
        print('line %s has %s parts' %(irec+1,len(rec.parts)))
        for ipart, part in enumerate(rec.parts):
            print('part[%s] = %s' %(ipart+1,part))
def check2(recs1,filein1,recs2,filein2):
    """Print the record count of each file and run check1 on its records.

    The first file (`filein1` / `recs1`) is reported before the second.
    """
    for name, records in ((filein1, recs1), (filein2, recs2)):
        print('%s has %s records' % (name,len(records)))
        check1(records)

def check3(recs1,recs2):
    """Return the positions where the two record lists disagree.

    Records are compared position by position on `countstr`, `lsstr` and
    `ls`.  Each mismatch contributes a tuple (rec1, rec2, irec) with the
    0-based index `irec`.  Prints the number of mismatches found.
    """
    diffs = []
    for irec, rec1 in enumerate(recs1):
        rec2 = recs2[irec]
        mismatch = (rec1.countstr != rec2.countstr
                    or rec1.lsstr != rec2.lsstr
                    or rec1.ls != rec2.ls)
        if mismatch:
            diffs.append((rec1, rec2, irec))
    print('check3 finds %s problems' % len(diffs))
    return diffs

if __name__=="__main__":
    # Three positional arguments are required; extra ones are ignored.
    if len(sys.argv) < 4:
        print('usage: python compare_tagcount_ls.py '
              '<tagcount_cdsl> <tagcount_anna> <fileout>')
        sys.exit(1)
    filein = sys.argv[1]   # tagcount_ls_0.txt (cdsl)
    filein1 = sys.argv[2]  # tagcount_ls_anna_0.txt
    # Output: the field-difference report if the files disagree on the
    # first three fields, otherwise the tooltip comparison.
    fileout = sys.argv[3]

    recs1 = init_tagcount(filein)
    recs2 = init_tagcount(filein1)
    check2(recs1,filein,recs2,filein1)
    # Explicit check rather than assert, which is skipped under -O.
    if len(recs1) != len(recs2):
        print('record counts differ: %s has %s, %s has %s'
              % (filein, len(recs1), filein1, len(recs2)))
        sys.exit(1)
    diffs_check3 = check3(recs1,recs2)
    if diffs_check3:
        write_diffs_check3(fileout,diffs_check3)
    else:
        write_compare(fileout,recs1,recs2)

Loading

0 comments on commit feb00c9

Please sign in to comment.