-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
#4 version tagcount_ls_jim_29.12.24.txt
- Loading branch information
1 parent
8d9ff5a
commit feb00c9
Showing
29 changed files
with
23,429 additions
and
0 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
# coding=utf-8 | ||
""" adjust_tooltip.py | ||
""" | ||
from __future__ import print_function | ||
import sys, re,codecs | ||
|
||
def read_lines(filein):
    """Return the lines of the UTF-8 text file *filein* as a list.

    Trailing carriage-return and newline characters are removed from
    each line; any other whitespace is kept.
    """
    stripped = []
    with codecs.open(filein, mode='r', encoding='utf-8') as handle:
        for raw in handle:
            stripped.append(raw.rstrip('\r\n'))
    return stripped
|
||
def write_recs(fileout, recs):
    """Write one line per record to the UTF-8 file *fileout*.

    fileout: path of the output file (overwritten).
    recs: records exposing ``line``, ``parts0`` and ``newtooltip``.

    A record whose ``newtooltip`` is None is written as its original
    ``line``.  Otherwise the raw fields in ``parts0`` are joined with
    tabs, with the fourth field replaced by ``newtooltip``.
    The number of records written is printed on stdout.
    """
    outlines = []
    for rec in recs:
        if rec.newtooltip is None:
            out = rec.line  # no change
        else:
            # Work on a copy so that writing never alters the record.
            fields = list(rec.parts0)
            fields[3] = rec.newtooltip
            out = '\t'.join(fields)
        outlines.append(out)
    with codecs.open(fileout, "w", "utf-8") as f:
        for out in outlines:
            f.write(out + '\n')
    print(len(recs), "records written to", fileout)
|
||
|
||
class Tagcount:
    """One record of a tab-separated tagcount file.

    Attributes set by the constructor:
      line       - the input line, unchanged
      parts0     - the raw fields obtained by splitting the line on tabs
      parts      - the same fields with surrounding whitespace stripped
      status     - True when the line has exactly four fields
      countstr, lsstr, ls, tooltip - the four stripped fields, in order
      newtooltip - replacement tooltip; None until one is assigned
    """
    def __init__(self, line):
        """Parse *line* into its four fields.

        Raises ValueError when the line does not have exactly four
        tab-separated fields.  The second field is asserted to be one
        of 'ls', 'lsfm', 'lsfm?'.
        """
        self.line = line
        parts = line.split('\t')  # tab-separated values
        self.parts0 = parts
        self.status = len(parts) == 4
        self.parts = [p.strip() for p in parts]
        if not self.status:
            # Same exception type as the tuple unpacking below would
            # raise, but the message identifies the offending line.
            raise ValueError('expected 4 tab-separated fields, found %s: %r'
                             % (len(parts), line))
        self.countstr, self.lsstr, self.ls, self.tooltip = self.parts
        assert self.lsstr in ('ls', 'lsfm', 'lsfm?'), \
            'unexpected second field %r in line: %r' % (self.lsstr, line)
        self.newtooltip = None
|
||
def init_tagcount(filein):
    """Read the file *filein* and return a list with one Tagcount per line."""
    return [Tagcount(text) for text in read_lines(filein)]
|
||
def generate_changes(lines):
    """Yield (old, new) tooltip pairs parsed from *lines*.

    A line containing 'old: ' stores the text after that marker as the
    pending old tooltip.  A line containing 'new: ' completes the pair:
    (old, new) is yielded and the pending old tooltip is cleared.
    Lines containing neither marker are ignored.

    When both markers occur in one line, the leftmost one decides how
    the line is read, so a new tooltip whose text happens to contain
    'old: ' (for example 'Leopold: ...') is not mistaken for an old line.

    A 'new: ' line with no pending old tooltip is reported on stdout
    and skipped.
    """
    marker = re.compile(r'(old|new): (.*)$')
    old = None  # pending old tooltip, if any
    for iline, line in enumerate(lines):
        m = marker.search(line)
        if m is None:
            continue
        kind, text = m.groups()
        if kind == 'old':
            old = text
            continue
        if old is None:
            print('line %s: "new:" without a preceding "old:" is skipped'
                  % (iline + 1))
            continue
        yield (old, text)
        old = None
|
||
def init_changes(filein):
    """Load the tooltip changes stored in the file *filein*.

    Returns the list of items produced by generate_changes for the
    lines of the file, and prints how many were read.
    """
    pairs = []
    for pair in generate_changes(read_lines(filein)):
        pairs.append(pair)
    print(len(pairs), "changes read from", filein)
    return pairs
|
||
def apply_changes(recs, changes):
    """Set ``newtooltip`` on every record whose tooltip has a replacement.

    recs: records with ``tooltip`` and ``newtooltip`` attributes.
    changes: iterable of (old, new) tooltip pairs.

    A record is changed when its ``tooltip`` equals the old value of a
    pair; its ``newtooltip`` then becomes the new value.  The number of
    changed records is printed on stdout.  If two pairs share the same
    old value, 'duplicate change found' is printed and the program
    exits with status 1.
    """
    d = {}  # old tooltip -> new tooltip
    for old, new in changes:
        if old in d:
            print('duplicate change found')
            # sys.exit, not the site-provided exit(): the latter exists
            # for interactive sessions and is not guaranteed in programs.
            sys.exit(1)
        d[old] = new
    n = 0
    for rec in recs:
        if rec.tooltip in d:
            rec.newtooltip = d[rec.tooltip]
            n = n + 1
    print(n, "records with tooltip change")
|
||
if __name__ == "__main__":
    # Command line: <tagcount file> <tooltip changes file> <output file>
    if len(sys.argv) < 4:
        # Explain what is missing instead of failing with a bare IndexError.
        print('usage: python adjust_tooltip.py <tagcount file> '
              '<tooltip changes file> <output file>', file=sys.stderr)
        sys.exit(1)
    filein = sys.argv[1]   # tagcount_ls_1.txt
    filein1 = sys.argv[2]  # tooltip changes
    fileout = sys.argv[3]  # tagcount records with the tooltip changes applied

    recs = init_tagcount(filein)
    changes = init_changes(filein1)
    apply_changes(recs, changes)  # newtooltip attribute computed
    write_recs(fileout, recs)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
old: ;; Śikṣāsamuccaya, A Compendium of Buddhist Doctrine / transl. C.Bendall and W.H.D.Rouse. London: John Murray, 1922. | ||
new: Śikṣāsamuccaya, ed. Bendall, St. Petersburg, 1897—1902; transl. Bendall and Rouse, London, 1922. | ||
old: ;; Avadāna-śataka, ed. Speyer J. S., 2 vols., St. Petersburg, 1902, 1906. | ||
new: Avadāna-śataka, ed. Speyer, 2 vols., St. Petersburg, 1902, 1906; transl. Feer, Annales du Musée Guimet 18 (1891). | ||
old: ;; The Divyāvadāna: A Collection of Early Buddhist Legends, ed. E.B.Cowell and R.A.Neil, Cambridge, 1886. | ||
new: Divyāvadāna, ed. Cowell and Neil, Cambridge, 1886. | ||
old: ;; W.KirfelDie, Kosmographie der Inder: nach den Quellen dargestellt / Bonn–Leipzig: Kurt Schroeder, 1920. | ||
new: Kosmographie der Inder. |
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
# coding=utf-8 | ||
""" compare_tagcount_ls.py | ||
""" | ||
from __future__ import print_function | ||
import sys, re,codecs | ||
|
||
def read_lines(filein):
    """Read the UTF-8 text file *filein* and return its lines as a list.

    Each line has its trailing carriage-return / newline characters
    removed; nothing else is stripped.
    """
    result = []
    with codecs.open(filein, mode='r', encoding='utf-8') as source:
        for text in source:
            result.append(text.rstrip('\r\n'))
    return result
|
||
def write_diffs_check3(fileout, diffs):
    """Write a report of differing records to the UTF-8 file *fileout*.

    diffs: sequence of (rec1, rec2, irec) tuples, where irec is the
        0-based index of the record pair; only the ``line`` attribute
        of each record is used.

    Every difference produces six lines: a numbered header, the
    '; cdsl' label followed by rec1's line, the '; anna' label followed
    by rec2's line, and a separator.  The number of differences is
    printed on stdout.
    """
    separator = '; ------------------------------------------------------------'
    report = []
    number = 0
    for rec1, rec2, irec in diffs:
        number += 1
        report.extend([
            '; diff %s at line %s' % (number, irec + 1),
            '; cdsl',
            rec1.line,
            '; anna',
            rec2.line,
            separator,
        ])
    with codecs.open(fileout, "w", "utf-8") as f:
        for text in report:
            f.write(text + '\n')
    print(len(diffs), "difference records written to", fileout)
|
||
def write_compare(fileout, recs1, recs2):
    """Write a record-by-record tooltip comparison to the UTF-8 file *fileout*.

    recs1 and recs2 are parallel sequences of records; the output labels
    tooltips from recs1 as 'cdsl' and tooltips from recs2 as 'anna'.
    For every pair the fields other than the last must agree and the
    ``ls`` values must not contain ':' (both are asserted).

    Each pair produces a header 'countstr:lsstr:ls:status', the 'anna'
    tooltip, then -- only when the tooltips differ -- a ';' line and the
    'cdsl' tooltip, and finally a separator line.  The status is
      '=='   tooltips are equal
      ';;'   the anna tooltip starts with ';;'
      ';;?'  as ';;', and the cdsl tooltip starts with '?'
      '_?'   only the cdsl tooltip starts with '?'
      ''     none of the above
    The number of records is printed on stdout.
    """
    separator = '; ------------------------------------------------------------'
    report = []
    for irec, rec1 in enumerate(recs1):
        rec2 = recs2[irec]
        assert rec1.parts[0:-1] == rec2.parts[0:-1]
        header = [rec1.countstr, rec1.lsstr, rec1.ls]
        assert ':' not in rec1.ls
        assert ':' not in rec2.ls

        cdsl_tip = rec1.tooltip
        anna_tip = rec2.tooltip
        same = cdsl_tip == anna_tip
        # the fourth header field is a 'status' code
        if same:
            status = '=='
        elif anna_tip.startswith(';;'):
            status = ';;?' if cdsl_tip.startswith('?') else ';;'
        elif cdsl_tip.startswith('?'):
            status = '_?'
        else:
            status = ''
        header.append(status)

        report.append(':'.join(header))
        report.append('anna: %s' % anna_tip)
        if not same:
            report.append(';')
            report.append('cdsl: %s' % cdsl_tip)
        report.append(separator)
    with codecs.open(fileout, "w", "utf-8") as f:
        for text in report:
            f.write(text + '\n')
    print(len(recs1), "records written to", fileout)
|
||
class Tagcount:
    """One record of a tab-separated tagcount file.

    Attributes set by the constructor:
      line    - the input line, unchanged
      parts0  - the raw fields obtained by splitting the line on tabs
      parts   - the same fields with surrounding whitespace stripped
      status  - True when the line has exactly four fields
      countstr, lsstr, ls, tooltip - the first four stripped fields
    """
    def __init__(self, line):
        """Parse *line*.

        A line with exactly four fields must have 'ls', 'lsfm' or
        'lsfm?' as its second field (asserted).

        A line with any other number of fields does not raise: its
        ``status`` is False so that callers can detect and report it.
        The four named attributes are then taken from the first four
        fields, with '' standing in for missing ones.
        """
        self.line = line
        parts = line.split('\t')  # tab-separated values
        self.parts0 = parts
        self.status = len(parts) == 4
        self.parts = [p.strip() for p in parts]
        if self.status:
            self.countstr, self.lsstr, self.ls, self.tooltip = self.parts
            assert self.lsstr in ('ls', 'lsfm', 'lsfm?')
        else:
            # Unpacking self.parts directly would raise here, and a
            # status of False could then never be observed.
            padded = (self.parts + [''] * 4)[:4]
            self.countstr, self.lsstr, self.ls, self.tooltip = padded
|
||
def init_tagcount(filein):
    """Return a list of Tagcount records, one for each line of the file *filein*."""
    records = []
    for text in read_lines(filein):
        records.append(Tagcount(text))
    return records
|
||
def check1(recs):
    """Print a report of the records whose ``status`` is False.

    The first output line gives the number of such records.  For each
    of them, the 1-based position in *recs* and the number of parts are
    printed, followed by one line per part.
    """
    flagged = [(lnum, rec) for lnum, rec in enumerate(recs, 1)
               if rec.status == False]
    print(len(flagged), "records with wrong number of fields")
    for lnum, rec in flagged:
        print('line %s has %s parts' % (lnum, len(rec.parts)))
        for pnum, part in enumerate(rec.parts, 1):
            print('part[%s] = %s' % (pnum, part))
def check2(recs1, filein1, recs2, filein2):
    """Print the record count of each input and run check1 on it.

    recs1 / recs2 are the records read from the files named filein1 /
    filein2; the names are used only in the printed messages.
    """
    for name, recs in ((filein1, recs1), (filein2, recs2)):
        print('%s has %s records' % (name, len(recs)))
        check1(recs)
|
||
def check3(recs1, recs2):
    """Return the record pairs that disagree in countstr, lsstr or ls.

    recs1 and recs2 are compared position by position (the tooltip is
    not compared).  The result is a list of (rec1, rec2, irec) tuples
    with irec the 0-based position.  The number of disagreements is
    printed on stdout.
    """
    diffs = []
    for irec, rec1 in enumerate(recs1):
        rec2 = recs2[irec]
        if (rec1.countstr != rec2.countstr
                or rec1.lsstr != rec2.lsstr
                or rec1.ls != rec2.ls):
            diffs.append((rec1, rec2, irec))
    print('check3 finds %s problems' % len(diffs))
    return diffs
|
||
if __name__ == "__main__":
    # Command line: <cdsl tagcount file> <anna tagcount file> <output file>
    if len(sys.argv) < 4:
        # Explain what is missing instead of failing with a bare IndexError.
        print('usage: python compare_tagcount_ls.py <cdsl tagcount file> '
              '<anna tagcount file> <output file>', file=sys.stderr)
        sys.exit(1)
    filein = sys.argv[1]   # tagcount_ls_0.txt (cdsl)
    filein1 = sys.argv[2]  # tagcount_ls_anna_0.txt
    fileout = sys.argv[3]  # difference report, or the full comparison

    recs1 = init_tagcount(filein)
    recs2 = init_tagcount(filein1)
    check2(recs1, filein, recs2, filein1)
    # The records are compared position by position, so the two files
    # must have the same number of lines.  Checked explicitly because
    # an assert is removed when Python runs with -O.
    if len(recs1) != len(recs2):
        print('record counts differ: %s has %s, %s has %s'
              % (filein, len(recs1), filein1, len(recs2)), file=sys.stderr)
        sys.exit(1)
    diffs_check3 = check3(recs1, recs2)
    if diffs_check3:
        # count/ls fields disagree: report only those differences
        write_diffs_check3(fileout, diffs_check3)
    else:
        write_compare(fileout, recs1, recs2)
|
Oops, something went wrong.