-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Improve markup of links to HARIVAMSA Calcutta edition.
Ref: #49
- Loading branch information
1 parent
45eb6de
commit 7a0cd64
Showing
11 changed files
with
18,152 additions
and
0 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,211 @@ | ||
#-*- coding:utf-8 -*- | ||
"""change_abnormal.py | ||
generate change transactions for lines in entries which | ||
are considered abnormal. | ||
""" | ||
from __future__ import print_function | ||
import sys, re,codecs | ||
from parseheadline import parseheadline | ||
|
||
class Entry(object):
    """One entry of the digitization: meta line, data lines and <LEND> line.

    Attributes:
        metaline: first line of the entry (the line starting with '<L>').
        lend: last line of the entry (the '<LEND>' line).
        datalines: the lines strictly between metaline and lend.
        metad: result of parseheadline(metaline); must provide key 'L'.
        linenum1, linenum2: ints supplied by the caller; init_entries passes
            the 1-based file line numbers of the first and last entry lines.
        lsarr: empty list created for each entry (not filled in here).
    """

    # Class-level registry shared by ALL instances: maps each L value to the
    # Entry that owns it, so a duplicate L can be detected at construction.
    Ldict = {}

    def __init__(self, lines, linenum1, linenum2):
        """Build the entry from its lines and register it in Entry.Ldict.

        Prints an error and exits the program (status 1) when another entry
        with the same L value was already constructed.
        """
        self.metaline = lines[0]
        self.lend = lines[-1]  # the <LEND> line
        self.datalines = lines[1:-1]  # the non-meta lines
        # parse the meta line into a dictionary
        self.metad = parseheadline(self.metaline)
        self.linenum1 = linenum1
        self.linenum2 = linenum2
        L = self.metad['L']
        if L in self.Ldict:
            print("Entry init error: duplicate L", L, linenum1)
            # sys.exit rather than the site-provided exit(), which is not
            # guaranteed to exist (e.g. when run with python -S).
            sys.exit(1)
        self.Ldict[L] = self
        self.lsarr = []
|
||
def init_entries(filein):
    """Read filein (utf-8) and split it into Entry objects.

    An entry starts at a line beginning with '<L>' and ends at the next line
    beginning with '<LEND>'.  Lines outside of entries are ignored.

    Args:
        filein: path of the text file to read.

    Returns:
        list of Entry objects, in file order.

    The program is terminated with status 1 (after printing a message) when
    an '<L>' line occurs inside an open entry, when a '<LEND>' line occurs
    outside of an entry, or when the last entry is never closed.
    """
    # slurp lines
    with codecs.open(filein, encoding='utf-8', mode='r') as f:
        lines = [line.rstrip('\r\n') for line in f]
    recs = []  # list of Entry objects
    # Index of the '<L>' line of the entry being read; None when we are
    # between entries and looking for the next '<L>'.
    idx1 = None
    for idx, line in enumerate(lines):
        if idx1 is not None:
            # inside an entry: keep looking for <LEND>
            if line.startswith('<LEND>'):
                # Entry receives 1-based line numbers of its first/last line.
                recs.append(Entry(lines[idx1:idx + 1], idx1 + 1, idx + 1))
                idx1 = None  # prepare for next entry
            elif line.startswith('<L>'):  # error
                print('init_entries Error 1. Not expecting <L>')
                print("line # ", idx + 1)
                # NOTE(review): under Python 3 this prints the bytes repr
                # (b'...'); kept unchanged to preserve existing output.
                print(line.encode('utf-8'))
                sys.exit(1)
        elif line.startswith('<L>'):
            idx1 = idx
        elif line.startswith('<LEND>'):  # error
            print('init_entries Error 2. Not expecting <LEND>')
            print("line # ", idx + 1)
            print(line.encode('utf-8'))
            sys.exit(1)
    # when all lines are read, no entry should still be open
    if idx1 is not None:
        print('init_entries Error 3. Last entry not closed')
        print('Open entry starts at line', idx1 + 1)
        sys.exit(1)

    print(len(lines), "lines read from", filein)
    print(len(recs), "entries found")
    return recs
|
||
|
||
class LSchange(object):
    """Record of one data line that contains an abnormal <ls> element.

    Attributes:
        entry: the entry object the line belongs to.
        iline: index of the line within entry.datalines.
        ls: the text of the abnormal <ls>...</ls> element.
    """

    def __init__(self, entry, iline, ls):
        self.entry = entry
        self.iline = iline
        self.ls = ls
|
||
def find_abnormals_hariv(lspfx, entries):
    """Find data lines containing an <ls> reference to lspfx in abnormal form.

    Two families of <ls> elements are examined on every data line:
      1. '<ls>PFX ...</ls>'        (prefix inside the element text)
      2. '<ls n="PFX...">...</ls>' (prefix carried by the n attribute)
    An element is 'normal' when it matches one of the accepted forms listed
    for its family; otherwise it is 'abnormal'.

    Args:
        lspfx: literal prefix of the literary source, e.g. 'HARIV.'.
        entries: iterable of objects having a 'datalines' list of strings.

    Returns:
        list of LSchange objects, at most one per data line, each holding the
        first abnormal <ls> found on that line (family 1 is checked before
        family 2).  Also prints the number of abnormal lines found.
    """
    # Treat the prefix as literal text inside the patterns.  re.escape
    # replaces hand-written replacements such as '\(' which are invalid
    # escape sequences in a non-raw string literal.
    pfx = re.escape(lspfx)
    # Each item: (pattern finding every <ls> of the family,
    #             [(pattern of an accepted form, label of that form), ...]).
    # The labels are informational only.
    regexdata = [
        (r'<ls>%s.*?</ls>', [
            (r'<ls>%s ([0-9]+[.]?)</ls>', '1a'),
            (r'<ls>%s ([0-9]+[.]?) fgg?[.]</ls>', '1b'),
            (r'<ls>%s ([0-9]+[.,]?) v[.] l[.]</ls>', '1c'),
            (r'<ls>%s</ls>', '1d'),
            (r'<ls>%s[^<]*ed[.] Bomb[.].*?</ls>', '1e'),
        ]),
        (r'<ls n="%s[^"]*">.*?</ls>', [
            (r'<ls n="%s">([0-9]+[.]?)</ls>', '2a'),
            (r'<ls n="%s">([0-9]+[.]?) fgg?[.]</ls>', '2b'),
            (r'<ls n="%s">([0-9]+[.,]?) v[.] l[.]</ls>', '2c'),
        ]),
    ]
    # Compile once, outside of the per-line loop.
    families = [
        (re.compile(finder % pfx),
         [re.compile(form % pfx) for form, _label in forms])
        for finder, forms in regexdata
    ]

    def first_abnormal(line):
        """Return the first <ls> on line matching no accepted form, or None."""
        for finder, forms in families:
            # finder has no capture groups, so findall yields whole elements
            for ls in finder.findall(line):
                if not any(form.search(ls) for form in forms):
                    return ls
        return None

    abnormals = []
    for entry in entries:
        for iline, line in enumerate(entry.datalines):
            ls = first_abnormal(line)
            if ls is not None:
                # generate change transaction for this line
                abnormals.append(LSchange(entry, iline, ls))
    print(len(abnormals), 'abnormal lines found')
    return abnormals
|
||
def normals_summary(normals):
    """Print how many instances there are of each type, then the total.

    Args:
        normals: iterable of objects having a 'type' attribute.

    One line '<count> ls instances of type <type>' is printed per distinct
    type, in sorted order of the types, followed by a 'totals= <n>' line.
    """
    counts = {}
    for lsinstance in normals:
        counts[lsinstance.type] = counts.get(lsinstance.type, 0) + 1
    for t in sorted(counts):
        print(counts[t], "ls instances of type", t)
    print('totals=', sum(counts.values()))
|
||
def write_abnormals(fileout, abnormals):
    """Write one change-transaction template per abnormal line to fileout.

    Args:
        fileout: path of the utf-8 output file (overwritten).
        abnormals: iterable of objects with attributes 'entry', 'iline' and
            'ls'; each entry provides 'linenum1', 'metaline' and 'datalines'.

    For every item six lines are written: a separator comment, the meta line
    (cut off before '<k2>'), the abnormal ls, the 'old' line, a ';' line and
    a 'new' line.  The 'new' line is initially a copy of the 'old' line,
    left for later correction.
    """
    with codecs.open(fileout, "w", "utf-8") as f:
        for x in abnormals:
            entry = x.entry
            iline = x.iline
            # entry.linenum1 is the line number of the meta line, and
            # datalines[0] is the line right after it; hence the +1.
            lnum = entry.linenum1 + iline + 1
            metaline = re.sub(r'<k2>.*$', '', entry.metaline)
            line = entry.datalines[iline]
            outarr = [
                '; --------------------------------',
                '; %s' % metaline,
                '; Abnormal ls: %s' % x.ls,
                '%s old %s' % (lnum, line),
                ';',
                '%s new %s' % (lnum, line),
            ]
            f.write('\n'.join(outarr) + '\n')
    print(len(abnormals), 'change transactions', fileout)
|
||
if __name__ == "__main__":
    # Usage: python change_abnormal.py <lspfx> <filein> <fileout>
    #   lspfx   : literary-source prefix to examine, e.g. 'HARIV.'
    #   filein  : xxx.txt (path to digitization of xxx)
    #   fileout : path of the change-transaction file to write
    if len(sys.argv) < 4:
        # Explain the expected arguments instead of failing with IndexError.
        print('Usage: python change_abnormal.py <lspfx> <filein> <fileout>')
        sys.exit(1)
    lspfx, filein, fileout = sys.argv[1:4]
    entries = init_entries(filein)
    if lspfx == 'HARIV.':
        abnormals = find_abnormals_hariv(lspfx, entries)
        write_abnormals(fileout, abnormals)
    else:
        print('Not implemented for lspfx = %s' % lspfx)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,156 @@ | ||
; -------------------------------- | ||
; <L>371<pc>1-0030<k1>agnija | ||
; Abnormal ls: <ls>HARIV. S. 927, Z. 5</ls> | ||
3232 old <ls>HARIV. S. 927, Z. 5</ls>, v. u. | ||
; | ||
3232 new <ls>HARIV. S. 927, Z. 5</ls>, v. u. | ||
; -------------------------------- | ||
; <L>521<pc>1-0039<k1>agra | ||
; Abnormal ls: <ls>HARIV. S. 927</ls> | ||
4342 old <ls>HARIV. S. 927</ls>, ult. erscheint {#agra#} im Voc. als ein Beiname <is>Viṣṇu's</is>. | ||
; | ||
4342 new <ls>HARIV. S. 927</ls>, ult. erscheint {#agra#} im Voc. als ein Beiname <is>Viṣṇu's</is>. | ||
; -------------------------------- | ||
; <L>526<pc>1-0040<k1>agraja | ||
; Abnormal ls: <ls>HARIV. S. 927</ls> | ||
4386 old <ls>HARIV. S. 927</ls>, ult. | ||
; | ||
4386 new <ls>HARIV. S. 927</ls>, ult. | ||
; -------------------------------- | ||
; <L>873<pc>1-0066<k1>aja | ||
; Abnormal ls: <ls>HARIV. S. 927, Z. 4</ls> | ||
7646 old <ls>HARIV. S. 927, Z. 4</ls>, v. u. | ||
; | ||
7646 new <ls>HARIV. S. 927, Z. 4</ls>, v. u. | ||
; -------------------------------- | ||
; <L>924<pc>1-0069<k1>ajaya | ||
; Abnormal ls: <ls>HARIV. S. 927, Z. 4</ls> | ||
8021 old <ls>HARIV. S. 927, Z. 4</ls>, v. u. | ||
; | ||
8021 new <ls>HARIV. S. 927, Z. 4</ls>, v. u. | ||
; -------------------------------- | ||
; <L>1016<pc>1-0074<k1>ajEkapAd | ||
; Abnormal ls: <ls>HARIV. S. 928, Z. 1.</ls> | ||
8755 old <ls>HARIV. S. 928, Z. 1.</ls> | ||
; | ||
8755 new <ls>HARIV. S. 928, Z. 1.</ls> | ||
; -------------------------------- | ||
; <L>5286<pc>1-0384<k1>ambarIza | ||
; Abnormal ls: <ls n="HARIV.">83, N. 6.</ls> | ||
44108 old <ls n="HARIV.">83, N. 6.</ls> <is>Māṃdhātar's</is> | ||
; | ||
44108 new <ls n="HARIV.">83, N. 6.</ls> <is>Māṃdhātar's</is> | ||
; -------------------------------- | ||
; <L>11578<pc>1-0937<k1>unnetar | ||
; Abnormal ls: <ls>HARIV. LANGL. II, 297</ls> | ||
106622 old <ls>13.</ls> <ls>HARIV. LANGL. II, 297</ls>; der gedr. Text <ls>HARIV. 11363</ls> : {#sunetar#} . | ||
; | ||
106622 new <ls>13.</ls> <ls>HARIV. LANGL. II, 297</ls>; der gedr. Text <ls>HARIV. 11363</ls> : {#sunetar#} . | ||
; -------------------------------- | ||
; <L>14182<pc>2-0009<k1>kakudmin | ||
; Abnormal ls: <ls>HARIV. S. 927, Z. 4.</ls> | ||
130173 old <ls>HARIV. S. 927, Z. 4.</ls> v. u. | ||
; | ||
130173 new <ls>HARIV. S. 927, Z. 4.</ls> v. u. | ||
; -------------------------------- | ||
; <L>14857<pc>2-0063<k1>kapAlin | ||
; Abnormal ls: <ls>HARIV. LANGL. I, 513.</ls> | ||
136842 old <ls>VYĀḌI</ls> zu <ls>210.</ls> <ls>HARIV. LANGL. I, 513.</ls> | ||
; | ||
136842 new <ls>VYĀḌI</ls> zu <ls>210.</ls> <ls>HARIV. LANGL. I, 513.</ls> | ||
; -------------------------------- | ||
; <L>14891<pc>2-0065<k1>kapila | ||
; Abnormal ls: <ls n="HARIV.">S. 927, Z. 5 v. u.</ls> | ||
137119 old <ls>HARIV. 788.</ls> <ls n="HARIV.">2219.</ls> <ls n="HARIV.">7595.</ls> <ls n="HARIV.">11495.</ls> <ls n="HARIV.">12439.</ls> <ls n="HARIV.">S. 927, Z. 5 v. u.</ls> <ls>R. 1, 41, 25.</ls> <ls>RAGH. 3, 50.</ls> <ls>BHĀG. P. 1, 3, 10. 3, 24, 19.</ls> <ls>VP. 378. fg.</ls> ein Sohn <is>Vitatha's</is> | ||
; | ||
137119 new <ls>HARIV. 788.</ls> <ls n="HARIV.">2219.</ls> <ls n="HARIV.">7595.</ls> <ls n="HARIV.">11495.</ls> <ls n="HARIV.">12439.</ls> <ls n="HARIV.">S. 927, Z. 5 v. u.</ls> <ls>R. 1, 41, 25.</ls> <ls>RAGH. 3, 50.</ls> <ls>BHĀG. P. 1, 3, 10. 3, 24, 19.</ls> <ls>VP. 378. fg.</ls> ein Sohn <is>Vitatha's</is> | ||
; -------------------------------- | ||
; <L>19351<pc>2-0448<k1>kola | ||
; Abnormal ls: <ls>HARIV. LANGL. 1, 68</ls> | ||
181940 old <ls>HARIV. LANGL. 1, 68</ls> und <ls>ŚKDR.</ls> nach <ls>HARIV.</ls>; die gedr. Ausg.: {#kolisarpAH#} st. {#kolAH sarpAH#}, wie | ||
; | ||
181940 new <ls>HARIV. LANGL. 1, 68</ls> und <ls>ŚKDR.</ls> nach <ls>HARIV.</ls>; die gedr. Ausg.: {#kolisarpAH#} st. {#kolAH sarpAH#}, wie | ||
; -------------------------------- | ||
; <L>28074<pc>3-0172<k1>jvAlAjihva | ||
; Abnormal ls: <ls>HARIV. LANGL. I, 513</ls> | ||
276910 old <ls>VYĀḌI</ls> zu <ls>H. 210</ls> (vgl. <ls>HARIV. LANGL. I, 513</ls>). N. pr. eines <is>Dānava</is> | ||
; | ||
276910 new <ls>VYĀḌI</ls> zu <ls>H. 210</ls> (vgl. <ls>HARIV. LANGL. I, 513</ls>). N. pr. eines <is>Dānava</is> | ||
; -------------------------------- | ||
; <L>30856<pc>3-0424<k1>trikakud | ||
; Abnormal ls: <ls>HARIV. S. 927, Z. 4</ls> | ||
305856 old <ls n="MBH. 12,">13252.</ls> <ls n="MBH.">13, 6956.</ls> <ls>HARIV. S. 927, Z. 4</ls> v. u. | ||
; | ||
305856 new <ls n="MBH. 12,">13252.</ls> <ls n="MBH.">13, 6956.</ls> <ls>HARIV. S. 927, Z. 4</ls> v. u. | ||
; -------------------------------- | ||
; <L>43424<pc>4-0572<k1>parRaka | ||
; Abnormal ls: <ls>HARIV. LANGL. II, 376</ls> | ||
432891 old <ls>HARIV. 14165</ls>; vgl. <ls>HARIV. LANGL. II, 376</ls>, wo die Calc. Ausg. {#parRinI#} hat. | ||
; | ||
432891 new <ls>HARIV. 14165</ls>; vgl. <ls>HARIV. LANGL. II, 376</ls>, wo die Calc. Ausg. {#parRinI#} hat. | ||
; -------------------------------- | ||
; <L>45424<pc>4-0731<k1>piSitASin | ||
; Abnormal ls: <ls>HARIV. LANGL. I, 513.</ls> | ||
451966 old <ls>VYĀḌI</ls> zu <ls>H. 210</ls>; vgl. <ls>HARIV. LANGL. I, 513.</ls> | ||
; | ||
451966 new <ls>VYĀḌI</ls> zu <ls>H. 210</ls>; vgl. <ls>HARIV. LANGL. I, 513.</ls> | ||
; -------------------------------- | ||
; <L>46726<pc>4-0839<k1>pUrRAyus | ||
; Abnormal ls: <ls>HARIV. LANGL. II, 481</ls> | ||
464771 old <ls>MBH. 1, 2554.</ls> <ls>HARIV. LANGL. II, 481</ls> (die Calc. Ausg. <ls n="HARIV.">14156</ls> | ||
; | ||
464771 new <ls>MBH. 1, 2554.</ls> <ls>HARIV. LANGL. II, 481</ls> (die Calc. Ausg. <ls n="HARIV.">14156</ls> | ||
; -------------------------------- | ||
; <L>49502<pc>4-1047<k1>pramardana | ||
; Abnormal ls: <ls>HARIV. LANGL. I, 513.</ls> | ||
489797 old <ls>VYĀḌI</ls> zu <ls>H. 210</ls>; vgl. <ls>HARIV. LANGL. I, 513.</ls> eines <is>Vidyādhara</is> | ||
; | ||
489797 new <ls>VYĀḌI</ls> zu <ls>H. 210</ls>; vgl. <ls>HARIV. LANGL. I, 513.</ls> eines <is>Vidyādhara</is> | ||
; -------------------------------- | ||
; <L>54819<pc>5-0277<k1>BAsvant | ||
; Abnormal ls: <ls n="HARIV."> 5185.</ls> | ||
541191 old <ls n="HARIV.">1331</ls> <ls n="HARIV."> 5185.</ls> <ls n="HARIV.">10995.</ls> <ls>R. 1, 44, 30. 2, 83, 6.</ls> <ls>R. GORR. 2, 100, 16.</ls> {#BUzaRAni#} | ||
; | ||
541191 new <ls n="HARIV.">1331</ls> <ls n="HARIV."> 5185.</ls> <ls n="HARIV.">10995.</ls> <ls>R. 1, 44, 30. 2, 83, 6.</ls> <ls>R. GORR. 2, 100, 16.</ls> {#BUzaRAni#} | ||
; -------------------------------- | ||
; <L>54948<pc>5-0294<k1>BImaka | ||
; Abnormal ls: <ls>HARIV. LANGL. I, 513.</ls> | ||
543102 old <ls>HARIV. 9561.</ls> <ls>HARIV. LANGL. I, 513.</ls> {#BIzaka#} | ||
; | ||
543102 new <ls>HARIV. 9561.</ls> <ls>HARIV. LANGL. I, 513.</ls> {#BIzaka#} | ||
; -------------------------------- | ||
; <L>58325<pc>5-0619<k1>mahAkAla | ||
; Abnormal ls: <ls>HARIV. LANGL. I,512.</ls> | ||
579488 old <ls n="HARIV.">12502.</ls> <ls>HARIV. LANGL. I,512.</ls> <ls>KATHĀS. 50,147.</ls> <ls>PAÑCAR. 1,15,7.</ls> <ls>Verz. d. Oxf. H. 45,a,7.</ls> {#°gaRotpatti#} | ||
; | ||
579488 new <ls n="HARIV.">12502.</ls> <ls>HARIV. LANGL. I,512.</ls> <ls>KATHĀS. 50,147.</ls> <ls>PAÑCAR. 1,15,7.</ls> <ls>Verz. d. Oxf. H. 45,a,7.</ls> {#°gaRotpatti#} | ||
; -------------------------------- | ||
; <L>100618<pc>7-0281<k1>SUra | ||
; Abnormal ls: <ls>HARIV. lith. Ausg. 33, 56.</ls> | ||
947904 old <ls>HARIV. lith. Ausg. 33, 56.</ls> {#SUrABIrAH#} ({#SUdrA°?#}) | ||
; | ||
947904 new <ls>HARIV. lith. Ausg. 33, 56.</ls> {#SUrABIrAH#} ({#SUdrA°?#}) | ||
; -------------------------------- | ||
; <L>100642<pc>7-0282<k1>SUravIra | ||
; Abnormal ls: <ls>HARIV. lith. Ausg. 33, 56.</ls> | ||
948079 old <ls>HARIV. lith. Ausg. 33, 56.</ls> | ||
; | ||
948079 new <ls>HARIV. lith. Ausg. 33, 56.</ls> | ||
; -------------------------------- | ||
; <L>108891<pc>7-0963<k1>sAvarRa | ||
; Abnormal ls: <ls>HARIV. lith Ausg. 7, 43.</ls> | ||
1030782 old <ls>HARIV. lith Ausg. 7, 43.</ls> | ||
; | ||
1030782 new <ls>HARIV. lith Ausg. 7, 43.</ls> | ||
; -------------------------------- | ||
; <L>111836<pc>7-1143<k1>susaMyukta | ||
; Abnormal ls: <ls>HARIV. lith. Ausg. 21, 37.</ls> | ||
1055950 old <ls>HARIV. lith. Ausg. 21, 37.</ls> so v. a. {%in richtigem Zahlenverhältniss zu einander stehend%} | ||
; | ||
1055950 new <ls>HARIV. lith. Ausg. 21, 37.</ls> so v. a. {%in richtigem Zahlenverhältniss zu einander stehend%} | ||
; -------------------------------- | ||
; <L>116085<pc>7-1519<k1>har | ||
; Abnormal ls: <ls>HARIV. 2, 3.</ls> | ||
1102535 old <ls>NĪLAK.</ls> zu <ls>MBH. 3, 10247</ls> und zu <ls>HARIV. 2, 3.</ls> | ||
; | ||
1102535 new <ls>NĪLAK.</ls> zu <ls>MBH. 3, 10247</ls> und zu <ls>HARIV. 2, 3.</ls> |
Oops, something went wrong.