Skip to content

Commit

Permalink
Improve markup of links to HARIVAMSA Calcutta edition.
Browse files Browse the repository at this point in the history
Ref: #49
  • Loading branch information
funderburkjim committed Feb 8, 2022
1 parent 45eb6de commit 7a0cd64
Show file tree
Hide file tree
Showing 11 changed files with 18,152 additions and 0 deletions.
9,674 changes: 9,674 additions & 0 deletions pwg_ls2/hariv/change_01.txt

Large diffs are not rendered by default.

6,228 changes: 6,228 additions & 0 deletions pwg_ls2/hariv/change_02.txt

Large diffs are not rendered by default.

211 changes: 211 additions & 0 deletions pwg_ls2/hariv/change_abnormal.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,211 @@
#-*- coding:utf-8 -*-
"""change_abnormal.py
Generate change transactions for lines in entries which
are considered abnormal.
"""
from __future__ import print_function
import sys, re,codecs
from parseheadline import parseheadline

class Entry(object):
    """One entry of the digitization: meta line, data lines and <LEND> line.

    Attributes set by the constructor:
      metaline  -- first line of the entry (the meta line)
      lend      -- last line of the entry (the <LEND> line)
      datalines -- the lines between the meta line and the <LEND> line
      metad     -- result of parseheadline(metaline); must support ['L']
      linenum1  -- int, line number of the meta line (as given by the caller)
      linenum2  -- int, line number of the <LEND> line (as given by the caller)
      lsarr     -- initially empty list
    """
    # Registry of every Entry constructed so far, keyed by the 'L' value of
    # its meta line.  It is a class attribute, hence shared by all instances,
    # which is what allows duplicate L values to be detected.
    Ldict = {}

    def __init__(self, lines, linenum1, linenum2):
        """Build an entry from its lines and register it in Entry.Ldict.

        lines    -- all lines of the entry, meta line first, <LEND> line last
        linenum1 -- int, line number of lines[0]
        linenum2 -- int, line number of lines[-1]

        Prints a message and terminates the program (exit status 1) when
        another entry with the same L value was already registered.
        """
        self.metaline = lines[0]
        self.lend = lines[-1]  # the <LEND> line
        self.datalines = lines[1:-1]  # the non-meta lines
        # parse the meta line into a dictionary
        self.metad = parseheadline(self.metaline)
        self.linenum1 = linenum1
        self.linenum2 = linenum2
        L = self.metad['L']
        if L in self.Ldict:
            print("Entry init error: duplicate L", L, linenum1)
            # sys.exit rather than the site-provided exit(), which is meant
            # for the interactive interpreter and may not be defined.
            sys.exit(1)
        self.Ldict[L] = self
        self.lsarr = []

def init_entries(filein):
    """Read a digitization file and return its entries as Entry objects.

    filein -- path of a utf-8 encoded text file.

    An entry starts at a line beginning with '<L>' and ends at the next line
    beginning with '<LEND>'.  Lines outside of entries are ignored.  Each
    Entry receives the 1-based line numbers of its first and last line.

    Returns the list of Entry objects in file order.  Prints a diagnostic and
    terminates the program (exit status 1) when the markers are unbalanced:
    '<L>' inside an open entry, '<LEND>' outside of an entry, or an entry
    still open at end of file.
    """
    # slurp lines
    with codecs.open(filein, encoding='utf-8', mode='r') as f:
        lines = [line.rstrip('\r\n') for line in f]
    recs = []  # list of Entry objects
    # Index of the '<L>' line of the entry being read; None between entries.
    idx1 = None
    for idx, line in enumerate(lines):
        if idx1 is not None:
            # inside an entry: looking for '<LEND>'
            if line.startswith('<LEND>'):
                entrylines = lines[idx1:idx + 1]
                recs.append(Entry(entrylines, idx1 + 1, idx + 1))
                # prepare for next entry
                idx1 = None
            elif line.startswith('<L>'):  # error
                print('init_entries Error 1. Not expecting <L>')
                print("line # ", idx + 1)
                # NOTE(review): presumably encoded for Python 2 consoles;
                # on Python 3 this prints the bytes repr (b'...').
                print(line.encode('utf-8'))
                sys.exit(1)
        elif line.startswith('<L>'):
            # outside of an entry: a new entry begins here
            idx1 = idx
        elif line.startswith('<LEND>'):  # error
            print('init_entries Error 2. Not expecting <LEND>')
            print("line # ", idx + 1)
            print(line.encode('utf-8'))
            sys.exit(1)
    # when all lines are read, no entry should still be open
    if idx1 is not None:
        print('init_entries Error 3. Last entry not closed')
        print('Open entry starts at line', idx1 + 1)
        sys.exit(1)

    print(len(lines), "lines read from", filein)
    print(len(recs), "entries found")
    return recs


class LSchange(object):
    """A data line for which a change transaction is to be generated.

    Attributes:
      entry -- the entry that contains the line
      iline -- index of the line within entry.datalines
      ls    -- text of the <ls>...</ls> element that triggered the change
    """

    def __init__(self, entry, iline, ls):
        # Plain record: just remember the three values.
        self.entry, self.iline, self.ls = entry, iline, ls

def _first_abnormal_ls(line, groups):
    """Return the first <ls> element of line matching no normal form, or None.

    groups -- sequence of (finder, norms); finder is a compiled regex that
              locates candidate <ls> elements, norms the compiled regexes of
              the forms accepted as normal for those candidates.
    Groups are tried in order, candidates within a group from left to right.
    """
    for finder, norms in groups:
        for ls in finder.findall(line):
            if not any(norm.search(ls) for norm in norms):
                return ls
    return None


def find_abnormals_hariv(lspfx, entries):
    """Find data lines containing an <ls> reference to lspfx of unusual form.

    lspfx   -- literal abbreviation of the literary source, e.g. 'HARIV.'
    entries -- iterable of objects having a 'datalines' list of strings

    Two kinds of references are examined, in this order:
      1. <ls>PFX ...</ls>        (prefix in the element text)
      2. <ls n="PFX...">...</ls> (prefix in the n attribute)
    A reference is normal when it is a single number (optionally followed by
    '.'), such a number followed by 'fg.'/'fgg.' or 'v. l.', and for kind 1
    also the bare prefix or a reference mentioning 'ed. Bomb.'.

    Returns a list of LSchange objects, at most one per line, each holding
    the first abnormal reference found in that line.  Also prints the count.
    """
    # re.escape makes every regex metacharacter of the prefix literal
    # (for 'HARIV.' that is the '.').
    pfx = re.escape(lspfx)
    # (pattern finding candidates, [(pattern of a normal form, label), ...])
    # The labels only name the forms; they are not otherwise used here.
    # An earlier variant of this table accepted two-number references
    # ('N, M'); it is superseded by the single-number forms below.
    regexdata = [
        (r'<ls>%s.*?</ls>' % pfx,
         [(r'<ls>%s ([0-9]+[.]?)</ls>' % pfx, '1a'),
          (r'<ls>%s ([0-9]+[.]?) fgg?[.]</ls>' % pfx, '1b'),
          (r'<ls>%s ([0-9]+[.,]?) v[.] l[.]</ls>' % pfx, '1c'),
          (r'<ls>%s</ls>' % pfx, '1d'),
          (r'<ls>%s[^<]*ed[.] Bomb[.].*?</ls>' % pfx, '1e'),
          ]),
        (r'<ls n="%s[^"]*">.*?</ls>' % pfx,
         [(r'<ls n="%s">([0-9]+[.]?)</ls>' % pfx, '2a'),
          (r'<ls n="%s">([0-9]+[.]?) fgg?[.]</ls>' % pfx, '2b'),
          (r'<ls n="%s">([0-9]+[.,]?) v[.] l[.]</ls>' % pfx, '2c'),
          ]),
    ]
    # Compile once; the patterns are applied to every data line.
    groups = [
        (re.compile(finder), [re.compile(norm) for norm, _label in norms])
        for finder, norms in regexdata
    ]
    abnormals = []
    for entry in entries:
        for iline, line in enumerate(entry.datalines):
            ls = _first_abnormal_ls(line, groups)
            if ls is not None:
                # generate change transaction for this line
                abnormals.append(LSchange(entry, iline, ls))
    print(len(abnormals), 'abnormal lines found')
    return abnormals

def normals_summary(normals):
    """Print the number of ls instances per type, then the overall total.

    normals -- iterable of objects having a 'type' attribute.
    Types are reported in sorted order.  Nothing is returned.
    """
    counts = {}
    for item in normals:
        kind = item.type
        counts[kind] = counts.get(kind, 0) + 1
    total = 0
    for kind in sorted(counts):
        n = counts[kind]
        print(n, "ls instances of type", kind)
        total += n
    print('totals=', total)

def write_abnormals(fileout, abnormals):
    """Write one change-transaction template per abnormal line to fileout.

    fileout   -- path of the utf-8 output file (overwritten)
    abnormals -- sequence of objects with attributes entry, iline and ls

    Each record consists of a separator comment, the entry's meta line cut
    off at '<k2>', the abnormal ls, and an 'old' and a 'new' line.  Both
    carry the unchanged text of the data line, prefixed by its line number
    (entry.linenum1 + iline + 1).
    """
    separator = '; --------------------------------'
    with codecs.open(fileout, "w", "utf-8") as out:
        for change in abnormals:
            ent = change.entry
            idx = change.iline
            lnum = ent.linenum1 + idx + 1
            # keep only the part of the meta line before <k2>
            head = re.sub(r'<k2>.*$', '', ent.metaline)
            text = ent.datalines[idx]
            record = [
                separator,
                '; %s' % head,
                '; Abnormal ls: %s' % change.ls,  # the abnormal ls
                '%s old %s' % (lnum, text),
                ';',
                '%s new %s' % (lnum, text),
            ]
            out.write('\n'.join(record) + '\n')
    print(len(abnormals), 'change transactions', fileout)

if __name__ == "__main__":
    # Command line: change_abnormal.py <lspfx> <filein> <fileout>
    argv = sys.argv
    lspfx = argv[1]    # abbreviation of the literary source
    filein = argv[2]   # xxx.txt (path to digitization of xxx)
    fileout = argv[3]  # file receiving the change transactions
    # The input is read (and validated) before the prefix is checked.
    entries = init_entries(filein)
    if lspfx != 'HARIV.':
        print('Not implemented for lspfx = %s' % lspfx)
    else:
        abnormals = find_abnormals_hariv(lspfx, entries)
        write_abnormals(fileout, abnormals)
156 changes: 156 additions & 0 deletions pwg_ls2/hariv/change_abnormal.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
; --------------------------------
; <L>371<pc>1-0030<k1>agnija
; Abnormal ls: <ls>HARIV. S. 927, Z. 5</ls>
3232 old <ls>HARIV. S. 927, Z. 5</ls>, v. u.
;
3232 new <ls>HARIV. S. 927, Z. 5</ls>, v. u.
; --------------------------------
; <L>521<pc>1-0039<k1>agra
; Abnormal ls: <ls>HARIV. S. 927</ls>
4342 old <ls>HARIV. S. 927</ls>, ult. erscheint {#agra#} im Voc. als ein Beiname <is>Viṣṇu's</is>.
;
4342 new <ls>HARIV. S. 927</ls>, ult. erscheint {#agra#} im Voc. als ein Beiname <is>Viṣṇu's</is>.
; --------------------------------
; <L>526<pc>1-0040<k1>agraja
; Abnormal ls: <ls>HARIV. S. 927</ls>
4386 old <ls>HARIV. S. 927</ls>, ult.
;
4386 new <ls>HARIV. S. 927</ls>, ult.
; --------------------------------
; <L>873<pc>1-0066<k1>aja
; Abnormal ls: <ls>HARIV. S. 927, Z. 4</ls>
7646 old <ls>HARIV. S. 927, Z. 4</ls>, v. u.
;
7646 new <ls>HARIV. S. 927, Z. 4</ls>, v. u.
; --------------------------------
; <L>924<pc>1-0069<k1>ajaya
; Abnormal ls: <ls>HARIV. S. 927, Z. 4</ls>
8021 old <ls>HARIV. S. 927, Z. 4</ls>, v. u.
;
8021 new <ls>HARIV. S. 927, Z. 4</ls>, v. u.
; --------------------------------
; <L>1016<pc>1-0074<k1>ajEkapAd
; Abnormal ls: <ls>HARIV. S. 928, Z. 1.</ls>
8755 old <ls>HARIV. S. 928, Z. 1.</ls>
;
8755 new <ls>HARIV. S. 928, Z. 1.</ls>
; --------------------------------
; <L>5286<pc>1-0384<k1>ambarIza
; Abnormal ls: <ls n="HARIV.">83, N. 6.</ls>
44108 old <ls n="HARIV.">83, N. 6.</ls> <is>Māṃdhātar's</is>
;
44108 new <ls n="HARIV.">83, N. 6.</ls> <is>Māṃdhātar's</is>
; --------------------------------
; <L>11578<pc>1-0937<k1>unnetar
; Abnormal ls: <ls>HARIV. LANGL. II, 297</ls>
106622 old <ls>13.</ls> <ls>HARIV. LANGL. II, 297</ls>; der gedr. Text <ls>HARIV. 11363</ls> : {#sunetar#} .
;
106622 new <ls>13.</ls> <ls>HARIV. LANGL. II, 297</ls>; der gedr. Text <ls>HARIV. 11363</ls> : {#sunetar#} .
; --------------------------------
; <L>14182<pc>2-0009<k1>kakudmin
; Abnormal ls: <ls>HARIV. S. 927, Z. 4.</ls>
130173 old <ls>HARIV. S. 927, Z. 4.</ls> v. u.
;
130173 new <ls>HARIV. S. 927, Z. 4.</ls> v. u.
; --------------------------------
; <L>14857<pc>2-0063<k1>kapAlin
; Abnormal ls: <ls>HARIV. LANGL. I, 513.</ls>
136842 old <ls>VYĀḌI</ls> zu <ls>210.</ls> <ls>HARIV. LANGL. I, 513.</ls>
;
136842 new <ls>VYĀḌI</ls> zu <ls>210.</ls> <ls>HARIV. LANGL. I, 513.</ls>
; --------------------------------
; <L>14891<pc>2-0065<k1>kapila
; Abnormal ls: <ls n="HARIV.">S. 927, Z. 5 v. u.</ls>
137119 old <ls>HARIV. 788.</ls> <ls n="HARIV.">2219.</ls> <ls n="HARIV.">7595.</ls> <ls n="HARIV.">11495.</ls> <ls n="HARIV.">12439.</ls> <ls n="HARIV.">S. 927, Z. 5 v. u.</ls> <ls>R. 1, 41, 25.</ls> <ls>RAGH. 3, 50.</ls> <ls>BHĀG. P. 1, 3, 10. 3, 24, 19.</ls> <ls>VP. 378. fg.</ls> ein Sohn <is>Vitatha's</is>
;
137119 new <ls>HARIV. 788.</ls> <ls n="HARIV.">2219.</ls> <ls n="HARIV.">7595.</ls> <ls n="HARIV.">11495.</ls> <ls n="HARIV.">12439.</ls> <ls n="HARIV.">S. 927, Z. 5 v. u.</ls> <ls>R. 1, 41, 25.</ls> <ls>RAGH. 3, 50.</ls> <ls>BHĀG. P. 1, 3, 10. 3, 24, 19.</ls> <ls>VP. 378. fg.</ls> ein Sohn <is>Vitatha's</is>
; --------------------------------
; <L>19351<pc>2-0448<k1>kola
; Abnormal ls: <ls>HARIV. LANGL. 1, 68</ls>
181940 old <ls>HARIV. LANGL. 1, 68</ls> und <ls>ŚKDR.</ls> nach <ls>HARIV.</ls>; die gedr. Ausg.: {#kolisarpAH#} st. {#kolAH sarpAH#}, wie
;
181940 new <ls>HARIV. LANGL. 1, 68</ls> und <ls>ŚKDR.</ls> nach <ls>HARIV.</ls>; die gedr. Ausg.: {#kolisarpAH#} st. {#kolAH sarpAH#}, wie
; --------------------------------
; <L>28074<pc>3-0172<k1>jvAlAjihva
; Abnormal ls: <ls>HARIV. LANGL. I, 513</ls>
276910 old <ls>VYĀḌI</ls> zu <ls>H. 210</ls> (vgl. <ls>HARIV. LANGL. I, 513</ls>). N. pr. eines <is>Dānava</is>
;
276910 new <ls>VYĀḌI</ls> zu <ls>H. 210</ls> (vgl. <ls>HARIV. LANGL. I, 513</ls>). N. pr. eines <is>Dānava</is>
; --------------------------------
; <L>30856<pc>3-0424<k1>trikakud
; Abnormal ls: <ls>HARIV. S. 927, Z. 4</ls>
305856 old <ls n="MBH. 12,">13252.</ls> <ls n="MBH.">13, 6956.</ls> <ls>HARIV. S. 927, Z. 4</ls> v. u.
;
305856 new <ls n="MBH. 12,">13252.</ls> <ls n="MBH.">13, 6956.</ls> <ls>HARIV. S. 927, Z. 4</ls> v. u.
; --------------------------------
; <L>43424<pc>4-0572<k1>parRaka
; Abnormal ls: <ls>HARIV. LANGL. II, 376</ls>
432891 old <ls>HARIV. 14165</ls>; vgl. <ls>HARIV. LANGL. II, 376</ls>, wo die Calc. Ausg. {#parRinI#} hat.
;
432891 new <ls>HARIV. 14165</ls>; vgl. <ls>HARIV. LANGL. II, 376</ls>, wo die Calc. Ausg. {#parRinI#} hat.
; --------------------------------
; <L>45424<pc>4-0731<k1>piSitASin
; Abnormal ls: <ls>HARIV. LANGL. I, 513.</ls>
451966 old <ls>VYĀḌI</ls> zu <ls>H. 210</ls>; vgl. <ls>HARIV. LANGL. I, 513.</ls>
;
451966 new <ls>VYĀḌI</ls> zu <ls>H. 210</ls>; vgl. <ls>HARIV. LANGL. I, 513.</ls>
; --------------------------------
; <L>46726<pc>4-0839<k1>pUrRAyus
; Abnormal ls: <ls>HARIV. LANGL. II, 481</ls>
464771 old <ls>MBH. 1, 2554.</ls> <ls>HARIV. LANGL. II, 481</ls> (die Calc. Ausg. <ls n="HARIV.">14156</ls>
;
464771 new <ls>MBH. 1, 2554.</ls> <ls>HARIV. LANGL. II, 481</ls> (die Calc. Ausg. <ls n="HARIV.">14156</ls>
; --------------------------------
; <L>49502<pc>4-1047<k1>pramardana
; Abnormal ls: <ls>HARIV. LANGL. I, 513.</ls>
489797 old <ls>VYĀḌI</ls> zu <ls>H. 210</ls>; vgl. <ls>HARIV. LANGL. I, 513.</ls> eines <is>Vidyādhara</is>
;
489797 new <ls>VYĀḌI</ls> zu <ls>H. 210</ls>; vgl. <ls>HARIV. LANGL. I, 513.</ls> eines <is>Vidyādhara</is>
; --------------------------------
; <L>54819<pc>5-0277<k1>BAsvant
; Abnormal ls: <ls n="HARIV."> 5185.</ls>
541191 old <ls n="HARIV.">1331</ls> <ls n="HARIV."> 5185.</ls> <ls n="HARIV.">10995.</ls> <ls>R. 1, 44, 30. 2, 83, 6.</ls> <ls>R. GORR. 2, 100, 16.</ls> {#BUzaRAni#}
;
541191 new <ls n="HARIV.">1331</ls> <ls n="HARIV."> 5185.</ls> <ls n="HARIV.">10995.</ls> <ls>R. 1, 44, 30. 2, 83, 6.</ls> <ls>R. GORR. 2, 100, 16.</ls> {#BUzaRAni#}
; --------------------------------
; <L>54948<pc>5-0294<k1>BImaka
; Abnormal ls: <ls>HARIV. LANGL. I, 513.</ls>
543102 old <ls>HARIV. 9561.</ls> <ls>HARIV. LANGL. I, 513.</ls> {#BIzaka#}
;
543102 new <ls>HARIV. 9561.</ls> <ls>HARIV. LANGL. I, 513.</ls> {#BIzaka#}
; --------------------------------
; <L>58325<pc>5-0619<k1>mahAkAla
; Abnormal ls: <ls>HARIV. LANGL. I,512.</ls>
579488 old <ls n="HARIV.">12502.</ls> <ls>HARIV. LANGL. I,512.</ls> <ls>KATHĀS. 50,147.</ls> <ls>PAÑCAR. 1,15,7.</ls> <ls>Verz. d. Oxf. H. 45,a,7.</ls> {#°gaRotpatti#}
;
579488 new <ls n="HARIV.">12502.</ls> <ls>HARIV. LANGL. I,512.</ls> <ls>KATHĀS. 50,147.</ls> <ls>PAÑCAR. 1,15,7.</ls> <ls>Verz. d. Oxf. H. 45,a,7.</ls> {#°gaRotpatti#}
; --------------------------------
; <L>100618<pc>7-0281<k1>SUra
; Abnormal ls: <ls>HARIV. lith. Ausg. 33, 56.</ls>
947904 old <ls>HARIV. lith. Ausg. 33, 56.</ls> {#SUrABIrAH#} ({#SUdrA°?#})
;
947904 new <ls>HARIV. lith. Ausg. 33, 56.</ls> {#SUrABIrAH#} ({#SUdrA°?#})
; --------------------------------
; <L>100642<pc>7-0282<k1>SUravIra
; Abnormal ls: <ls>HARIV. lith. Ausg. 33, 56.</ls>
948079 old <ls>HARIV. lith. Ausg. 33, 56.</ls>
;
948079 new <ls>HARIV. lith. Ausg. 33, 56.</ls>
; --------------------------------
; <L>108891<pc>7-0963<k1>sAvarRa
; Abnormal ls: <ls>HARIV. lith Ausg. 7, 43.</ls>
1030782 old <ls>HARIV. lith Ausg. 7, 43.</ls>
;
1030782 new <ls>HARIV. lith Ausg. 7, 43.</ls>
; --------------------------------
; <L>111836<pc>7-1143<k1>susaMyukta
; Abnormal ls: <ls>HARIV. lith. Ausg. 21, 37.</ls>
1055950 old <ls>HARIV. lith. Ausg. 21, 37.</ls> so v. a. {%in richtigem Zahlenverhältniss zu einander stehend%}
;
1055950 new <ls>HARIV. lith. Ausg. 21, 37.</ls> so v. a. {%in richtigem Zahlenverhältniss zu einander stehend%}
; --------------------------------
; <L>116085<pc>7-1519<k1>har
; Abnormal ls: <ls>HARIV. 2, 3.</ls>
1102535 old <ls>NĪLAK.</ls> zu <ls>MBH. 3, 10247</ls> und zu <ls>HARIV. 2, 3.</ls>
;
1102535 new <ls>NĪLAK.</ls> zu <ls>MBH. 3, 10247</ls> und zu <ls>HARIV. 2, 3.</ls>
Loading

0 comments on commit 7a0cd64

Please sign in to comment.