Skip to content


Browse files Browse the repository at this point in the history
  • Loading branch information
funderburkjim committed Dec 9, 2023
1 parent d32d701 commit ff0e219
Show file tree
Hide file tree
Showing 10 changed files with 1,687 additions and 0 deletions.
70 changes: 70 additions & 0 deletions pwgissues/issue67/change_irregular.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
; <L>11824<pc>1-0954<k1>upaDA<k2>upaDA/
108730 old <div n="1">— 2) {%das aufdie-Probe-Stellen%}, = {#DarmAdyEryatparIkzaRam#}
108730 new <div n="1">— 2) {%das auf-die-Probe-Stellen%}, = {#DarmAdyEryatparIkzaRam#}
; <L>23675<pc>2-0852<k1>grahaRa<k2>gra/haRa
228492 old <div n="3">— β) {%das Fangen, Einfangen, Gefangennehmen, inseine%}
228492 new <div n="3">— β) {%das Fangen, Einfangen, Gefangennehmen, in-seine-Gewalt-Bekommen%}:
; <L>23675<pc>2-0852<k1>grahaRa<k2>gra/haRa
228494 old {%- Gewalt-Bekommen%}: {#SvA mfgagrahaRe SuciH#}
228494 new {#SvA mfgagrahaRe SuciH#}
; <L>34412<pc>3-0741<k1>devagaRa<k2>devagaRa/
340910 old {#devagaRa/#}¦ ({#deva + gaRa#}) <lex>m.</lex> {%Götterschaar, - abtheilung%}
340910 new {#devagaRa/#}¦ ({#deva + gaRa#}) <lex>m.</lex> {%Götterschaar, -abtheilung%}
; <L>36186<pc>3-0868<k1>Dar<k2>Dar
356546 old <div n="1">— 2) {%vernehmen. horen. erfahren%}: {#madvAkyaM cAvaDArya#}
356546 new <div n="1">— 2) {%vernehmen. hören, erfahren%}: {#madvAkyaM cAvaDArya#}
; <L>56511<pc>5-0448<k1>maRqapa<k2>maRqapa/
560080 old <div n="1">— 4) <lex>f.</lex> {#A#} {%eine hest. Hülsenfrucht%}, = {#nizpAvI#}
560080 new <div n="1">— 4) <lex>f.</lex> {#A#} {%eine best. Hülsenfrucht%}, = {#nizpAvI#}
; <L>59115<pc>5-0662<k1>mahAsarja<k2>mahAsarja
585218 old {#mahAsarja#}¦ ({#ma° + sarja#}) <lex>m.</lex> {%Terminalia tomentosa W. u. A.%} und {%Artocar pus integrifolia Lin.%}
585218 new {#mahAsarja#}¦ ({#ma° + sarja#}) <lex>m.</lex> {%Terminalia tomentosa W. u. A.%} und {%Artocarpus integrifolia Lin.%}
; <L>71543<pc>5-1326<k1>keSaDAraRa<k2>keSaDAraRa
676419 old {#keSaDAraRa#}¦ ({#keSa + DA°#}) <lex>n.</lex> {%das Tragen von Haar%} so v. a. {%nicht abge schnittenes Haar%}
676419 new {#keSaDAraRa#}¦ ({#keSa + DA°#}) <lex>n.</lex> {%das Tragen von Haar%} so v. a. {%nicht abgeschnittenes Haar%}
; <L>104237<pc>7-0590<k1>satvan<k2>sa/tvan
985017 old <ls n="ṚV.">4, 13, 2.</ls> (wonach unter {#drapsa#} und {#drapsin#} zu andern und drafsha im Zend zu vergleichen ist). <is>Indra</is>
985017 new <ls n="ṚV.">4, 13, 2.</ls> (wonach unter {#drapsa#} und {#drapsin#} zu ändern und drafsha im Zend zu vergleichen ist). <is>Indra</is>
; <L>106186<pc>7-0748<k1>saMplava<k2>saMplava
1004292 old <ls>MĀRK. P. 114, 20.</ls> {#garBa°#} so v. a. {%Fehl geburt%}
1004292 new <ls>MĀRK. P. 114, 20.</ls> {#garBa°#} so v. a. {%Fehlgeburt%}
; <L>107049<pc>7-0830<k1>sarvasaMnAha<k2>sarvasaMnAha
1014133 old {#sarvasaMnAha#}¦ <lex>m.</lex> {%das vollständige Gerüstetsein zu Etwas, das mit-Eiferan-Etwas-Gehen%}; = 1. {#sarvAtman#}
1014133 new {#sarvasaMnAha#}¦ <lex>m.</lex> {%das vollständige Gerüstetsein zu Etwas, das mit-Eifer-an-Etwas-Gehen%}; = 1. {#sarvAtman#}
; <L>107361<pc>7-0846<k1>saviSezaRa<k2>saviSezaRa
1016431 old {#saviSezaRa#}¦ (2. {#sa + vi°#}) <lex>adj.</lex> {%mit näheren Bestimmungen%} ({%Attributen¤u.s.w.%})
1016431 new {#saviSezaRa#}¦ (2. {#sa + vi°#}) <lex>adj.</lex> {%mit näheren Bestimmungen%} ({%Attributen u.s.w.%})
; <L>121043<pc>7-1775<k1>pratizWA<k2>pratizWA
1139724 old <div n="1"> 6) {%Vorzūglichkeit%} überh.: {#kAvyabanDasya#} Cit. bei
1139724 new <div n="1"> 6) {%Vorzüglichkeit%} überh.: {#kAvyabanDasya#} Cit. bei
; <L>122234<pc>7-1809<k1>vyaparopaRa<k2>vyaparopaRa
1146609 old <div n="1"> 3) {%das Vernicḥten%}: {#jIvita°#}
1146609 new <div n="1"> 3) {%das Vernichten%}: {#jIvita°#}
600 changes: 600 additions & 0 deletions pwgissues/issue67/change_regular.txt

Large diffs are not rendered by default.

71 changes: 71 additions & 0 deletions pwgissues/issue67/
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# coding=utf-8
"""
Generate change transactions from an 'old' and a 'new' file.
The two files must have the same number of lines.
ASSUME the input file is a dictionary as in csl-orig/v02, e.g. mw.txt.
This structure identifies the metaline for each change;
and this is the only difference from the generic change generator
(its script name is missing from this listing),
which ignores this structure, and is thus available for
generating changes for any two text files with the same number of lines.
Usage (the script names are missing from this listing):
  python <this script> old.txt new.txt changes.txt
  python <update script> old.txt changes.txt new1.txt
then new1.txt is the same as new.txt.
"""
from __future__ import print_function
import sys, re,codecs

def read_lines(filein):
    """Return the lines of a UTF-8 text file without their line endings.

    filein: path of the file to read.
    Returns a list of strings, one per line, with any trailing carriage
    returns and newlines removed.
    """
    # codecs.open matches the module's imports and the keyword arguments
    # (encoding=..., mode=...) used by this call.
    with codecs.open(filein, encoding='utf-8', mode='r') as f:
        lines = [x.rstrip('\r\n') for x in f]
    return lines

class Change(object):
    """One changed line, rendered as a three-line change transaction.

    Attributes:
      iline     -- 0-based index of the line, as passed in
      lnum      -- 1-based line number (iline + 1)
      line1     -- the 'old' text of the line
      line2     -- the 'new' text of the line
      metaline1 -- metaline text supplied by the caller
      changeout -- the three output lines:
                     '; <metaline1>'
                     '<lnum> old <line1>'
                     '<lnum> new <line2>'
    """
    def __init__(self, iline, line1, line2, metaline1):
        self.iline = iline
        self.line1 = line1
        self.line2 = line2
        # Transactions refer to lines by 1-based number.
        self.lnum = iline + 1
        self.metaline1 = metaline1
        self.changeout = [
            '; %s' % metaline1,
            '%s old %s' % (self.lnum, self.line1),
            '%s new %s' % (self.lnum, self.line2),
        ]

def write_changes(fileout, changes):
    """Write the output lines of every change to fileout as UTF-8.

    fileout: path of the file to (over)write.
    changes: iterable-with-length of objects that have a 'changeout'
             list of strings (e.g. Change instances).
    Each string is written on its own line, in order. A summary with
    the number of changes is printed to stdout.
    """
    # NOTE(review): the collect/write statements were missing from the
    # listing; one output line per changeout item, terminated by '\n',
    # is the reconstruction - confirm against the repository copy.
    outarr = []
    for change in changes:
        outarr.extend(change.changeout)
    with codecs.open(fileout, "w", "utf-8") as f:
        for out in outarr:
            f.write(out + '\n')
    print(len(changes), "changes written to", fileout)

if __name__=="__main__":
    # Command line: old.txt new.txt changes.txt
    filein1 = sys.argv[1] # old.txt
    filein2 = sys.argv[2] # new.txt
    fileout = sys.argv[3] # changes.txt
    lines1 = read_lines(filein1)
    lines2 = read_lines(filein2)
    n = len(lines1)
    if n != len(lines2):
        print('ERROR: files have different number of lines')
        # A line-by-line comparison is meaningless (and would index past
        # the end of the shorter file), so stop here.
        sys.exit(1)
    changes = []
    # Most recent '<L>' metaline seen in the old file; it labels each change.
    metaline1 = None
    for iline, line1 in enumerate(lines1):
        line2 = lines2[iline]
        if line1.startswith('<L>'):
            metaline1 = line1
        if line1 == line2:
            continue
        # NOTE(review): the listing ends at the comparison above; recording
        # the change and writing the file are reconstructed from the Change
        # and write_changes definitions - confirm against the repository copy.
        changes.append(Change(iline, line1, line2, metaline1))
    write_changes(fileout, changes)

103 changes: 103 additions & 0 deletions pwgissues/issue67/
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
#-*- coding:utf-8 -*-
"""
Module to read a digitization
and generate a list of Entry objects.
Adapted for temp_pwkvn_22.txt
"""
from __future__ import print_function
import sys,re,codecs

class Entry(object):
    """One entry of a digitization: metaline, data lines and <LEND> line.

    Attributes:
      metaline  -- first line of the entry
      lend      -- last line of the entry (the <LEND> line)
      datalines -- the lines between metaline and lend
      metad     -- dictionary returned by parseheadline(metaline)
      linenum1  -- int line number passed by the caller for the first line
      linenum2  -- int line number passed by the caller for the last line
      lsarr     -- list, initially empty
    """
    # Registry shared by all instances: maps an L value to the Entry most
    # recently constructed with that value.
    Ldict = {}

    def __init__(self, lines, linenum1, linenum2):
        # linenum1,2 are int
        self.metaline = lines[0]
        self.lend = lines[-1]  # the <LEND> line
        self.datalines = lines[1:-1]  # the non-meta lines
        # parse the meta line into a dictionary
        self.metad = parseheadline(self.metaline)
        self.linenum1 = linenum1
        self.linenum2 = linenum2
        L = self.metad['L']
        if L in self.Ldict:
            # Reported but not fatal: the newer entry replaces the older one.
            print("Entry init error: duplicate L", L, linenum1)
        self.Ldict[L] = self
        self.lsarr = []

def init(filein):
    """Read a digitization file and return its entries.

    filein: path of a UTF-8 text file in which each entry starts with a
            line beginning '<L>' and ends with a line beginning '<LEND>'.
    Returns a list of Entry objects, in file order.

    Structural problems are printed, not raised:
      Error 1 -- a '<L>' line inside an open entry
      Error 2 -- a '<LEND>' line outside any entry
      Error 3 -- the file ends while an entry is still open
    The line and entry counts are printed at the end.
    """
    # slurp lines
    with codecs.open(filein, encoding='utf-8', mode='r') as f:
        lines = [line.rstrip('\r\n') for line in f]
    recs = []  # list of Entry objects
    inentry = False
    idx1 = None  # index of the '<L>' line of the open entry, if any
    for idx, line in enumerate(lines):
        if inentry:
            if line.startswith('<LEND>'):
                entrylines = lines[idx1:idx + 1]
                # Entry takes 1-based line numbers.
                recs.append(Entry(entrylines, idx1 + 1, idx + 1))
                # prepare for next entry
                idx1 = None
                inentry = False
            elif line.startswith('<L>'):  # error
                print('init_entries Error 1. Not expecting <L>')
                print("line # ", idx+1)
            # Any other line is entry data: keep looking for <LEND>.
        # inentry = False. Looking for '<L>'
        elif line.startswith('<L>'):
            idx1 = idx
            inentry = True
        elif line.startswith('<LEND>'):  # error
            print('init_entries Error 2. Not expecting <LEND>')
            print("line # ", idx+1)
            # keep looking for <L>
    # when all lines are read, we should have inentry = False
    if inentry:
        print('digentry.init Error 3. for file', filein)
        print('Last entry not closed. Open entry starts at line', idx1+1)

    print(len(lines), "lines read from", filein)
    print(len(recs), "entries found")
    return recs

def parseheadline(headline):
    """Parse a 'metaline' and return a dictionary of its fields.

    Each '<key>value' pair in the line becomes one dictionary item; the
    value is the text up to the next '<' (possibly empty).

    Example:
      headline = <L>16850<pc>292-3<k1>visarga<k2>visarga<h>1<e>
      returns dictionary
        {'L': '16850',
         'pc': '292-3',
         'k1': 'visarga',
         'k2': 'visarga',
         'h': '1',
         'e': ''}
    """
    headline = headline.strip()
    # findall returns the (key, value) tuples in order; if a key repeats,
    # dict() keeps the last value.
    return dict(re.findall(r'<([^>]*)>([^<]*)', headline))

if __name__=="__main__":
    # Command line: path to the digitization file to read.
    filein = sys.argv[1] # xxx.txt (path to digitization of xxx)
    entries = init(filein)

0 comments on commit ff0e219

Please sign in to comment.