#67

sanskrit-lexicon · Dec 9, 2023 · ff0e219 · ff0e219
1 parent d32d701
commit ff0e219
Show file tree

Hide file tree

Showing 10 changed files with 1,687 additions and 0 deletions.
diff --git a/pwgissues/issue67/change_irregular.txt b/pwgissues/issue67/change_irregular.txt
@@ -0,0 +1,70 @@
+; <L>11824<pc>1-0954<k1>upaDA<k2>upaDA/
+108730 old <div n="1">— 2) {%das aufdie-Probe-Stellen%}, = {#DarmAdyEryatparIkzaRam#} 
+;
+108730 new <div n="1">— 2) {%das auf-die-Probe-Stellen%}, = {#DarmAdyEryatparIkzaRam#} 
+;---------------------------------------------------
+; <L>23675<pc>2-0852<k1>grahaRa<k2>gra/haRa
+228492 old <div n="3">— β) {%das Fangen, Einfangen, Gefangennehmen, inseine%}
+;
+228492 new <div n="3">— β) {%das Fangen, Einfangen, Gefangennehmen, in-seine-Gewalt-Bekommen%}:
+;---------------------------------------------------
+; <L>23675<pc>2-0852<k1>grahaRa<k2>gra/haRa
+228494 old {%- Gewalt-Bekommen%}: {#SvA mfgagrahaRe SuciH#} 
+;
+228494 new {#SvA mfgagrahaRe SuciH#} 
+;---------------------------------------------------
+; <L>34412<pc>3-0741<k1>devagaRa<k2>devagaRa/
+340910 old {#devagaRa/#}¦ ({#deva + gaRa#}) <lex>m.</lex> {%Götterschaar, - abtheilung%} 
+;
+340910 new {#devagaRa/#}¦ ({#deva + gaRa#}) <lex>m.</lex> {%Götterschaar, -abtheilung%} 
+;---------------------------------------------------
+; <L>36186<pc>3-0868<k1>Dar<k2>Dar
+356546 old <div n="1">— 2) {%vernehmen. horen. erfahren%}: {#madvAkyaM cAvaDArya#} 
+;
+356546 new <div n="1">— 2) {%vernehmen. hören, erfahren%}: {#madvAkyaM cAvaDArya#} 
+;---------------------------------------------------
+; <L>56511<pc>5-0448<k1>maRqapa<k2>maRqapa/
+560080 old <div n="1">— 4) <lex>f.</lex> {#A#} {%eine hest. Hülsenfrucht%}, = {#nizpAvI#} 
+;
+560080 new <div n="1">— 4) <lex>f.</lex> {#A#} {%eine best. Hülsenfrucht%}, = {#nizpAvI#} 
+;---------------------------------------------------
+; <L>59115<pc>5-0662<k1>mahAsarja<k2>mahAsarja
+585218 old {#mahAsarja#}¦ ({#ma° + sarja#}) <lex>m.</lex> {%Terminalia tomentosa W. u. A.%} und {%Artocar pus integrifolia Lin.%} 
+;
+585218 new {#mahAsarja#}¦ ({#ma° + sarja#}) <lex>m.</lex> {%Terminalia tomentosa W. u. A.%} und {%Artocarpus integrifolia Lin.%} 
+;---------------------------------------------------
+; <L>71543<pc>5-1326<k1>keSaDAraRa<k2>keSaDAraRa
+676419 old {#keSaDAraRa#}¦ ({#keSa + DA°#}) <lex>n.</lex> {%das Tragen von Haar%} so v. a. {%nicht abge schnittenes Haar%} 
+;
+676419 new {#keSaDAraRa#}¦ ({#keSa + DA°#}) <lex>n.</lex> {%das Tragen von Haar%} so v. a. {%nicht abgeschnittenes Haar%} 
+;---------------------------------------------------
+; <L>104237<pc>7-0590<k1>satvan<k2>sa/tvan
+985017 old <ls n="ṚV.">4, 13, 2.</ls> (wonach unter {#drapsa#} und {#drapsin#} zu andern und drafsha im Zend zu vergleichen ist). <is>Indra</is> 
+;
+985017 new <ls n="ṚV.">4, 13, 2.</ls> (wonach unter {#drapsa#} und {#drapsin#} zu ändern und drafsha im Zend zu vergleichen ist). <is>Indra</is> 
+;---------------------------------------------------
+; <L>106186<pc>7-0748<k1>saMplava<k2>saMplava
+1004292 old <ls>MĀRK. P. 114, 20.</ls> {#garBa°#} so v. a. {%Fehl geburt%} 
+;
+1004292 new <ls>MĀRK. P. 114, 20.</ls> {#garBa°#} so v. a. {%Fehlgeburt%} 
+;---------------------------------------------------
+; <L>107049<pc>7-0830<k1>sarvasaMnAha<k2>sarvasaMnAha
+1014133 old {#sarvasaMnAha#}¦ <lex>m.</lex> {%das vollständige Gerüstetsein zu Etwas, das mit-Eiferan-Etwas-Gehen%}; = 1. {#sarvAtman#} 
+;
+1014133 new {#sarvasaMnAha#}¦ <lex>m.</lex> {%das vollständige Gerüstetsein zu Etwas, das mit-Eifer-an-Etwas-Gehen%}; = 1. {#sarvAtman#} 
+;---------------------------------------------------
+; <L>107361<pc>7-0846<k1>saviSezaRa<k2>saviSezaRa
+1016431 old {#saviSezaRa#}¦ (2. {#sa + vi°#}) <lex>adj.</lex> {%mit näheren Bestimmungen%} ({%Attributen¤u.s.w.%}) 
+;
+1016431 new {#saviSezaRa#}¦ (2. {#sa + vi°#}) <lex>adj.</lex> {%mit näheren Bestimmungen%} ({%Attributen u.s.w.%}) 
+;---------------------------------------------------
+; <L>121043<pc>7-1775<k1>pratizWA<k2>pratizWA
+1139724 old <div n="1"> 6) {%Vorzūglichkeit%} überh.: {#kAvyabanDasya#} Cit. bei 
+;
+1139724 new <div n="1"> 6) {%Vorzüglichkeit%} überh.: {#kAvyabanDasya#} Cit. bei 
+;---------------------------------------------------
+; <L>122234<pc>7-1809<k1>vyaparopaRa<k2>vyaparopaRa
+1146609 old <div n="1"> 3) {%das Vernicḥten%}: {#jIvita°#} 
+;
+1146609 new <div n="1"> 3) {%das Vernichten%}: {#jIvita°#} 
+;---------------------------------------------------
diff --git a/pwgissues/issue67/change_regular.txt b/pwgissues/issue67/change_regular.txt
diff --git a/pwgissues/issue67/diff_to_changes_dict.py b/pwgissues/issue67/diff_to_changes_dict.py
@@ -0,0 +1,71 @@
+# coding=utf-8
+""" diff_to_changes_dict.py
+   Generate change transactions from an 'old' and 'new' file
+   The two files must have the same number of lines.
+   ASSUME the input file is a dictionary as in csl-orig/v02, e.g. mw.txt.
+     This structure identifies the metaline for each change;
+     this is the only difference from diff_to_changes.py,
+     which ignores this structure and is thus usable for
+     generating changes for any two text files with the same number of lines.
+  python diff_to_changes_dict.py old.txt new.txt changes.txt
+  Now:
+  python updateByLine.py old.txt changes.txt new1.txt
+  then new1.txt is same as new.txt.
+"""
+from __future__ import print_function
+import sys, re,codecs
+
+def read_lines(filein):
+ """Read the utf-8 text file `filein` and return its lines as a list.
+
+ Trailing carriage-return / newline characters are removed from each
+ line; any other trailing whitespace is kept.
+ """
+ result = []
+ with codecs.open(filein,mode='r',encoding='utf-8') as handle:
+  for raw in handle:
+   result.append(raw.rstrip('\r\n'))
+ return result
+
+class Change(object):
+ """One change transaction for a line that differs between two files.
+
+ Constructor arguments:
+  iline     - 0-based index of the line
+  line1     - the 'old' text of the line
+  line2     - the 'new' text of the line
+  metaline1 - text shown in the leading '; ' line of the transaction
+ Derived attributes:
+  lnum      - iline+1, the line number written in the old/new lines
+  changeout - list of the five output lines of the transaction
+ """
+ def __init__(self,iline,line1,line2,metaline1):
+  self.iline = iline
+  self.lnum = iline+1
+  self.line1 = line1
+  self.line2 = line2
+  self.metaline1 = metaline1
+  # header, old line, spacer, new line, separator -- in that order
+  self.changeout = [
+   '; %s' %metaline1,
+   '%s old %s' %(self.lnum,line1),
+   ';',
+   '%s new %s' %(self.lnum,line2),
+   ';---------------------------------------------------',
+  ]
+
+def write_changes(fileout,changes):
+ """Write all change transactions to the utf-8 file `fileout`.
+
+ Each item of `changes` supplies its output lines in a `changeout`
+ list; the lines are written in order, each followed by a newline.
+ A one-line summary is printed after the file is written.
+ """
+ # gather every output line first, then write them in a single pass
+ flat = [text for change in changes for text in change.changeout]
+ with codecs.open(fileout,"w","utf-8") as f:
+  for text in flat:
+   f.write(text+'\n')
+ print(len(changes),"changes written to",fileout)
+
+if __name__=="__main__":
+ # Usage: python diff_to_changes_dict.py old.txt new.txt changes.txt
+ if len(sys.argv) < 4:
+  print('Usage: python diff_to_changes_dict.py old.txt new.txt changes.txt')
+  sys.exit(1)
+ filein1 = sys.argv[1] # old.txt
+ filein2 = sys.argv[2] # new.txt
+ fileout = sys.argv[3] # changes.txt
+ lines1 = read_lines(filein1)
+ lines2 = read_lines(filein2)
+ # changes are paired by line position, so the line counts must agree
+ if len(lines1) != len(lines2):
+  print('ERROR: files have different number of lines')
+  sys.exit(1)
+ changes = []
+ # most recent line of the old file starting with '<L>';
+ # stays None until the first such line is seen
+ metaline1 = None
+ for iline,(line1,line2) in enumerate(zip(lines1,lines2)):
+  if line1.startswith('<L>'):
+   metaline1 = line1
+  if line1 != line2:
+   changes.append(Change(iline,line1,line2,metaline1))
+ write_changes(fileout,changes)
+
diff --git a/pwgissues/issue67/digentry.py b/pwgissues/issue67/digentry.py
@@ -0,0 +1,103 @@
+#-*- coding:utf-8 -*-
+"""digentry.py
+  Module to read a digitization 
+  and generate a list of Entry objects
+  Adapted for temp_pwkvn_22.txt
+"""
+from __future__ import print_function
+import sys,re,codecs
+
+class Entry(object):
+ """One entry of a digitization.
+
+ `lines` is the list of lines of the entry: the first is the metaline,
+ the last is the closing line, and those in between are the data lines.
+ `linenum1` and `linenum2` (ints) are stored unchanged as the line
+ numbers of the first and last line.
+
+ Attributes: metaline, lend, datalines, metad (the dictionary returned
+ by parseheadline for the metaline), linenum1, linenum2, lsarr (an
+ empty list at construction).
+
+ Prints a message and exits with status 1 when the metaline's L value
+ was already used by an earlier Entry.  Raises KeyError when the
+ metaline has no L field.
+ """
+ # Registry shared by ALL Entry instances (class attribute): L -> Entry.
+ # It is never cleared here, so L values must be unique across every
+ # Entry constructed in the process.
+ Ldict = {}
+ def __init__(self,lines,linenum1,linenum2):
+  # linenum1,2 are int
+  self.metaline = lines[0]
+  self.lend = lines[-1]  # the <LEND> line
+  self.datalines = lines[1:-1]  # the non-meta lines
+  # parse the meta line into a dictionary
+  self.metad = parseheadline(self.metaline)
+  self.linenum1 = linenum1
+  self.linenum2 = linenum2
+  L = self.metad['L']
+  if L in self.Ldict:
+   print("Entry init error: duplicate L",L,linenum1)
+   # sys.exit, not the interactive-only exit() helper from site
+   sys.exit(1)
+  self.Ldict[L] = self
+  self.lsarr = []
+
+def init(filein):
+ """Read digitization file `filein` and return its entries as a list.
+
+ An entry begins at a line starting with '<L>' and ends at the next
+ line starting with '<LEND>'.  Those two lines and everything between
+ them are passed to Entry, together with their 1-based line numbers.
+ Lines outside of entries are skipped.
+
+ Prints the number of lines read and the number of entries found.
+ Prints a message and exits with status 1 when
+  1. a '<L>' line occurs while an entry is still open,
+  2. a '<LEND>' line occurs while no entry is open,
+  3. the last entry is never closed.
+ """
+ # slurp lines
+ with codecs.open(filein,encoding='utf-8',mode='r') as f:
+  lines = [line.rstrip('\r\n') for line in f]
+ recs = []  # list of Entry objects
+ # index of the '<L>' line of the currently open entry;
+ # None while we are between entries
+ idx1 = None
+ for idx,line in enumerate(lines):
+  if line.startswith('<L>'):
+   if idx1 is not None:  # error: previous entry still open
+    print('init_entries Error 1. Not expecting <L>')
+    print("line # ",idx+1)
+    print(line.encode('utf-8'))
+    sys.exit(1)
+   idx1 = idx
+  elif line.startswith('<LEND>'):
+   if idx1 is None:  # error: no entry is open
+    print('init_entries Error 2. Not expecting <LEND>')
+    print("line # ",idx+1)
+    print(line.encode('utf-8'))
+    sys.exit(1)
+   recs.append(Entry(lines[idx1:idx+1],idx1+1,idx+1))
+   # prepare for next entry
+   idx1 = None
+  # any other line: keep scanning
+ # when all lines are read, no entry should still be open
+ if idx1 is not None:
+  print('digentry.init Error 3. for file',filein)
+  print('Last entry not closed. Open entry starts at line',idx1+1)
+  sys.exit(1)
+
+ print(len(lines),"lines read from",filein)
+ print(len(recs),"entries found")
+ return recs
+
+def parseheadline(headline):
+ """
+  function to parse a 'metaline' and return a dictionary.
+  Each '<key>value' pair of the line becomes one item; the value runs
+  up to the next '<' (or the end of the line) and may be empty.
+  Leading/trailing whitespace of the line is ignored.  When a key
+  occurs more than once, the last value is kept.
+  Example:
+  headline = <L>16850<pc>292-3<k1>visarga<k2>visarga<h>1<e>
+  returns dictionary
+  {'L': '16850', 
+   'pc': '292-3',
+   'k1': 'visarga', 
+   'k2': 'visarga', 
+   'h': '1', 
+   'e': ''}
+ """
+ # findall yields one (key, value) tuple per '<key>value' occurrence,
+ # in order of appearance
+ return dict(re.findall(r'<([^>]*)>([^<]*)',headline.strip()))
+
+if __name__=="__main__":
+ # the single command-line argument is the path to the digitization
+ # (xxx.txt) that is to be read
+ entries = init(sys.argv[1])