-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
d32d701
commit ff0e219
Showing
10 changed files
with
1,687 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
; <L>11824<pc>1-0954<k1>upaDA<k2>upaDA/ | ||
108730 old <div n="1">— 2) {%das aufdie-Probe-Stellen%}, = {#DarmAdyEryatparIkzaRam#} | ||
; | ||
108730 new <div n="1">— 2) {%das auf-die-Probe-Stellen%}, = {#DarmAdyEryatparIkzaRam#} | ||
;--------------------------------------------------- | ||
; <L>23675<pc>2-0852<k1>grahaRa<k2>gra/haRa | ||
228492 old <div n="3">— β) {%das Fangen, Einfangen, Gefangennehmen, inseine%} | ||
; | ||
228492 new <div n="3">— β) {%das Fangen, Einfangen, Gefangennehmen, in-seine-Gewalt-Bekommen%}: | ||
;--------------------------------------------------- | ||
; <L>23675<pc>2-0852<k1>grahaRa<k2>gra/haRa | ||
228494 old {%- Gewalt-Bekommen%}: {#SvA mfgagrahaRe SuciH#} | ||
; | ||
228494 new {#SvA mfgagrahaRe SuciH#} | ||
;--------------------------------------------------- | ||
; <L>34412<pc>3-0741<k1>devagaRa<k2>devagaRa/ | ||
340910 old {#devagaRa/#}¦ ({#deva + gaRa#}) <lex>m.</lex> {%Götterschaar, - abtheilung%} | ||
; | ||
340910 new {#devagaRa/#}¦ ({#deva + gaRa#}) <lex>m.</lex> {%Götterschaar, -abtheilung%} | ||
;--------------------------------------------------- | ||
; <L>36186<pc>3-0868<k1>Dar<k2>Dar | ||
356546 old <div n="1">— 2) {%vernehmen. horen. erfahren%}: {#madvAkyaM cAvaDArya#} | ||
; | ||
356546 new <div n="1">— 2) {%vernehmen. hören, erfahren%}: {#madvAkyaM cAvaDArya#} | ||
;--------------------------------------------------- | ||
; <L>56511<pc>5-0448<k1>maRqapa<k2>maRqapa/ | ||
560080 old <div n="1">— 4) <lex>f.</lex> {#A#} {%eine hest. Hülsenfrucht%}, = {#nizpAvI#} | ||
; | ||
560080 new <div n="1">— 4) <lex>f.</lex> {#A#} {%eine best. Hülsenfrucht%}, = {#nizpAvI#} | ||
;--------------------------------------------------- | ||
; <L>59115<pc>5-0662<k1>mahAsarja<k2>mahAsarja | ||
585218 old {#mahAsarja#}¦ ({#ma° + sarja#}) <lex>m.</lex> {%Terminalia tomentosa W. u. A.%} und {%Artocar pus integrifolia Lin.%} | ||
; | ||
585218 new {#mahAsarja#}¦ ({#ma° + sarja#}) <lex>m.</lex> {%Terminalia tomentosa W. u. A.%} und {%Artocarpus integrifolia Lin.%} | ||
;--------------------------------------------------- | ||
; <L>71543<pc>5-1326<k1>keSaDAraRa<k2>keSaDAraRa | ||
676419 old {#keSaDAraRa#}¦ ({#keSa + DA°#}) <lex>n.</lex> {%das Tragen von Haar%} so v. a. {%nicht abge schnittenes Haar%} | ||
; | ||
676419 new {#keSaDAraRa#}¦ ({#keSa + DA°#}) <lex>n.</lex> {%das Tragen von Haar%} so v. a. {%nicht abgeschnittenes Haar%} | ||
;--------------------------------------------------- | ||
; <L>104237<pc>7-0590<k1>satvan<k2>sa/tvan | ||
985017 old <ls n="ṚV.">4, 13, 2.</ls> (wonach unter {#drapsa#} und {#drapsin#} zu andern und drafsha im Zend zu vergleichen ist). <is>Indra</is> | ||
; | ||
985017 new <ls n="ṚV.">4, 13, 2.</ls> (wonach unter {#drapsa#} und {#drapsin#} zu ändern und drafsha im Zend zu vergleichen ist). <is>Indra</is> | ||
;--------------------------------------------------- | ||
; <L>106186<pc>7-0748<k1>saMplava<k2>saMplava | ||
1004292 old <ls>MĀRK. P. 114, 20.</ls> {#garBa°#} so v. a. {%Fehl geburt%} | ||
; | ||
1004292 new <ls>MĀRK. P. 114, 20.</ls> {#garBa°#} so v. a. {%Fehlgeburt%} | ||
;--------------------------------------------------- | ||
; <L>107049<pc>7-0830<k1>sarvasaMnAha<k2>sarvasaMnAha | ||
1014133 old {#sarvasaMnAha#}¦ <lex>m.</lex> {%das vollständige Gerüstetsein zu Etwas, das mit-Eiferan-Etwas-Gehen%}; = 1. {#sarvAtman#} | ||
; | ||
1014133 new {#sarvasaMnAha#}¦ <lex>m.</lex> {%das vollständige Gerüstetsein zu Etwas, das mit-Eifer-an-Etwas-Gehen%}; = 1. {#sarvAtman#} | ||
;--------------------------------------------------- | ||
; <L>107361<pc>7-0846<k1>saviSezaRa<k2>saviSezaRa | ||
1016431 old {#saviSezaRa#}¦ (2. {#sa + vi°#}) <lex>adj.</lex> {%mit näheren Bestimmungen%} ({%Attributen¤u.s.w.%}) | ||
; | ||
1016431 new {#saviSezaRa#}¦ (2. {#sa + vi°#}) <lex>adj.</lex> {%mit näheren Bestimmungen%} ({%Attributen u.s.w.%}) | ||
;--------------------------------------------------- | ||
; <L>121043<pc>7-1775<k1>pratizWA<k2>pratizWA | ||
1139724 old <div n="1"> 6) {%Vorzūglichkeit%} überh.: {#kAvyabanDasya#} Cit. bei | ||
; | ||
1139724 new <div n="1"> 6) {%Vorzüglichkeit%} überh.: {#kAvyabanDasya#} Cit. bei | ||
;--------------------------------------------------- | ||
; <L>122234<pc>7-1809<k1>vyaparopaRa<k2>vyaparopaRa | ||
1146609 old <div n="1"> 3) {%das Vernicḥten%}: {#jIvita°#} | ||
; | ||
1146609 new <div n="1"> 3) {%das Vernichten%}: {#jIvita°#} | ||
;--------------------------------------------------- |
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
# coding=utf-8 | ||
""" diff_to_changes_dict.py | ||
Generate change transactions from an 'old' and 'new' file | ||
The two files should have the same number of lines | ||
ASSUME input file is a dictionary as in csl-orig/v02, e.g. mw.txt. | ||
This structure identifies the metaline for each change; | ||
and this is the only difference from diff_to_changes.py, | ||
which ignores this structure, and is thus available for | ||
generating changes for any two text files with same number of lines. | ||
python diff_to_changes_dict.py old.txt new.txt changes.txt | ||
Now: | ||
python updateByLine.py old.txt changes.txt new1.txt | ||
then new1.txt is the same as new.txt. | ||
""" | ||
from __future__ import print_function | ||
import sys, re,codecs | ||
|
||
def read_lines(filein):
    """Read the UTF-8 text file `filein` and return its lines as a list.

    Trailing carriage-return / line-feed characters are removed from each
    line; all other whitespace is preserved.
    """
    result = []
    with codecs.open(filein, mode='r', encoding='utf-8') as handle:
        for raw in handle:
            result.append(raw.rstrip('\r\n'))
    return result
|
||
class Change(object):
    """A single differing line, rendered as a change transaction.

    iline     -- 0-based index of the line in the compared files
    line1     -- text of the line in the 'old' file
    line2     -- text of the line in the 'new' file
    metaline1 -- metaline to show in the comment header of the transaction

    `changeout` holds the five output lines of the transaction; `lnum` is
    the 1-based line number used in the 'old' and 'new' rows.
    """
    def __init__(self, iline, line1, line2, metaline1):
        lnum = iline + 1  # transactions use 1-based line numbers
        self.iline = iline
        self.lnum = lnum
        self.line1 = line1
        self.line2 = line2
        self.metaline1 = metaline1
        self.changeout = [
            '; %s' % metaline1,
            '%s old %s' % (lnum, line1),
            ';',
            '%s new %s' % (lnum, line2),
            ';---------------------------------------------------',
        ]
|
||
def write_changes(fileout, changes):
    """Write the transaction lines of all `changes` to `fileout` as UTF-8.

    Each element of `changes` must provide a `changeout` list of strings;
    every string is written followed by a newline.  The number of changes
    is reported on standard output.
    """
    rows = [row for change in changes for row in change.changeout]
    with codecs.open(fileout, "w", "utf-8") as f:
        f.write(''.join(row + '\n' for row in rows))
    print(len(changes), "changes written to", fileout)
|
||
if __name__ == "__main__":
    # Usage: python diff_to_changes_dict.py old.txt new.txt changes.txt
    filein1 = sys.argv[1]  # old.txt
    filein2 = sys.argv[2]  # new.txt
    fileout = sys.argv[3]  # changes.txt
    lines1 = read_lines(filein1)
    lines2 = read_lines(filein2)
    n = len(lines1)
    if n != len(lines2):
        print('ERROR: files have different number of lines')
        # sys.exit rather than the site-provided exit(), which is meant for
        # interactive use and is absent when the site module is not loaded.
        sys.exit(1)
    changes = []
    # Most recent '<L>' line seen in the old file; None until the first one.
    metaline1 = None
    for iline, (line1, line2) in enumerate(zip(lines1, lines2)):
        if line1.startswith('<L>'):
            metaline1 = line1
        if line1 != line2:
            changes.append(Change(iline, line1, line2, metaline1))
    write_changes(fileout, changes)
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
#-*- coding:utf-8 -*- | ||
"""digentry.py | ||
Module to read a digitization | ||
and generate a list of Entry objects | ||
Adapted for temp_pwkvn_22.txt | ||
""" | ||
from __future__ import print_function | ||
import sys,re,codecs | ||
|
||
class Entry(object):
    """One entry of a digitization: metaline, data lines and closing line.

    lines    -- all lines of the entry; the first is the metaline and the
                last is the <LEND> line
    linenum1 -- int, line number recorded for the first line of the entry
    linenum2 -- int, line number recorded for the last line of the entry
    """
    # Registry shared by all instances: maps the 'L' value of each metaline
    # to its Entry, and is used to reject a second entry with the same L.
    Ldict = {}

    def __init__(self, lines, linenum1, linenum2):
        head, body, tail = lines[0], lines[1:-1], lines[-1]
        self.metaline = head
        self.lend = tail        # the <LEND> line
        self.datalines = body   # the non-meta lines
        # the metaline parsed into a dictionary
        meta = parseheadline(head)
        self.metad = meta
        self.linenum1, self.linenum2 = linenum1, linenum2
        key = meta['L']
        registry = self.Ldict
        if key in registry:
            print("Entry init error: duplicate L", key, linenum1)
            exit(1)
        registry[key] = self
        self.lsarr = []
|
||
def init(filein):
    """Read the digitization `filein` and return its entries as a list.

    An entry starts at a line beginning with '<L>' and ends at the next
    line beginning with '<LEND>'; lines outside entries are ignored.
    The program exits with status 1 when an '<L>' line appears inside an
    open entry, when a '<LEND>' line appears outside an entry, or when the
    last entry is never closed.
    """
    with codecs.open(filein, encoding='utf-8', mode='r') as f:
        lines = [raw.rstrip('\r\n') for raw in f]
    recs = []     # Entry objects, in file order
    start = None  # index of the '<L>' line of the open entry, if any
    for idx, line in enumerate(lines):
        opens = line.startswith('<L>')
        closes = line.startswith('<LEND>')
        if not (opens or closes):
            # ordinary line: nothing to do, inside or outside an entry
            continue
        if start is None:
            # not inside an entry: only '<L>' is acceptable here
            if closes:
                print('init_entries Error 2. Not expecting <LEND>')
                print("line # ", idx + 1)
                print(line.encode('utf-8'))
                exit(1)
            start = idx
            continue
        # inside an entry: only '<LEND>' is acceptable here
        if opens:
            print('init_entries Error 1. Not expecting <L>')
            print("line # ", idx + 1)
            print(line.encode('utf-8'))
            exit(1)
        recs.append(Entry(lines[start:idx + 1], start + 1, idx + 1))
        start = None
    # after the last line no entry may remain open
    if start is not None:
        print('digentry.init Error 3. for file', filein)
        print('Last entry not closed. Open entry starts at line', start + 1)
        exit(1)

    print(len(lines), "lines read from", filein)
    print(len(recs), "entries found")
    return recs
|
||
def parseheadline(headline):
    """Parse a 'metaline' into a dictionary of tag name -> following text.

    Example:
      headline = <L>16850<pc>292-3<k1>visarga<k2>visarga<h>1<e>
    returns
      {'L': '16850',
       'pc': '292-3',
       'k1': 'visarga',
       'k2': 'visarga',
       'h': '1',
       'e': ''}

    Leading and trailing whitespace of the headline is ignored; when a tag
    occurs more than once, its last value is kept.
    """
    # Each match is a (tag, text-up-to-next-'<') pair.
    pairs = re.findall('[<]([^>]*)[>]([^<]*)', headline.strip())
    return dict(pairs)
|
||
if __name__ == "__main__":
    # Usage: python digentry.py xxx.txt
    # where xxx.txt is the path to the digitization of xxx
    filein = sys.argv[1]
    entries = init(filein)
Oops, something went wrong.