From 261c57aa7b5e7bc439399524a8948095239b1837 Mon Sep 17 00:00:00 2001 From: funderburkjim Date: Mon, 18 Sep 2023 13:23:08 -0400 Subject: [PATCH] MD abbreviations abbrev1, temp_md_1. Ref: https://github.com/sanskrit-lexicon/MD/issues/11 --- v02/distinctfiles/md/pywork/mdab/mdab.sql | 11 ++ .../md/pywork/mdab/mdab_input.txt | 185 ++++++++++++++++++ v02/distinctfiles/md/pywork/mdab/readme.txt | 3 + v02/distinctfiles/md/pywork/mdab/redo.sh | 4 + v02/distinctfiles/md/pywork/mdab/redo_mdab.sh | 5 + v02/inventory.txt | 10 +- v02/makotemplates/pywork/make_xml.py | 22 ++- v02/makotemplates/pywork/redo_postxml.sh | 2 +- 8 files changed, 232 insertions(+), 10 deletions(-) create mode 100644 v02/distinctfiles/md/pywork/mdab/mdab.sql create mode 100644 v02/distinctfiles/md/pywork/mdab/mdab_input.txt create mode 100644 v02/distinctfiles/md/pywork/mdab/readme.txt create mode 100644 v02/distinctfiles/md/pywork/mdab/redo.sh create mode 100644 v02/distinctfiles/md/pywork/mdab/redo_mdab.sh diff --git a/v02/distinctfiles/md/pywork/mdab/mdab.sql b/v02/distinctfiles/md/pywork/mdab/mdab.sql new file mode 100644 index 0000000..ab134f0 --- /dev/null +++ b/v02/distinctfiles/md/pywork/mdab/mdab.sql @@ -0,0 +1,11 @@ +DROP TABLE if exists mdab; +CREATE TABLE mdab ( + `id` VARCHAR(100) UNIQUE, + `data` TEXT NOT NULL +); +.separator "\t" +.import mdab_input.txt mdab +create index datum on mdab(id); +pragma table_info (mdab); +select count(*) from mdab; +.exit diff --git a/v02/distinctfiles/md/pywork/mdab/mdab_input.txt b/v02/distinctfiles/md/pywork/mdab/mdab_input.txt new file mode 100644 index 0000000..6500ac6 --- /dev/null +++ b/v02/distinctfiles/md/pywork/mdab/mdab_input.txt @@ -0,0 +1,185 @@ +& & and. +& & and. +&c. &c. et cetera, and so forth. +&c. &c. et cetera, and so forth. ++ + with; also. += = equals, is the equivalent of. +a. a. adjective. +A. D. A. D. Anno Domini +ab. ab. ablative. +abs. abs. absolute. +abst. abst. abstract. +abst. N. abst. N. abstract noun. +ac. ac. accusative. 
+act. act. active. +ad. ad. adverb, -ial, -ially. +aor. aor. aorist. +app. app. apposition. +ass. ass. asseverative. +athg. athg. anything. +AV. AV. Atharva-veda. +B. B. Buddhistic term. +beg. beg. beginning. +bg. bg. beginning. +Br. Br. Brāhmaṇa. +C. C. Classical (post-Vedic) Sanskrit. +cj. cj. conjunction. +col. col. collective, -ly. +coll. coll. collective, -ly. +comm. comm. commentator. +conj. conj. conjecture. +cor. cor. correlative. +cp. cp. compare. +cpd. cpd. compound. +cpv. cpv. comparative. +crt. crt. certain. +cs. cs. causal. +d. d. dative. +dbl. dbl. double. +den. den. denominative. +der. der. derivative. +des. des. desiderative. +dim. dim. diminutive. +dr. dr. dramatic term. +du. du. dual. +E. E. Epic. +e. g. e. g. for example. +emp. emp. emphatic. +enc. enc. enclitic. +encl. encl. enclitic. +Eng. Eng. English. +ep. ep. epithet. +eq. eq. equivalent. +esp. esp. especially. +etc. etc. et cetera, and so forth. +exc. exc. except, exceptionally. +excl. excl. exclamation. +exp. exp. explanation. +f. f. feminine; also = for. +ff. ff. and the following. +fig. fig. figurative, -ly. +fp. fp. future participle passive. +fr. fr. from. +ft. ft. future. +g. g. genitive. +gd. gd. gerund (indeclinable participle). +Gk. Gk. Greek. +gnly. gnly. generally. +gr. gr. grammatical term. +hvg. hvg. having. +i. e. i. e. that is. +id. id. the same. +ij. ij. interjection. +imps. imps. impersonal. +impv. impv. imperative. +in. in. instrumental. +incor. incor. incorrect. +incorr. incorr. incorrect. +ind. ind. indicative. +indc. indc. indeclinable. +indec. indec. indeclinable. +indf. indf. indefinite. +inf. inf. infinitive. +int. int. intransitive. +intr. intr. interrogative. +intv. intv. intensive (frequentative). +ir. ir. irregular. +K. K. king. +Lat. Lat. Latin. +lc. lc. locative. +leg. leg. legal term. +lit. lit. literally. +m. m. masculine. +met. met. metronymic. +metr. metr. metrical (due to exigencies of metre). +mg. mg. meaning. +mtn. mtn. mountain. +N. N. 
name; when alone = name of a man or of a woman. +n. n. neuter. +N.E. N.E. Northeast. +N.W. N.W. Northwest. +neg. neg. negative. +nm. nm. nominative. +nr. nr. numeral. +num. num. numeral. +obj. obj. object. +onom. onom. onomatopœic. +opp. opp. opposite. +opt. opt. optative. +or. or. originally. +ord. ord. ordinary. +orig. orig. originally. +P. P. Parasmaipada. +pat. pat. patronymic. +pcl. pcl. particle. +perh. perh. perhaps. +pf. pf. perfect. +ph. ph. philosophical term. +phil. phil. philosophical term. +pl. pl. plural. +pn. pn. pronoun, pronominal. +pos. pos. possessive. +poss. poss. possessive. +pot. pot. potential. +pp. pp. perfect passive participle. +Pr. Pr. Prākrit (Sanskrit equivalent of Prākrit word), Prākritic. +pr. pr. present. +pred. pred. predicate, predicative, -ly. +prn. prn. pronoun, pronominal. +prob. prob. probably. +prop. prop. properly. +prp. prp. preposition. +prs. prs. person. +ps. ps. passive. +pt. pt. participle. +px. px. prefix. +q. v. q. v. which see. +R. R. river. +red. red. reduplicated, reduplication. +rel. rel. relative. +rep. rep. repeated. +rf. rf. reflexive. +rh. rh. rhetorical term. +rit. rit. ritual term. +rl. rl. relative. +rp. rp. repeated. +RV. RV. Rig-veda. +RV.² RV.² Rig-veda (² = ?) +RV.¹ RV.¹ Rig-veda (¹ = ?) +S. S. Sūtra. +s. s. singular. +s. v. s. v. sub voce. +sb. sb. substantive. +sc. sc. scilicet, that is to say, supply. +sg. sg. singular. +sp. sp. specifically. +spv. spv. superlative. +st. st. stem. +sthg. sthg. something. +str. str. strong. +subj. subj. subjunctive. +SV. SV. Sāma-veda. +sx. sx. suffix. +T. T. title. +t. t. term. +TBr. TBr. Taittirīya-Brāhmaṇa. +tr. tr. transitive. +TS. TS. Taittirīya-Saṃhitā. +U. U. Upaniṣad. +V. V. Veda, Vedic. +v. v. vide, see. +v. r. v. r. various reading. +vb. vb. verb. +vbl. vbl. verbal. +vbl. N. vbl. N. verbal noun. +vc. vc. vocative. +VS. VS. Vājasaneyi-Saṃhitā. +w. w. with. +wk. wk. weak. +YV. YV. Yajur-veda.± with or without. +Ā. Ā. Ātmanepada. +ŚB. ŚB. Śatapatha-Brāhmaṇa. +ɴ. 
ɴ. noun. +√ root. +𝑃. 𝑃. Purāṇa. diff --git a/v02/distinctfiles/md/pywork/mdab/readme.txt b/v02/distinctfiles/md/pywork/mdab/readme.txt new file mode 100644 index 0000000..8fb1122 --- /dev/null +++ b/v02/distinctfiles/md/pywork/mdab/readme.txt @@ -0,0 +1,3 @@ + +# to update, work with mdab_input.txt +sh redo.sh diff --git a/v02/distinctfiles/md/pywork/mdab/redo.sh b/v02/distinctfiles/md/pywork/mdab/redo.sh new file mode 100644 index 0000000..5972a06 --- /dev/null +++ b/v02/distinctfiles/md/pywork/mdab/redo.sh @@ -0,0 +1,4 @@ +echo "making mdab.sqlite from mdab_input.txt" +sh redo_mdab.sh +echo "moving mdab.sqlite to web/sqlite/" +mv mdab.sqlite ../../web/sqlite/ diff --git a/v02/distinctfiles/md/pywork/mdab/redo_mdab.sh b/v02/distinctfiles/md/pywork/mdab/redo_mdab.sh new file mode 100644 index 0000000..e172248 --- /dev/null +++ b/v02/distinctfiles/md/pywork/mdab/redo_mdab.sh @@ -0,0 +1,5 @@ +echo "remaking mdab.sqlite" +rm mdab.sqlite +sqlite3 mdab.sqlite < mdab.sql +echo "finished remaking mdab.sqlite" +chmod 0755 mdab.sqlite diff --git a/v02/inventory.txt b/v02/inventory.txt index 53202d0..cd2788e 100644 --- a/v02/inventory.txt +++ b/v02/inventory.txt @@ -58,11 +58,11 @@ pw pwg:pywork/make_xml_ls.py:CD ; Later, try to get all abbreviations to be the same, ; so templates can be used. 
; also xxx to redo_postxml.sh for abbreviations -ben stc bur cae mw pw pwkvn pwg lan gra ap90 bhs:pywork/${dictlo}ab/${dictlo}ab.sql:CD -ben stc bur cae mw pw pwkvn pwg lan gra ap90 bhs:pywork/${dictlo}ab/${dictlo}ab_input.txt:CD -ben stc bur cae mw pw pwkvn pwg lan gra ap90 bhs:pywork/${dictlo}ab/readme.txt:CD -ben stc bur cae mw pw pwkvn pwg lan gra ap90 bhs:pywork/${dictlo}ab/redo.sh:CD -ben stc bur cae mw pw pwkvn pwg lan gra ap90 bhs:pywork/${dictlo}ab/redo_${dictlo}ab.sh:CD +ben stc bur cae mw pw pwkvn pwg lan gra ap90 bhs md:pywork/${dictlo}ab/${dictlo}ab.sql:CD +ben stc bur cae mw pw pwkvn pwg lan gra ap90 bhs md:pywork/${dictlo}ab/${dictlo}ab_input.txt:CD +ben stc bur cae mw pw pwkvn pwg lan gra ap90 bhs md:pywork/${dictlo}ab/readme.txt:CD +ben stc bur cae mw pw pwkvn pwg lan gra ap90 bhs md:pywork/${dictlo}ab/redo.sh:CD +ben stc bur cae mw pw pwkvn pwg lan gra ap90 bhs md:pywork/${dictlo}ab/redo_${dictlo}ab.sh:CD mw:pywork/${dictlo}ab/check.py:CD ; -------------------------------------------------------------- ; -- literary sources: MW diff --git a/v02/makotemplates/pywork/make_xml.py b/v02/makotemplates/pywork/make_xml.py index 415f679..0df9e8f 100644 --- a/v02/makotemplates/pywork/make_xml.py +++ b/v02/makotemplates/pywork/make_xml.py @@ -268,12 +268,13 @@ def dig_to_xml_specific(x): # change -- to mdash x = re.sub(r'--',u'—',x) # change ‡ to _ (two vowels that will be combined via sandhi) - x = re.sub(u'‡','_',x) + # x = re.sub(u'‡','_',x) + x = re.sub(u'‡','‿',x) # 09-13-2023. u+203f Undertie # remove the ¤ symbol. It brackets some numbers (e.g. ¤2¤) but there # is no obvious typographical feature. x = re.sub(u'¤','',x) - # change X to x - x = re.sub(r'(.*?)',r'\1',x) + # change X to x # 09-14-2023 comment out + # x = re.sub(r'(.*?)',r'\1',x) if divflag: # add divs for - x = re.sub(r'-','
-',x) @@ -687,7 +688,7 @@ def construct_xmlstring(datalines,hwrec): datalines1.append(x) datalines = datalines1 %endif -%if dictlo in ['md','shs','skd','vcp']: +%if dictlo in ['shs','skd','vcp']: for i,x in enumerate(datalines): if i == 0: pass @@ -703,6 +704,19 @@ def construct_xmlstring(datalines,hwrec): datalines1.append(x) datalines = datalines1 %endif +%if dictlo in ['md']: + for i,x in enumerate(datalines): + if i == 0: + pass + elif x.strip() == '': + pass + elif x.startswith('[Page'): + pass + else: + x = '' + x + datalines1.append(x) + datalines = datalines1 +%endif %if dictlo in ['pe','pgn','pui','vei']: for i,x in enumerate(datalines): if i == 0: diff --git a/v02/makotemplates/pywork/redo_postxml.sh b/v02/makotemplates/pywork/redo_postxml.sh index 7625cb4..96954ce 100644 --- a/v02/makotemplates/pywork/redo_postxml.sh +++ b/v02/makotemplates/pywork/redo_postxml.sh @@ -11,7 +11,7 @@ sh redo.sh cd ../ # back to pywork # For applicable dictionaries, update other web/sqlite databases # abbreviations -%if dictlo in ['ben','stc','bur','cae','mw','pw','pwg','lan','gra','ap90','pwkvn','bhs']: +%if dictlo in ['ben','stc','bur','cae','mw','pw','pwg','lan','gra','ap90','pwkvn','bhs','md']: cd ${dictlo}ab sh redo.sh cd ../ # back to pywork