From 23b7d9ad27bcc8501360f881dd99f48a137ff1a4 Mon Sep 17 00:00:00 2001 From: seth-js <83692925+seth-js@users.noreply.github.com> Date: Fri, 20 Dec 2024 03:19:20 -0600 Subject: [PATCH 1/5] improve russian form handling, write tests Fixed Russian forms being improperly marked as lemmas. Wrote tests. --- 3-tidy-up.js | 2 +- data/test/dict/ru/en/tag_bank_1.json | 1 + data/test/dict/ru/en/term_bank_1.json | 201 ++++++++ data/test/dict/ru/en/term_bank_2.json | 598 ++++++++++++++++++++++ data/test/ipa/ru/en/tag_bank_1.json | 1 + data/test/ipa/ru/en/term_meta_bank_1.json | 28 + data/test/kaikki/ru-en.json | 4 + data/test/tidy/ru-en-forms-0.json | 1 + data/test/tidy/ru-en-lemmas.json | 1 + 9 files changed, 836 insertions(+), 1 deletion(-) create mode 100644 data/test/dict/ru/en/tag_bank_1.json create mode 100644 data/test/dict/ru/en/term_bank_1.json create mode 100644 data/test/dict/ru/en/term_bank_2.json create mode 100644 data/test/ipa/ru/en/tag_bank_1.json create mode 100644 data/test/ipa/ru/en/term_meta_bank_1.json create mode 100644 data/test/kaikki/ru-en.json create mode 100644 data/test/tidy/ru-en-forms-0.json create mode 100644 data/test/tidy/ru-en-lemmas.json diff --git a/3-tidy-up.js b/3-tidy-up.js index 509fa30..909be68 100644 --- a/3-tidy-up.js +++ b/3-tidy-up.js @@ -43,7 +43,7 @@ function isInflectionGloss(glosses, formOf) { if(!Array.isArray(formOf)) return false; for (const {word: lemma} of formOf) { if(!lemma) continue; - if (glosses.some(gloss => new RegExp(`of ${escapeRegExp(lemma)}$`).test(gloss))) return true; + if (glosses.some(gloss => new RegExp(`of ${escapeRegExp(lemma)}($| \(.+?\)$)`).test(gloss))) return true; } case 'fr': diff --git a/data/test/dict/ru/en/tag_bank_1.json b/data/test/dict/ru/en/tag_bank_1.json new file mode 100644 index 0000000..a51fa5c --- /dev/null +++ b/data/test/dict/ru/en/tag_bank_1.json @@ -0,0 +1 @@ +[["n","partOfSpeech",-1,"noun",1],["fig","",0,"figuratively",0],["sl","",0,"slang",0],["v","partOfSpeech",-1,"verb",1],["col","",0,"colloquial",0],["impers","",0,"impersonal",0]] \ No newline at end of file diff --git a/data/test/dict/ru/en/term_bank_1.json b/data/test/dict/ru/en/term_bank_1.json new file mode 100644 index 0000000..91df195 --- /dev/null +++ b/data/test/dict/ru/en/term_bank_1.json @@ -0,0 +1,201 @@ +[ + [ + "снег", + "", + "n", + "n", + 0, + [ + { + "type": "structured-content", + "content": [ + { + "tag": "div", + "content": [ + "snow", + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "идёт снег" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "it is snowing" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "мо́крый снег" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "sleet, wet snow" + } + ] + } + } + ] + } + ] + } + ], + 0, + "" + ], + [ + "снег", + "", + "fig n", + "n", + 0, + [ + { + "type": "structured-content", + "content": [ + { + "tag": "div", + "content": [ + "snow, the white electrical noise on a TV set when there is no TV signal" + ] + } + ] + } + ], + 0, + "" + ], + [ + "снег", + "", + "sl n", + "n", + 0, + [ + { + "type": "structured-content", + "content": [ + { + "tag": "div", + "content": [ + "cocaine" + ] + } + ] + } + ], + 0, + "" + ], + [ + "побелеть", + "побеле́ть", + "v", + "v", + 0, + [ + { + "type": "structured-content", + "content": [ + { + "tag": "div", + "content": [ + "to turn white, (intransitive) to whiten" + ] + } + ] + } + ], + 0, + "" + ], + [ + "побелеть", + "побеле́ть", + "col v", + "v", + 0, + [ + { + "type": "structured-content", + "content": [ + { + "tag": "div", + "content": [ + "to turn gray" + ] + } + ] + }, + { + "type": "structured-content", + "content": [ + { + "tag": "div", + "content": [ + "to become brighter" + ] + } + ] + } + ], + 0, + "" + ], + [ + "побелеть", + "побеле́ть", + "col impers v", + "v", + 0, + [ + { + "type": "structured-content", + "content": [ + { + "tag": "div", + "content": [ + "to dawn" + ] + } + ] + } + ], + 0, + "" + ] +] \ No newline at end of file diff --git a/data/test/dict/ru/en/term_bank_2.json b/data/test/dict/ru/en/term_bank_2.json new file mode 100644 index 0000000..cae509c --- /dev/null +++ b/data/test/dict/ru/en/term_bank_2.json @@ -0,0 +1,598 @@ +[ + [ + "снега", + "сне́га", + "non-lemma", + "", + 0, + [ + [ + "снег", + [ + "genitive" + ] + ], + [ + "снег", + [ + "genitive", + "singular" + ] + ] + ], + 0, + "" + ], + [ + "снега", + "снега́", + "non-lemma", + "", + 0, + [ + [ + "снег", + [ + "accusative", + "plural", + "irregular" + ] + ], + [ + "снег", + [ + "nominative", + "plural" + ] + ], + [ + "снег", + [ + "nominative", + "plural", + "irregular" + ] + ] + ], + 0, + "" + ], + [ + "снегов", + "снего́в", + "non-lemma", + "", + 0, + [ + [ + "снег", + [ + "genitive", + "plural" + ] + ] + ], + 0, + "" + ], + [ + "снежный", + "сне́жный", + "non-lemma", + "", + 0, + [ + [ + "снег", + [ + "adjective", + "relational" + ] + ] + ], + 0, + "" + ], + [ + "снеговой", + "снегово́й", + "non-lemma", + "", + 0, + [ + [ + "снег", + [ + "adjective", + "relational" + ] + ] + ], + 0, + "" + ], + [ + "снежок", + "снежо́к", + "non-lemma", + "", + 0, + [ + [ + "снег", + [ + "diminutive" + ] + ] + ], + 0, + "" + ], + [ + "снегу", + "сне́гу", + "non-lemma", + "", + 0, + [ + [ + "снег", + [ + "singular", + "partitive" + ] + ], + [ + "снег", + [ + "dative", + "singular" + ] + ] + ], + 0, + "" + ], + [ + "снегам", + "снега́м", + "non-lemma", + "", + 0, + [ + [ + "снег", + [ + "dative", + "plural" + ] + ] + ], + 0, + "" + ], + [ + "снег", + "сне́г", + "non-lemma", + "", + 0, + [ + [ + "снег", + [ + "accusative", + "singular" + ] + ] + ], + 0, + "" + ], + [ + "снегом", + "сне́гом", + "non-lemma", + "", + 0, + [ + [ + "снег", + [ + "instrumental", + "singular" + ] + ] + ], + 0, + "" + ], + [ + "снегами", + "снега́ми", + "non-lemma", + "", + 0, + [ + [ + "снег", + [ + "instrumental", + "plural" + ] + ] + ], + 0, + "" + ], + [ + "снеге", + "сне́ге", + "non-lemma", + "", + 0, + [ + [ + "снег", + [ + "singular", + "prepositional" + ] + ] + ], + 0, + "" + ], + [ + "снегах", + "снега́х", + "non-lemma", + "", + 0, + [ + [ + "снег", + [ + "plural", + "prepositional" + ] + ] + ], + 0, + "" + ], + [ + "снегу", + "снегу́", + "non-lemma", + "", + 0, + [ + [ + "снег", + [ + "locative", + "singular" + ] + ] + ], + 0, + "" + ], + [ + "белеть", + "беле́ть", + "non-lemma", + "", + 0, + [ + [ + "побеле́ть", + [ + "imperfective" + ] + ] + ], + 0, + "" + ], + [ + "побелевший", + "побеле́вший", + "non-lemma", + "", + 0, + [ + [ + "побеле́ть", + [ + "past", + "active", + "participle" + ] + ] + ], + 0, + "" + ], + [ + "побелев", + "побеле́в", + "non-lemma", + "", + 0, + [ + [ + "побеле́ть", + [ + "past", + "participle", + "adverbial" + ] + ] + ], + 0, + "" + ], + [ + "побелевши", + "побеле́вши", + "non-lemma", + "", + 0, + [ + [ + "побеле́ть", + [ + "past", + "participle", + "adverbial" + ] + ] + ], + 0, + "" + ], + [ + "побелею", + "побеле́ю", + "non-lemma", + "", + 0, + [ + [ + "побеле́ть", + [ + "first-person", + "singular", + "future" + ] + ] + ], + 0, + "" + ], + [ + "побелеешь", + "побеле́ешь", + "non-lemma", + "", + 0, + [ + [ + "побеле́ть", + [ + "second-person", + "singular", + "future" + ] + ] + ], + 0, + "" + ], + [ + "побелеет", + "побеле́ет", + "non-lemma", + "", + 0, + [ + [ + "побеле́ть", + [ + "third-person", + "singular", + "future" + ] + ] + ], + 0, + "" + ], + [ + "побелеем", + "побеле́ем", + "non-lemma", + "", + 0, + [ + [ + "побеле́ть", + [ + "first-person", + "plural", + "future" + ] + ] + ], + 0, + "" + ], + [ + "побелеете", + "побеле́ете", + "non-lemma", + "", + 0, + [ + [ + "побеле́ть", + [ + "second-person", + "plural", + "future" + ] + ] + ], + 0, + "" + ], + [ + "побелеют", + "побеле́ют", + "non-lemma", + "", + 0, + [ + [ + "побеле́ть", + [ + "third-person", + "plural", + "future" + ] + ] + ], + 0, + "" + ], + [ + "побелей", + "побеле́й", + "non-lemma", + "", + 0, + [ + [ + "побеле́ть", + [ + "singular", + "imperative" + ] + ] + ], + 0, + "" + ], + [ + "побелейте", + "побеле́йте", + "non-lemma", + "", + 0, + [ + [ + "побеле́ть", + [ + "plural", + "imperative" + ] + ] + ], + 0, + "" + ], + [ + "побелел", + "побеле́л", + "non-lemma", + "", + 0, + [ + [ + "побеле́ть", + [ + "singular", + "masculine", + "past" + ] + ] + ], + 0, + "" + ], + [ + "побелели", + "побеле́ли", + "non-lemma", + "", + 0, + [ + [ + "побеле́ть", + [ + "plural", + "feminine", + "past" + ] + ], + [ + "побеле́ть", + [ + "plural", + "masculine", + "past" + ] + ], + [ + "побеле́ть", + [ + "plural", + "neuter", + "past" + ] + ] + ], + 0, + "" + ], + [ + "побелела", + "побеле́ла", + "non-lemma", + "", + 0, + [ + [ + "побеле́ть", + [ + "singular", + "feminine", + "past" + ] + ] + ], + 0, + "" + ], + [ + "побелело", + "побеле́ло", + "non-lemma", + "", + 0, + [ + [ + "побеле́ть", + [ + "singular", + "neuter", + "past" + ] + ] + ], + 0, + "" + ] +] \ No newline at end of file diff --git a/data/test/ipa/ru/en/tag_bank_1.json b/data/test/ipa/ru/en/tag_bank_1.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/data/test/ipa/ru/en/tag_bank_1.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/data/test/ipa/ru/en/term_meta_bank_1.json b/data/test/ipa/ru/en/term_meta_bank_1.json new file mode 100644 index 0000000..fea77d7 --- /dev/null +++ b/data/test/ipa/ru/en/term_meta_bank_1.json @@ -0,0 +1,28 @@ +[ + [ + "снег", + "ipa", + { + "reading": "снег", + "transcriptions": [ + { + "ipa": "[sʲnʲek]", + "tags": [] + } + ] + } + ], + [ + "побелеть", + "ipa", + { + "reading": "побеле́ть", + "transcriptions": [ + { + "ipa": "[pəbʲɪˈlʲetʲ]", + "tags": [] + } + ] + } + ] +] \ No newline at end of file diff --git a/data/test/kaikki/ru-en.json b/data/test/kaikki/ru-en.json new file mode 100644 index 0000000..3c3d5fc --- /dev/null +++ b/data/test/kaikki/ru-en.json @@ -0,0 +1,4 @@ +{"pos":"noun","head_templates":[{"name":"ru-noun+","args":{"1":"c","2":"","3":"(1)","par":"сне́гу","loc":"снегу́","adj":"сне́жный","dim":"снежо́к","adj2":"снегово́й"},"expansion":"снег • (sneg) m inan (genitive сне́га, nominative plural снега́, genitive plural снего́в, relational adjective сне́жный or снегово́й, diminutive снежо́к)"}],"forms":[{"form":"sneg","tags":["romanization"]},{"form":"сне́га","tags":["genitive"]},{"form":"снега́","tags":["nominative","plural"]},{"form":"снего́в","tags":["genitive","plural"]},{"form":"сне́жный","tags":["adjective","relational"]},{"form":"снегово́й","tags":["adjective","relational"]},{"form":"снежо́к","tags":["diminutive"]},{"form":"no-table-tags","source":"declension","tags":["table-tags"]},{"form":"ru-noun-table","source":"declension","tags":["inflection-template"]},{"form":"velar-stem","source":"declension","tags":["class"]},{"form":"accent-c","source":"declension","tags":["class"]},{"form":"сне́г","tags":["nominative","singular"],"source":"declension","roman":"snég"},{"form":"снега́","tags":["irregular","nominative","plural"],"source":"declension","roman":"snegá"},{"form":"сне́га","tags":["genitive","singular"],"source":"declension","roman":"snéga"},{"form":"снего́в","tags":["genitive","plural"],"source":"declension","roman":"snegóv"},{"form":"сне́гу","tags":["dative","singular"],"source":"declension","roman":"snégu"},{"form":"снега́м","tags":["dative","plural"],"source":"declension","roman":"snegám"},{"form":"сне́г","tags":["accusative","singular"],"source":"declension","roman":"snég"},{"form":"снега́","tags":["accusative","irregular","plural"],"source":"declension","roman":"snegá"},{"form":"сне́гом","tags":["instrumental","singular"],"source":"declension","roman":"snégom"},{"form":"снега́ми","tags":["instrumental","plural"],"source":"declension","roman":"snegámi"},{"form":"сне́ге","tags":["prepositional","singular"],"source":"declension","roman":"snége"},{"form":"снега́х","tags":["plural","prepositional"],"source":"declension","roman":"snegáx"},{"form":"сне́гу","tags":["partitive","singular"],"source":"declension","roman":"snégu"},{"form":"снегу́","tags":["locative","singular"],"source":"declension","roman":"snegú"}],"sounds":[{"ipa":"[sʲnʲek]"},{"audio":"Ru-снег.ogg","ogg_url":"https://upload.wikimedia.org/wikipedia/commons/0/01/Ru-%D1%81%D0%BD%D0%B5%D0%B3.ogg","mp3_url":"https://upload.wikimedia.org/wikipedia/commons/transcoded/0/01/Ru-%D1%81%D0%BD%D0%B5%D0%B3.ogg/Ru-%D1%81%D0%BD%D0%B5%D0%B3.ogg.mp3"}],"etymology_text":"Inherited from Proto-Slavic *sněgъ, from Proto-Balto-Slavic *snáigas, from Proto-Indo-European *snóygʷʰos.","etymology_templates":[{"name":"root","args":{"1":"ru","2":"ine-pro","3":"*sneygʷʰ-"},"expansion":""},{"name":"glossary","args":{"1":"Inherited"},"expansion":"Inherited"},{"name":"inh","args":{"1":"ru","2":"sla-pro","3":"*sněgъ","4":"","5":"","lit":"","pos":"","tr":"","ts":"","id":"","sc":"","g":"","g2":"","g3":"","nocat":"","sort":""},"expansion":"Proto-Slavic *sněgъ"},{"name":"inh+","args":{"1":"ru","2":"sla-pro","3":"*sněgъ"},"expansion":"Inherited from Proto-Slavic *sněgъ"},{"name":"inh","args":{"1":"ru","2":"ine-bsl-pro","3":"*snáigas"},"expansion":"Proto-Balto-Slavic *snáigas"},{"name":"inh","args":{"1":"ru","2":"ine-pro","3":"*snóygʷʰos"},"expansion":"Proto-Indo-European *snóygʷʰos"}],"word":"снег","lang":"Russian","lang_code":"ru","synonyms":[{"roman":"sněg","word":"снѣгъ — Pre-reform orthography (1918)","_dis1":"0 0 0"}],"holonyms":[{"roman":"burán","word":"бура́н","_dis1":"0 0 0"},{"roman":"vʹjúga","word":"вью́га","_dis1":"0 0 0"},{"roman":"nast","word":"наст","_dis1":"0 0 0"},{"roman":"metélʹ","word":"мете́ль","_dis1":"0 0 0"},{"roman":"pozjómka","word":"позёмка","_dis1":"0 0 0"},{"roman":"poróša","word":"поро́ша","_dis1":"0 0 0"},{"roman":"snegovík","word":"снегови́к","_dis1":"0 0 0"},{"roman":"snéžnaja bába","word":"сне́жная ба́ба","_dis1":"0 0 0"},{"roman":"snežók","word":"снежо́к","_dis1":"0 0 0"},{"roman":"sugrób","word":"сугро́б","_dis1":"0 0 0"}],"meronyms":[{"roman":"snežínka","word":"снежи́нка","_dis1":"0 0 0"}],"derived":[{"tags":["masculine"],"roman":"podsnéžnik","word":"подсне́жник","_dis1":"0 0 0"},{"tags":["animate","masculine"],"roman":"snegovík","word":"снегови́к","_dis1":"0 0 0"},{"tags":["masculine"],"roman":"snegopád","word":"снегопа́д","_dis1":"0 0 0"},{"tags":["animate","feminine"],"roman":"Snegúročka","word":"Снегу́рочка","_dis1":"0 0 0"},{"tags":["imperfective"],"roman":"snežítʹ","word":"снежи́ть","_dis1":"0 0 0"},{"tags":["feminine"],"roman":"snežínka","word":"снежи́нка","_dis1":"0 0 0"},{"tags":["masculine"],"roman":"snegostúp","word":"снегосту́п","_dis1":"0 0 0"},{"tags":["masculine"],"roman":"snegoxód","word":"снегохо́д","_dis1":"0 0 0"}],"related":[{"tags":["animate","masculine"],"roman":"snegírʹ","word":"снеги́рь","_dis1":"0 0 0"}],"senses":[{"examples":[{"text":"идёт снег","english":"it is snowing","type":"example","roman":"idjót sneg"},{"text":"мо́крый снег","english":"sleet, wet snow","type":"example","roman":"mókryj sneg"},{"text":"пе́рвый снег","english":"the first snow","type":"example","roman":"pérvyj sneg"},{"text":"ски́дывать/ски́нуть снег с кры́ши","english":"to throw the snow off the roof","type":"example","roman":"skídyvatʹ/skínutʹ sneg s krýši"},{"text":"как снег на го́лову (saying)\nkak sneg na gólovu\nlike a bolt from the blue (unexpectedly, suddenly)\n(literally, “like snow on the head”)","type":"example"},{"text":"что́-либо ну́жно, как прошлого́дний снег\nštó-libo núžno, kak prošlogódnij sneg\nsomething is not needed in the least\n(literally, “something is needed like last year's snow”)","type":"example"},{"text":"зимо́й сне́га не вы́просить (saying)\nzimój snéga ne výprositʹ\n(said about someone mean, stingy)\n(literally, “cannot be asked for snow in winter”)","type":"example"},{"text":"Дава́й поваля́емся в снегу́!","english":"Let's roll in the snow!","type":"example","roman":"Daváj povaljájemsja v snegú!"}],"links":[["snow","snow"]],"glosses":["snow"],"id":"en-снег-ru-noun-p0YiLwnY","categories":[{"name":"Russian nouns with accent pattern c","kind":"other","parents":[],"source":"w+disamb","_dis":"33 33 33"},{"name":"Russian nouns with partitive singular","kind":"other","parents":[],"source":"w+disamb","_dis":"31 37 31"},{"name":"Snow","kind":"topical","parents":["Water","Weather","Liquids","Atmosphere","Matter","Nature","Chemistry","All topics","Sciences","Fundamental"],"source":"w+disamb","orig":"ru:Snow","langcode":"ru","_dis":"67 32 1"}]},{"links":[["snow","snow"]],"raw_glosses":["(figuratively) snow, the white electrical noise on a TV set when there is no TV signal"],"glosses":["snow, the white electrical noise on a TV set when there is no TV signal"],"tags":["figuratively"],"id":"en-снег-ru-noun-mEgWUvyE","categories":[{"name":"Pages with 4 entries","kind":"other","parents":[],"source":"w+disamb","_dis":"41 41 1 13 1 1 3 1"},{"name":"Pages with entries","kind":"other","parents":[],"source":"w+disamb","_dis":"39 39 1 18 1 1 2 1"},{"name":"Russian entries with incorrect language header","kind":"other","parents":["Entries with incorrect language header","Entry maintenance"],"source":"w+disamb","_dis":"2 96 2"},{"name":"Russian links with redundant wikilinks","kind":"other","parents":["Links with redundant wikilinks","Entry maintenance"],"source":"w+disamb","_dis":"3 93 3"},{"name":"Russian nouns ending in a consonant with plural -а","kind":"other","parents":[],"source":"w+disamb","_dis":"8 85 8"},{"name":"Russian nouns with accent pattern c","kind":"other","parents":[],"source":"w+disamb","_dis":"33 33 33"},{"name":"Russian nouns with irregular nominative plural","kind":"other","parents":[],"source":"w+disamb","_dis":"28 44 28"},{"name":"Russian nouns with locative singular","kind":"other","parents":[],"source":"w+disamb","_dis":"29 43 29"},{"name":"Russian nouns with partitive singular","kind":"other","parents":[],"source":"w+disamb","_dis":"31 37 31"}]},{"links":[["cocaine","cocaine"]],"raw_glosses":["(slang) cocaine"],"glosses":["cocaine"],"tags":["slang"],"id":"en-снег-ru-noun-OcetgAxX","categories":[{"name":"Russian nouns with accent pattern c","kind":"other","parents":[],"source":"w+disamb","_dis":"33 33 33"},{"name":"Russian nouns with partitive singular","kind":"other","parents":[],"source":"w+disamb","_dis":"31 37 31"}]}]} +{"pos":"verb","head_templates":[{"name":"head","args":{"1":"ru","2":"verb form","head":"возни́к"},"expansion":"возни́к • (vozník)"}],"forms":[{"form":"возни́к","tags":["canonical"]},{"form":"vozník","tags":["romanization"]}],"word":"возник","lang":"Russian","lang_code":"ru","sounds":[{"ipa":"[vɐzʲˈnʲik]"}],"senses":[{"links":[["возни́кнуть","возникнуть#Russian"]],"glosses":["short masculine singular past indicative perfective of возни́кнуть (vozníknutʹ)"],"tags":["form-of","indicative","masculine","past","perfective","short-form","singular"],"form_of":[{"word":"возни́кнуть","extra":"vozníknutʹ"}],"id":"en-возник-ru-verb-5MxEK0j6","categories":[{"name":"Pages with 1 entry","kind":"other","parents":[],"source":"w"},{"name":"Pages with entries","kind":"other","parents":[],"source":"w"},{"name":"Russian entries with incorrect language header","kind":"other","parents":["Entries with incorrect language header","Entry maintenance"],"source":"w"}]}]} +{"pos":"noun","head_templates":[{"name":"head","args":{"1":"ru","2":"noun form","g":"f-in"},"expansion":"простынёй • (prostynjój) f inan"}],"forms":[{"form":"prostynjój","tags":["romanization"]}],"word":"простынёй","lang":"Russian","lang_code":"ru","sounds":[{"ipa":"[prəstɨˈnʲɵj]"}],"senses":[{"links":[["простыня́","простыня#Russian"]],"glosses":["instrumental singular of простыня́ (prostynjá)"],"tags":["form-of","instrumental","singular"],"form_of":[{"word":"простыня́","extra":"prostynjá"}],"id":"en-простынёй-ru-noun-BAgg9vrw","categories":[{"name":"Pages with 1 entry","kind":"other","parents":[],"source":"w"},{"name":"Pages with entries","kind":"other","parents":[],"source":"w"},{"name":"Russian entries with incorrect language header","kind":"other","parents":["Entries with incorrect language header","Entry maintenance"],"source":"w"}]}]} +{"pos":"verb","head_templates":[{"name":"ru-verb","args":{"1":"побеле́ть","2":"pf","impf":"беле́ть"},"expansion":"побеле́ть • (pobelétʹ) pf (imperfective беле́ть)"}],"forms":[{"form":"побеле́ть","tags":["canonical"]},{"form":"pobelétʹ","tags":["romanization"]},{"form":"беле́ть","tags":["imperfective"]},{"form":"intransitive perfective","source":"conjugation","tags":["table-tags"]},{"form":"ru-conj","source":"conjugation","tags":["inflection-template"]},{"form":"1a perfective intransitive","source":"conjugation","tags":["class"]},{"form":"побеле́ть","tags":["infinitive","perfective"],"source":"conjugation","roman":"pobelétʹ"},{"form":"-","tags":["active","participle","present"],"source":"conjugation"},{"form":"побеле́вший","tags":["active","participle","past"],"source":"conjugation","roman":"pobelévšij"},{"form":"-","tags":["participle","passive","present"],"source":"conjugation"},{"form":"-","tags":["participle","passive","past"],"source":"conjugation"},{"form":"-","tags":["adverbial","participle","present"],"source":"conjugation"},{"form":"побеле́в","tags":["adverbial","participle","past"],"source":"conjugation","roman":"pobelév"},{"form":"побеле́вши","tags":["adverbial","participle","past"],"source":"conjugation","roman":"pobelévši"},{"form":"-","tags":["first-person","present","singular"],"source":"conjugation"},{"form":"побеле́ю","tags":["first-person","future","singular"],"source":"conjugation","roman":"pobeléju"},{"form":"-","tags":["present","second-person","singular"],"source":"conjugation"},{"form":"побеле́ешь","tags":["future","second-person","singular"],"source":"conjugation","roman":"pobeléješʹ"},{"form":"-","tags":["present","singular","third-person"],"source":"conjugation"},{"form":"побеле́ет","tags":["future","singular","third-person"],"source":"conjugation","roman":"pobeléjet"},{"form":"-","tags":["first-person","plural","present"],"source":"conjugation"},{"form":"побеле́ем","tags":["first-person","future","plural"],"source":"conjugation","roman":"pobeléjem"},{"form":"-","tags":["plural","present","second-person"],"source":"conjugation"},{"form":"побеле́ете","tags":["future","plural","second-person"],"source":"conjugation","roman":"pobeléjete"},{"form":"-","tags":["plural","present","third-person"],"source":"conjugation"},{"form":"побеле́ют","tags":["future","plural","third-person"],"source":"conjugation","roman":"pobeléjut"},{"form":"побеле́й","tags":["imperative","singular"],"source":"conjugation","roman":"pobeléj"},{"form":"побеле́йте","tags":["imperative","plural"],"source":"conjugation","roman":"pobeléjte"},{"form":"побеле́л","tags":["masculine","past","singular"],"source":"conjugation","roman":"pobelél"},{"form":"побеле́ли","tags":["masculine","past","plural"],"source":"conjugation","roman":"pobeléli"},{"form":"побеле́ла","tags":["feminine","past","singular"],"source":"conjugation","roman":"pobeléla"},{"form":"побеле́ли","tags":["feminine","past","plural"],"source":"conjugation","roman":"pobeléli"},{"form":"побеле́ло","tags":["neuter","past","singular"],"source":"conjugation","roman":"pobelélo"},{"form":"побеле́ли","tags":["neuter","past","plural"],"source":"conjugation","roman":"pobeléli"}],"inflection_templates":[{"name":"ru-conj","args":{"1":"pf-intr","2":"1a","3":"побеле́ть"}}],"sounds":[{"ipa":"[pəbʲɪˈlʲetʲ]"},{"audio":"LL-Q7737 (rus)-Tatiana Kerbush-побелеть.wav","ogg_url":"https://upload.wikimedia.org/wikipedia/commons/transcoded/2/24/LL-Q7737_%28rus%29-Tatiana_Kerbush-%D0%BF%D0%BE%D0%B1%D0%B5%D0%BB%D0%B5%D1%82%D1%8C.wav/LL-Q7737_%28rus%29-Tatiana_Kerbush-%D0%BF%D0%BE%D0%B1%D0%B5%D0%BB%D0%B5%D1%82%D1%8C.wav.ogg","mp3_url":"https://upload.wikimedia.org/wikipedia/commons/transcoded/2/24/LL-Q7737_%28rus%29-Tatiana_Kerbush-%D0%BF%D0%BE%D0%B1%D0%B5%D0%BB%D0%B5%D1%82%D1%8C.wav/LL-Q7737_%28rus%29-Tatiana_Kerbush-%D0%BF%D0%BE%D0%B1%D0%B5%D0%BB%D0%B5%D1%82%D1%8C.wav.mp3"}],"etymology_text":"по- (po-) + беле́ть (belétʹ)","etymology_templates":[{"name":"af","args":{"1":"ru","2":"по-","3":"беле́ть"},"expansion":"по- (po-) + беле́ть (belétʹ)"}],"word":"побелеть","lang":"Russian","lang_code":"ru","related":[{"roman":"bélyj","word":"бе́лый","_dis1":"0 0 0 0"},{"roman":"belítʹ","word":"бели́ть","_dis1":"0 0 0 0"},{"roman":"belíla","word":"бели́ла","_dis1":"0 0 0 0"},{"roman":"beljósyj","word":"белёсый","_dis1":"0 0 0 0"},{"roman":"belovátyj","word":"белова́тый","_dis1":"0 0 0 0"},{"roman":"belók","word":"бело́к","_dis1":"0 0 0 0"}],"senses":[{"links":[["turn","turn"],["white","white"],["whiten","whiten"]],"synonyms":[{"word":"побледне́ть"}],"glosses":["to turn white, (intransitive) to whiten"],"id":"en-побелеть-ru-verb-CmNvFNei","categories":[{"name":"Russian terms prefixed with по-","kind":"other","parents":[],"source":"w+disamb","_dis":"27 23 27 23"}]},{"links":[["turn","turn"],["gray","gray"]],"synonyms":[{"word":"поседе́ть"}],"raw_glosses":["(colloquial) to turn gray"],"glosses":["to turn gray"],"tags":["colloquial"],"id":"en-побелеть-ru-verb-5awyn6OU","categories":[{"name":"Russian terms prefixed with по-","kind":"other","parents":[],"source":"w+disamb","_dis":"27 23 27 23"}]},{"links":[["brighter","brighter"]],"synonyms":[{"word":"посветле́ть"}],"raw_glosses":["(colloquial) to become brighter"],"glosses":["to become brighter"],"tags":["colloquial"],"id":"en-побелеть-ru-verb-srmrilyR","categories":[{"name":"Pages with 1 entry","kind":"other","parents":[],"source":"w+disamb","_dis":"12 7 74 7"},{"name":"Pages with entries","kind":"other","parents":[],"source":"w+disamb","_dis":"7 6 81 6"},{"name":"Russian entries with incorrect language header","kind":"other","parents":["Entries with incorrect language header","Entry maintenance"],"source":"w+disamb","_dis":"13 5 77 5"},{"name":"Russian terms prefixed with по-","kind":"other","parents":[],"source":"w+disamb","_dis":"27 23 27 23"}]},{"links":[["dawn","dawn"]],"synonyms":[{"word":"рассвести́"},{"word":"свести́"}],"raw_glosses":["(colloquial, impersonal) to dawn"],"glosses":["to dawn"],"tags":["colloquial","impersonal"],"id":"en-побелеть-ru-verb-DJUq4lnp","categories":[{"name":"Russian terms prefixed with по-","kind":"other","parents":[],"source":"w+disamb","_dis":"27 23 27 23"}]}]} \ No newline at end of file diff --git a/data/test/tidy/ru-en-forms-0.json b/data/test/tidy/ru-en-forms-0.json new file mode 100644 index 0000000..b7a505e --- /dev/null +++ b/data/test/tidy/ru-en-forms-0.json @@ -0,0 +1 @@ +{"_type":"map","map":[["снег",{"_type":"map","map":[["сне́га",{"_type":"map","map":[["noun",["genitive","genitive singular"]]]}],["снега́",{"_type":"map","map":[["noun",["accusative plural irregular","nominative plural","nominative plural irregular"]]]}],["снего́в",{"_type":"map","map":[["noun",["genitive plural"]]]}],["сне́жный",{"_type":"map","map":[["noun",["adjective relational"]]]}],["снегово́й",{"_type":"map","map":[["noun",["adjective relational"]]]}],["снежо́к",{"_type":"map","map":[["noun",["diminutive"]]]}],["сне́гу",{"_type":"map","map":[["noun",["singular partitive","dative singular"]]]}],["снега́м",{"_type":"map","map":[["noun",["dative plural"]]]}],["сне́г",{"_type":"map","map":[["noun",["accusative singular"]]]}],["сне́гом",{"_type":"map","map":[["noun",["instrumental singular"]]]}],["снега́ми",{"_type":"map","map":[["noun",["instrumental plural"]]]}],["сне́ге",{"_type":"map","map":[["noun",["singular prepositional"]]]}],["снега́х",{"_type":"map","map":[["noun",["plural prepositional"]]]}],["снегу́",{"_type":"map","map":[["noun",["locative singular"]]]}]]}],["побеле́ть",{"_type":"map","map":[["беле́ть",{"_type":"map","map":[["verb",["imperfective"]]]}],["побеле́вший",{"_type":"map","map":[["verb",["past active participle"]]]}],["побеле́в",{"_type":"map","map":[["verb",["past participle adverbial"]]]}],["побеле́вши",{"_type":"map","map":[["verb",["past participle adverbial"]]]}],["побеле́ю",{"_type":"map","map":[["verb",["first-person singular future"]]]}],["побеле́ешь",{"_type":"map","map":[["verb",["second-person singular future"]]]}],["побеле́ет",{"_type":"map","map":[["verb",["third-person singular future"]]]}],["побеле́ем",{"_type":"map","map":[["verb",["first-person plural future"]]]}],["побеле́ете",{"_type":"map","map":[["verb",["second-person plural future"]]]}],["побеле́ют",{"_type":"map","map":[["verb",["third-person plural future"]]]}],["побеле́й",{"_type":"map","map":[["verb",["singular imperative"]]]}],["побеле́йте",{"_type":"map","map":[["verb",["plural imperative"]]]}],["побеле́л",{"_type":"map","map":[["verb",["singular masculine past"]]]}],["побеле́ли",{"_type":"map","map":[["verb",["plural feminine past","plural masculine past","plural neuter past"]]]}],["побеле́ла",{"_type":"map","map":[["verb",["singular feminine past"]]]}],["побеле́ло",{"_type":"map","map":[["verb",["singular neuter past"]]]}]]}]]} \ No newline at end of file diff --git a/data/test/tidy/ru-en-lemmas.json b/data/test/tidy/ru-en-lemmas.json new file mode 100644 index 0000000..08b049f --- /dev/null +++ b/data/test/tidy/ru-en-lemmas.json @@ -0,0 +1 @@ +{"снег":{"снег":{"noun":{"0":{"ipa":[{"ipa":"[sʲnʲek]","tags":[]}],"glossTree":{"_type":"map","map":[["snow",{"_type":"map","map":[["_tags",[]],["_examples",[{"text":"идёт снег","translation":"it is snowing"},{"text":"мо́крый снег","translation":"sleet, wet snow"}]]]}],["(figuratively) snow, the white electrical noise on a TV set when there is no TV signal",{"_type":"map","map":[["_tags",["figuratively"]],["_examples",[]]]}],["(slang) cocaine",{"_type":"map","map":[["_tags",["slang"]],["_examples",[]]]}]]}}}}},"побеле́ть":{"побеле́ть":{"verb":{"0":{"ipa":[{"ipa":"[pəbʲɪˈlʲetʲ]","tags":[]}],"glossTree":{"_type":"map","map":[["to turn white, (intransitive) to whiten",{"_type":"map","map":[["_tags",[]],["_examples",[]]]}],["(colloquial) to turn gray",{"_type":"map","map":[["_tags",["colloquial"]],["_examples",[]]]}],["(colloquial) to become brighter",{"_type":"map","map":[["_tags",["colloquial"]],["_examples",[]]]}],["(colloquial, impersonal) to dawn",{"_type":"map","map":[["_tags",["colloquial","impersonal"]],["_examples",[]]]}]]}}}}}} \ No newline at end of file From 8c9bfef5cc09769cae3a9f9782e84a4ce9ac1b1f Mon Sep 17 00:00:00 2001 From: seth-js <83692925+seth-js@users.noreply.github.com> Date: Fri, 20 Dec 2024 03:25:21 -0600 Subject: [PATCH 2/5] add missing common russian case, write tests --- data/language/tag_order.json | 3 +- data/test/dict/ru/en/tag_bank_1.json | 45 ++- data/test/dict/ru/en/term_bank_2.json | 8 +- data/test/tidy/ru-en-forms-0.json | 448 +++++++++++++++++++++++++- data/test/tidy/ru-en-lemmas.json | 173 +++++++++- 5 files changed, 669 insertions(+), 8 deletions(-) diff --git a/data/language/tag_order.json b/data/language/tag_order.json index 4d9a314..f5a7321 100644 --- a/data/language/tag_order.json +++ b/data/language/tag_order.json @@ -10,7 +10,8 @@ "accusative", "vocative", "locative", - "instrumental" + "instrumental", + "prepositional" ], "persons": [ "first-person", diff --git a/data/test/dict/ru/en/tag_bank_1.json b/data/test/dict/ru/en/tag_bank_1.json index a51fa5c..7dc72db 100644 --- a/data/test/dict/ru/en/tag_bank_1.json +++ b/data/test/dict/ru/en/tag_bank_1.json @@ -1 +1,44 @@ -[["n","partOfSpeech",-1,"noun",1],["fig","",0,"figuratively",0],["sl","",0,"slang",0],["v","partOfSpeech",-1,"verb",1],["col","",0,"colloquial",0],["impers","",0,"impersonal",0]] \ No newline at end of file +[ + [ + "n", + "partOfSpeech", + -1, + "noun", + 1 + ], + [ + "fig", + "", + 0, + "figuratively", + 0 + ], + [ + "sl", + "", + 0, + "slang", + 0 + ], + [ + "v", + "partOfSpeech", + -1, + "verb", + 1 + ], + [ + "col", + "", + 0, + "colloquial", + 0 + ], + [ + "impers", + "", + 0, + "impersonal", + 0 + ] +] \ No newline at end of file diff --git a/data/test/dict/ru/en/term_bank_2.json b/data/test/dict/ru/en/term_bank_2.json index cae509c..6e3484e 100644 --- a/data/test/dict/ru/en/term_bank_2.json +++ b/data/test/dict/ru/en/term_bank_2.json @@ -235,8 +235,8 @@ [ "снег", [ - "singular", - "prepositional" + "prepositional", + "singular" ] ] ], @@ -253,8 +253,8 @@ [ "снег", [ - "plural", - "prepositional" + "prepositional", + "plural" ] ] ], diff --git a/data/test/tidy/ru-en-forms-0.json b/data/test/tidy/ru-en-forms-0.json index b7a505e..f246c3b 100644 --- a/data/test/tidy/ru-en-forms-0.json +++ b/data/test/tidy/ru-en-forms-0.json @@ -1 +1,447 @@ -{"_type":"map","map":[["снег",{"_type":"map","map":[["сне́га",{"_type":"map","map":[["noun",["genitive","genitive singular"]]]}],["снега́",{"_type":"map","map":[["noun",["accusative plural irregular","nominative plural","nominative plural irregular"]]]}],["снего́в",{"_type":"map","map":[["noun",["genitive plural"]]]}],["сне́жный",{"_type":"map","map":[["noun",["adjective relational"]]]}],["снегово́й",{"_type":"map","map":[["noun",["adjective relational"]]]}],["снежо́к",{"_type":"map","map":[["noun",["diminutive"]]]}],["сне́гу",{"_type":"map","map":[["noun",["singular partitive","dative singular"]]]}],["снега́м",{"_type":"map","map":[["noun",["dative plural"]]]}],["сне́г",{"_type":"map","map":[["noun",["accusative singular"]]]}],["сне́гом",{"_type":"map","map":[["noun",["instrumental singular"]]]}],["снега́ми",{"_type":"map","map":[["noun",["instrumental plural"]]]}],["сне́ге",{"_type":"map","map":[["noun",["singular prepositional"]]]}],["снега́х",{"_type":"map","map":[["noun",["plural prepositional"]]]}],["снегу́",{"_type":"map","map":[["noun",["locative singular"]]]}]]}],["побеле́ть",{"_type":"map","map":[["беле́ть",{"_type":"map","map":[["verb",["imperfective"]]]}],["побеле́вший",{"_type":"map","map":[["verb",["past active participle"]]]}],["побеле́в",{"_type":"map","map":[["verb",["past participle adverbial"]]]}],["побеле́вши",{"_type":"map","map":[["verb",["past participle adverbial"]]]}],["побеле́ю",{"_type":"map","map":[["verb",["first-person singular future"]]]}],["побеле́ешь",{"_type":"map","map":[["verb",["second-person singular future"]]]}],["побеле́ет",{"_type":"map","map":[["verb",["third-person singular future"]]]}],["побеле́ем",{"_type":"map","map":[["verb",["first-person plural future"]]]}],["побеле́ете",{"_type":"map","map":[["verb",["second-person plural future"]]]}],["побеле́ют",{"_type":"map","map":[["verb",["third-person plural future"]]]}],["побеле́й",{"_type":"map","map":[["verb",["singular imperative"]]]}],["побеле́йте",{"_type":"map","map":[["verb",["plural imperative"]]]}],["побеле́л",{"_type":"map","map":[["verb",["singular masculine past"]]]}],["побеле́ли",{"_type":"map","map":[["verb",["plural feminine past","plural masculine past","plural neuter past"]]]}],["побеле́ла",{"_type":"map","map":[["verb",["singular feminine past"]]]}],["побеле́ло",{"_type":"map","map":[["verb",["singular neuter past"]]]}]]}]]} \ No newline at end of file +{ + "_type": "map", + "map": [ + [ + "снег", + { + "_type": "map", + "map": [ + [ + "сне́га", + { + "_type": "map", + "map": [ + [ + "noun", + [ + "genitive", + "genitive singular" + ] + ] + ] + } + ], + [ + "снега́", + { + "_type": "map", + "map": [ + [ + "noun", + [ + "accusative plural irregular", + "nominative plural", + "nominative plural irregular" + ] + ] + ] + } + ], + [ + "снего́в", + { + "_type": "map", + "map": [ + [ + "noun", + [ + "genitive plural" + ] + ] + ] + } + ], + [ + "сне́жный", + { + "_type": "map", + "map": [ + [ + "noun", + [ + "adjective relational" + ] + ] + ] + } + ], + [ + "снегово́й", + { + "_type": "map", + "map": [ + [ + "noun", + [ + "adjective relational" + ] + ] + ] + } + ], + [ + "снежо́к", + { + "_type": "map", + "map": [ + [ + "noun", + [ + "diminutive" + ] + ] + ] + } + ], + [ + "сне́гу", + { + "_type": "map", + "map": [ + [ + "noun", + [ + "singular partitive", + "dative singular" + ] + ] + ] + } + ], + [ + "снега́м", + { + "_type": "map", + "map": [ + [ + "noun", + [ + "dative plural" + ] + ] + ] + } + ], + [ + "сне́г", + { + "_type": "map", + "map": [ + [ + "noun", + [ + "accusative singular" + ] + ] + ] + } + ], + [ + "сне́гом", + { + "_type": "map", + "map": [ + [ + "noun", + [ + "instrumental singular" + ] + ] + ] + } + ], + [ + "снега́ми", + { + "_type": "map", + "map": [ + [ + "noun", + [ + "instrumental plural" + ] + ] + ] + } + ], + [ + "сне́ге", + { + "_type": "map", + "map": [ + [ + "noun", + [ + "prepositional singular" + ] + ] + ] + } + ], + [ + "снега́х", + { + "_type": "map", + "map": [ + [ + "noun", + [ + "prepositional plural" + ] + ] + ] + } + ], + [ + "снегу́", + { + "_type": "map", + "map": [ + [ + "noun", + [ + "locative singular" + ] + ] + ] + } + ] + ] + } + ], + [ + "побеле́ть", + { + "_type": "map", + "map": [ + [ + "беле́ть", + { + "_type": "map", + "map": [ + [ + "verb", + [ + "imperfective" + ] + ] + ] + } + ], + [ + "побеле́вший", + { + "_type": "map", + "map": [ + [ + "verb", + [ + "past active participle" + ] + ] + ] + } + ], + [ + "побеле́в", + { + "_type": "map", + "map": [ + [ + "verb", + [ + "past participle adverbial" + ] + ] + ] + } + ], + [ + "побеле́вши", + { + "_type": "map", + "map": [ + [ + "verb", + [ + "past participle adverbial" + ] + ] + ] + } + ], + [ + "побеле́ю", + { + "_type": "map", + "map": [ + [ + "verb", + [ + "first-person singular future" + ] + ] + ] + } + ], + [ + "побеле́ешь", + { + "_type": "map", + "map": [ + [ + "verb", + [ + "second-person singular future" + ] + ] + ] + } + ], + [ + "побеле́ет", + { + "_type": "map", + "map": [ + [ + "verb", + [ + "third-person singular future" + ] + ] + ] + } + ], + [ + "побеле́ем", + { + "_type": "map", + "map": [ + [ + "verb", + [ + "first-person plural future" + ] + ] + ] + } + ], + [ + "побеле́ете", + { + "_type": "map", + "map": [ + [ + "verb", + [ + "second-person plural future" + ] + ] + ] + } + ], + [ + "побеле́ют", + { + "_type": "map", + "map": [ + [ + "verb", + [ + "third-person plural future" + ] + ] + ] + } + ], + [ + "побеле́й", + { + "_type": "map", + "map": [ + [ + "verb", + [ + "singular imperative" + ] + ] + ] + } + ], + [ + "побеле́йте", + { + "_type": "map", + "map": [ + [ + "verb", + [ + "plural imperative" + ] + ] + ] + } + ], + [ + "побеле́л", + { + "_type": "map", + "map": [ + [ + "verb", + [ + "singular masculine past" + ] + ] + ] + } + ], + [ + "побеле́ли", + { + "_type": "map", + "map": [ + [ + "verb", + [ + "plural feminine past", + "plural masculine past", + "plural neuter past" + ] + ] + ] + } + ], + [ + "побеле́ла", + { + "_type": "map", + "map": [ + [ + "verb", + [ + "singular feminine past" + ] + ] + ] + } + ], + [ + "побеле́ло", + { + "_type": "map", + "map": [ + [ + "verb", + [ + "singular neuter past" + ] + ] + ] + } + ] + ] + } + ] + ] +} \ No newline at end of file diff --git a/data/test/tidy/ru-en-lemmas.json b/data/test/tidy/ru-en-lemmas.json index 08b049f..85b71ab 100644 --- a/data/test/tidy/ru-en-lemmas.json +++ b/data/test/tidy/ru-en-lemmas.json @@ -1 +1,172 @@ -{"снег":{"снег":{"noun":{"0":{"ipa":[{"ipa":"[sʲnʲek]","tags":[]}],"glossTree":{"_type":"map","map":[["snow",{"_type":"map","map":[["_tags",[]],["_examples",[{"text":"идёт снег","translation":"it is snowing"},{"text":"мо́крый снег","translation":"sleet, wet snow"}]]]}],["(figuratively) snow, the white electrical noise on a TV set when there is no TV signal",{"_type":"map","map":[["_tags",["figuratively"]],["_examples",[]]]}],["(slang) cocaine",{"_type":"map","map":[["_tags",["slang"]],["_examples",[]]]}]]}}}}},"побеле́ть":{"побеле́ть":{"verb":{"0":{"ipa":[{"ipa":"[pəbʲɪˈlʲetʲ]","tags":[]}],"glossTree":{"_type":"map","map":[["to turn white, (intransitive) to whiten",{"_type":"map","map":[["_tags",[]],["_examples",[]]]}],["(colloquial) to turn gray",{"_type":"map","map":[["_tags",["colloquial"]],["_examples",[]]]}],["(colloquial) to become brighter",{"_type":"map","map":[["_tags",["colloquial"]],["_examples",[]]]}],["(colloquial, impersonal) to dawn",{"_type":"map","map":[["_tags",["colloquial","impersonal"]],["_examples",[]]]}]]}}}}}} \ No newline at end of file +{ + "снег": { + "снег": { + "noun": { + "0": { + "ipa": [ + { + "ipa": "[sʲnʲek]", + "tags": [] + } + ], + "glossTree": { + "_type": "map", + "map": [ + [ + "snow", + { + "_type": "map", + "map": [ + [ + "_tags", + [] + ], + [ + "_examples", + [ + { + "text": "идёт снег", + "translation": "it is snowing" + }, + { + "text": "мо́крый снег", + "translation": "sleet, wet snow" + } + ] + ] + ] + } + ], + [ + "(figuratively) snow, the white electrical noise on a TV set when there is no TV signal", + { + "_type": "map", + "map": [ + [ + "_tags", + [ + "figuratively" + ] + ], + [ + "_examples", + [] + ] + ] + } + ], + [ + "(slang) cocaine", + { + "_type": "map", + "map": [ + [ + "_tags", + [ + "slang" + ] + ], + [ + "_examples", + [] + ] + ] + } + ] + ] + } + } + } + } + }, + "побеле́ть": { + "побеле́ть": { + "verb": { + "0": { + "ipa": [ + { + "ipa": "[pəbʲɪˈlʲetʲ]", + "tags": [] + } + ], + "glossTree": { + "_type": "map", + "map": [ + [ + "to turn white, (intransitive) to whiten", + { + "_type": "map", + "map": [ + [ + "_tags", + [] + ], + [ + "_examples", + [] + ] + ] + } + ], + [ + "(colloquial) to turn gray", + { + "_type": "map", + "map": [ + [ + "_tags", + [ + "colloquial" + ] + ], + [ + "_examples", + [] + ] + ] + } + ], + [ + "(colloquial) to become brighter", + { + "_type": "map", + "map": [ + [ + "_tags", + [ + "colloquial" + ] + ], + [ + "_examples", + [] + ] + ] + } + ], + [ + "(colloquial, impersonal) to dawn", + { + "_type": "map", + "map": [ + [ + "_tags", + [ + "colloquial", + "impersonal" + ] + ], + [ + "_examples", + [] + ] + ] + } + ] + ] + } + } + } + } + } +} \ No newline at end of file From f49fbd5401660725db1ddb1e45725c172410a3c3 Mon Sep 17 00:00:00 2001 From: seth-js <83692925+seth-js@users.noreply.github.com> Date: Fri, 20 Dec 2024 03:34:19 -0600 Subject: [PATCH 3/5] filter `dated` inflection tag, add test --- 3-tidy-up.js | 3 +- data/test/dict/ru/en/tag_bank_1.json | 14 + data/test/dict/ru/en/term_bank_1.json | 72 +++++ data/test/dict/ru/en/term_bank_2.json | 340 ++++++++++++++++++++++ data/test/ipa/ru/en/term_meta_bank_1.json | 13 + data/test/kaikki/ru-en.json | 3 +- data/test/tidy/ru-en-forms-0.json | 227 +++++++++++++++ data/test/tidy/ru-en-lemmas.json | 58 ++++ 8 files changed, 728 insertions(+), 2 deletions(-) diff --git a/3-tidy-up.js b/3-tidy-up.js index 909be68..9ea0d13 100644 --- a/3-tidy-up.js +++ b/3-tidy-up.js @@ -93,7 +93,8 @@ const blacklistedTags = [ 'obsolete', 'archaic', 'used-in-the-form', - 'romanization' + 'romanization', + 'dated' ]; const identityTags = [ diff --git a/data/test/dict/ru/en/tag_bank_1.json b/data/test/dict/ru/en/tag_bank_1.json index 7dc72db..c972c2f 100644 --- a/data/test/dict/ru/en/tag_bank_1.json +++ b/data/test/dict/ru/en/tag_bank_1.json @@ -40,5 +40,19 @@ 0, "impersonal", 0 + ], + [ + "reltnl", + "", + 0, + "relational", + 0 + ], + [ + "adj", + "partOfSpeech", + -1, + "adjective", + 1 ] ] \ No newline at end of file diff --git a/data/test/dict/ru/en/term_bank_1.json b/data/test/dict/ru/en/term_bank_1.json index 91df195..7c33042 100644 --- a/data/test/dict/ru/en/term_bank_1.json +++ b/data/test/dict/ru/en/term_bank_1.json @@ -197,5 +197,77 @@ ], 0, "" + ], + [ + "зимний", + "зи́мний", + "reltnl adj", + "adj", + 0, + [ + { + "type": "structured-content", + "content": [ + { + "tag": "div", + "content": [ + "winter", + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Зи́мний дворе́ц" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "Winter palace" + } + ] + } + } + ] + } + ] + } + ], + 0, + "" + ], + [ + "зимний", + "зи́мний", + "adj", + "adj", + 0, + [ + { + "type": "structured-content", + "content": [ + { + "tag": "div", + "content": [ + "wintry, hibernal" + ] + } + ] + } + ], + 0, + "" ] ] \ No newline at end of file diff --git a/data/test/dict/ru/en/term_bank_2.json b/data/test/dict/ru/en/term_bank_2.json index 6e3484e..6e4c9a1 100644 --- a/data/test/dict/ru/en/term_bank_2.json +++ b/data/test/dict/ru/en/term_bank_2.json @@ -594,5 +594,345 @@ ], 0, "" + ], + [ + "зимнее", + "зи́мнее", + "non-lemma", + "", + 0, + [ + [ + "зи́мний", + [ + "accusative", + "neuter" + ] + ], + [ + "зи́мний", + [ + "nominative", + "neuter" + ] + ] + ], + 0, + "" + ], + [ + "зимняя", + "зи́мняя", + "non-lemma", + "", + 0, + [ + [ + "зи́мний", + [ + "nominative", + "feminine" + ] + ] + ], + 0, + "" + ], + [ + "зимние", + "зи́мние", + "non-lemma", + "", + 0, + [ + [ + "зи́мний", + [ + "accusative", + "plural", + "inanimate" + ] + ], + [ + "зи́мний", + [ + "nominative", + "plural" + ] + ] + ], + 0, + "" + ], + [ + "зимнего", + "зи́мнего", + "non-lemma", + "", + 0, + [ + [ + "зи́мний", + [ + "accusative", + "masculine", + "animate" + ] + ], + [ + "зи́мний", + [ + "genitive", + "masculine", + "neuter" + ] + ] + ], + 0, + "" + ], + [ + "зимней", + "зи́мней", + "non-lemma", + "", + 0, + [ + [ + "зи́мний", + [ + "dative", + "feminine" + ] + ], + [ + "зи́мний", + [ + "genitive", + "feminine" + ] + ], + [ + "зи́мний", + [ + "instrumental", + "feminine" + ] + ], + [ + "зи́мний", + [ + "prepositional", + "feminine" + ] + ] + ], + 0, + "" + ], + [ + "зимних", + "зи́мних", + "non-lemma", + "", + 0, + [ + [ + "зи́мний", + [ + "accusative", + "plural", + "animate" + ] + ], + [ + "зи́мний", + [ + "genitive", + "plural" + ] + ], + [ + "зи́мний", + [ + "prepositional", + "plural" + ] + ] + ], + 0, + "" + ], + [ + "зимнему", + "зи́мнему", + "non-lemma", + "", + 0, + [ + [ + "зи́мний", + [ + "dative", + "masculine", + "neuter" + ] + ] + ], + 0, + "" + ], + [ + "зимним", + "зи́мним", + "non-lemma", + "", + 0, + [ + [ + "зи́мний", + [ + "instrumental", + "masculine", + "neuter" + ] + ], + [ + "зи́мний", + [ + "dative", + "plural" + ] + ] + ], + 0, + "" + ], + [ + "зимнюю", + "зи́мнюю", + "non-lemma", + "", + 0, + [ + [ + "зи́мний", + [ + "accusative", + "feminine" + ] + ] + ], + 0, + "" + ], + [ + "зимнею", + "зи́мнею", + "non-lemma", + "", + 0, + [ + [ + "зи́мний", + [ + "instrumental", + "feminine" + ] + ] + ], + 0, + "" + ], + [ + "зимними", + "зи́мними", + "non-lemma", + "", + 0, + [ + [ + "зи́мний", + [ + "instrumental", + "plural" + ] + ] + ], + 0, + "" + ], + [ + "зимнем", + "зи́мнем", + "non-lemma", + "", + 0, + [ + [ + "зи́мний", + [ + "prepositional", + "masculine", + "neuter" + ] + ] + ], + 0, + "" + ], + [ + "зимне", + "зи́мне", + "non-lemma", + "", + 0, + [ + [ + "зи́мний", + [ + "neuter", + "short-form" + ] + ] + ], + 0, + "" + ], + [ + "зимня", + "зи́мня", + "non-lemma", + "", + 0, + [ + [ + "зи́мний", + [ + "feminine", + "short-form" + ] + ] + ], + 0, + "" + ], + [ + "зимни", + "зи́мни", + "non-lemma", + "", + 0, + [ + [ + "зи́мний", + [ + "plural", + "short-form" + ] + ] + ], + 0, + "" ] ] \ No newline at end of file diff --git a/data/test/ipa/ru/en/term_meta_bank_1.json b/data/test/ipa/ru/en/term_meta_bank_1.json index fea77d7..ca94968 100644 --- a/data/test/ipa/ru/en/term_meta_bank_1.json +++ b/data/test/ipa/ru/en/term_meta_bank_1.json @@ -24,5 +24,18 @@ } ] } + ], + [ + "зимний", + "ipa", + { + "reading": "зи́мний", + "transcriptions": [ + { + "ipa": "[ˈzʲimnʲɪj]", + "tags": [] + } + ] + } ] ] \ No newline at end of file diff --git a/data/test/kaikki/ru-en.json b/data/test/kaikki/ru-en.json index 3c3d5fc..98e4fb1 100644 --- a/data/test/kaikki/ru-en.json +++ b/data/test/kaikki/ru-en.json @@ -1,4 +1,5 @@ {"pos":"noun","head_templates":[{"name":"ru-noun+","args":{"1":"c","2":"","3":"(1)","par":"сне́гу","loc":"снегу́","adj":"сне́жный","dim":"снежо́к","adj2":"снегово́й"},"expansion":"снег • (sneg) m inan (genitive сне́га, nominative plural снега́, genitive plural снего́в, relational adjective сне́жный or снегово́й, diminutive снежо́к)"}],"forms":[{"form":"sneg","tags":["romanization"]},{"form":"сне́га","tags":["genitive"]},{"form":"снега́","tags":["nominative","plural"]},{"form":"снего́в","tags":["genitive","plural"]},{"form":"сне́жный","tags":["adjective","relational"]},{"form":"снегово́й","tags":["adjective","relational"]},{"form":"снежо́к","tags":["diminutive"]},{"form":"no-table-tags","source":"declension","tags":["table-tags"]},{"form":"ru-noun-table","source":"declension","tags":["inflection-template"]},{"form":"velar-stem","source":"declension","tags":["class"]},{"form":"accent-c","source":"declension","tags":["class"]},{"form":"сне́г","tags":["nominative","singular"],"source":"declension","roman":"snég"},{"form":"снега́","tags":["irregular","nominative","plural"],"source":"declension","roman":"snegá"},{"form":"сне́га","tags":["genitive","singular"],"source":"declension","roman":"snéga"},{"form":"снего́в","tags":["genitive","plural"],"source":"declension","roman":"snegóv"},{"form":"сне́гу","tags":["dative","singular"],"source":"declension","roman":"snégu"},{"form":"снега́м","tags":["dative","plural"],"source":"declension","roman":"snegám"},{"form":"сне́г","tags":["accusative","singular"],"source":"declension","roman":"snég"},{"form":"снега́","tags":["accusative","irregular","plural"],"source":"declension","roman":"snegá"},{"form":"сне́гом","tags":["instrumental","singular"],"source":"declension","roman":"snégom"},{"form":"снега́ми","tags":["instrumental","plural"],"source":"declension","roman":"snegámi"},{"form":"сне́ге","tags":["prepositional","singular"],"source":"declension","roman":"snége"},{"form":"снега́х","tags":["plural","prepositional"],"source":"declension","roman":"snegáx"},{"form":"сне́гу","tags":["partitive","singular"],"source":"declension","roman":"snégu"},{"form":"снегу́","tags":["locative","singular"],"source":"declension","roman":"snegú"}],"sounds":[{"ipa":"[sʲnʲek]"},{"audio":"Ru-снег.ogg","ogg_url":"https://upload.wikimedia.org/wikipedia/commons/0/01/Ru-%D1%81%D0%BD%D0%B5%D0%B3.ogg","mp3_url":"https://upload.wikimedia.org/wikipedia/commons/transcoded/0/01/Ru-%D1%81%D0%BD%D0%B5%D0%B3.ogg/Ru-%D1%81%D0%BD%D0%B5%D0%B3.ogg.mp3"}],"etymology_text":"Inherited from Proto-Slavic *sněgъ, from Proto-Balto-Slavic *snáigas, from Proto-Indo-European *snóygʷʰos.","etymology_templates":[{"name":"root","args":{"1":"ru","2":"ine-pro","3":"*sneygʷʰ-"},"expansion":""},{"name":"glossary","args":{"1":"Inherited"},"expansion":"Inherited"},{"name":"inh","args":{"1":"ru","2":"sla-pro","3":"*sněgъ","4":"","5":"","lit":"","pos":"","tr":"","ts":"","id":"","sc":"","g":"","g2":"","g3":"","nocat":"","sort":""},"expansion":"Proto-Slavic *sněgъ"},{"name":"inh+","args":{"1":"ru","2":"sla-pro","3":"*sněgъ"},"expansion":"Inherited from Proto-Slavic *sněgъ"},{"name":"inh","args":{"1":"ru","2":"ine-bsl-pro","3":"*snáigas"},"expansion":"Proto-Balto-Slavic *snáigas"},{"name":"inh","args":{"1":"ru","2":"ine-pro","3":"*snóygʷʰos"},"expansion":"Proto-Indo-European *snóygʷʰos"}],"word":"снег","lang":"Russian","lang_code":"ru","synonyms":[{"roman":"sněg","word":"снѣгъ — Pre-reform orthography (1918)","_dis1":"0 0 0"}],"holonyms":[{"roman":"burán","word":"бура́н","_dis1":"0 0 0"},{"roman":"vʹjúga","word":"вью́га","_dis1":"0 0 0"},{"roman":"nast","word":"наст","_dis1":"0 0 0"},{"roman":"metélʹ","word":"мете́ль","_dis1":"0 0 0"},{"roman":"pozjómka","word":"позёмка","_dis1":"0 0 0"},{"roman":"poróša","word":"поро́ша","_dis1":"0 0 0"},{"roman":"snegovík","word":"снегови́к","_dis1":"0 0 0"},{"roman":"snéžnaja bába","word":"сне́жная ба́ба","_dis1":"0 0 0"},{"roman":"snežók","word":"снежо́к","_dis1":"0 0 0"},{"roman":"sugrób","word":"сугро́б","_dis1":"0 0 0"}],"meronyms":[{"roman":"snežínka","word":"снежи́нка","_dis1":"0 0 0"}],"derived":[{"tags":["masculine"],"roman":"podsnéžnik","word":"подсне́жник","_dis1":"0 0 0"},{"tags":["animate","masculine"],"roman":"snegovík","word":"снегови́к","_dis1":"0 0 0"},{"tags":["masculine"],"roman":"snegopád","word":"снегопа́д","_dis1":"0 0 0"},{"tags":["animate","feminine"],"roman":"Snegúročka","word":"Снегу́рочка","_dis1":"0 0 0"},{"tags":["imperfective"],"roman":"snežítʹ","word":"снежи́ть","_dis1":"0 0 0"},{"tags":["feminine"],"roman":"snežínka","word":"снежи́нка","_dis1":"0 0 0"},{"tags":["masculine"],"roman":"snegostúp","word":"снегосту́п","_dis1":"0 0 0"},{"tags":["masculine"],"roman":"snegoxód","word":"снегохо́д","_dis1":"0 0 0"}],"related":[{"tags":["animate","masculine"],"roman":"snegírʹ","word":"снеги́рь","_dis1":"0 0 0"}],"senses":[{"examples":[{"text":"идёт снег","english":"it is snowing","type":"example","roman":"idjót sneg"},{"text":"мо́крый снег","english":"sleet, wet snow","type":"example","roman":"mókryj sneg"},{"text":"пе́рвый снег","english":"the first snow","type":"example","roman":"pérvyj sneg"},{"text":"ски́дывать/ски́нуть снег с кры́ши","english":"to throw the snow off the roof","type":"example","roman":"skídyvatʹ/skínutʹ sneg s krýši"},{"text":"как снег на го́лову (saying)\nkak sneg na gólovu\nlike a bolt from the blue (unexpectedly, suddenly)\n(literally, “like snow on the head”)","type":"example"},{"text":"что́-либо ну́жно, как прошлого́дний снег\nštó-libo núžno, kak prošlogódnij sneg\nsomething is not needed in the least\n(literally, “something is needed like last year's snow”)","type":"example"},{"text":"зимо́й сне́га не вы́просить (saying)\nzimój snéga ne výprositʹ\n(said about someone mean, stingy)\n(literally, “cannot be asked for snow in winter”)","type":"example"},{"text":"Дава́й поваля́емся в снегу́!","english":"Let's roll in the snow!","type":"example","roman":"Daváj povaljájemsja v snegú!"}],"links":[["snow","snow"]],"glosses":["snow"],"id":"en-снег-ru-noun-p0YiLwnY","categories":[{"name":"Russian nouns with accent pattern c","kind":"other","parents":[],"source":"w+disamb","_dis":"33 33 33"},{"name":"Russian nouns with partitive singular","kind":"other","parents":[],"source":"w+disamb","_dis":"31 37 31"},{"name":"Snow","kind":"topical","parents":["Water","Weather","Liquids","Atmosphere","Matter","Nature","Chemistry","All topics","Sciences","Fundamental"],"source":"w+disamb","orig":"ru:Snow","langcode":"ru","_dis":"67 32 1"}]},{"links":[["snow","snow"]],"raw_glosses":["(figuratively) snow, the white electrical noise on a TV set when there is no TV signal"],"glosses":["snow, the white electrical noise on a TV set when there is no TV signal"],"tags":["figuratively"],"id":"en-снег-ru-noun-mEgWUvyE","categories":[{"name":"Pages with 4 entries","kind":"other","parents":[],"source":"w+disamb","_dis":"41 41 1 13 1 1 3 1"},{"name":"Pages with entries","kind":"other","parents":[],"source":"w+disamb","_dis":"39 39 1 18 1 1 2 1"},{"name":"Russian entries with incorrect language header","kind":"other","parents":["Entries with incorrect language header","Entry maintenance"],"source":"w+disamb","_dis":"2 96 2"},{"name":"Russian links with redundant wikilinks","kind":"other","parents":["Links with redundant wikilinks","Entry maintenance"],"source":"w+disamb","_dis":"3 93 3"},{"name":"Russian nouns ending in a consonant with plural -а","kind":"other","parents":[],"source":"w+disamb","_dis":"8 85 8"},{"name":"Russian nouns with accent pattern c","kind":"other","parents":[],"source":"w+disamb","_dis":"33 33 33"},{"name":"Russian nouns with irregular nominative plural","kind":"other","parents":[],"source":"w+disamb","_dis":"28 44 28"},{"name":"Russian nouns with locative singular","kind":"other","parents":[],"source":"w+disamb","_dis":"29 43 29"},{"name":"Russian nouns with partitive singular","kind":"other","parents":[],"source":"w+disamb","_dis":"31 37 31"}]},{"links":[["cocaine","cocaine"]],"raw_glosses":["(slang) cocaine"],"glosses":["cocaine"],"tags":["slang"],"id":"en-снег-ru-noun-OcetgAxX","categories":[{"name":"Russian nouns with accent pattern c","kind":"other","parents":[],"source":"w+disamb","_dis":"33 33 33"},{"name":"Russian nouns with partitive singular","kind":"other","parents":[],"source":"w+disamb","_dis":"31 37 31"}]}]} {"pos":"verb","head_templates":[{"name":"head","args":{"1":"ru","2":"verb form","head":"возни́к"},"expansion":"возни́к • (vozník)"}],"forms":[{"form":"возни́к","tags":["canonical"]},{"form":"vozník","tags":["romanization"]}],"word":"возник","lang":"Russian","lang_code":"ru","sounds":[{"ipa":"[vɐzʲˈnʲik]"}],"senses":[{"links":[["возни́кнуть","возникнуть#Russian"]],"glosses":["short masculine singular past indicative perfective of возни́кнуть (vozníknutʹ)"],"tags":["form-of","indicative","masculine","past","perfective","short-form","singular"],"form_of":[{"word":"возни́кнуть","extra":"vozníknutʹ"}],"id":"en-возник-ru-verb-5MxEK0j6","categories":[{"name":"Pages with 1 entry","kind":"other","parents":[],"source":"w"},{"name":"Pages with entries","kind":"other","parents":[],"source":"w"},{"name":"Russian entries with incorrect language header","kind":"other","parents":["Entries with incorrect language header","Entry maintenance"],"source":"w"}]}]} {"pos":"noun","head_templates":[{"name":"head","args":{"1":"ru","2":"noun form","g":"f-in"},"expansion":"простынёй • (prostynjój) f inan"}],"forms":[{"form":"prostynjój","tags":["romanization"]}],"word":"простынёй","lang":"Russian","lang_code":"ru","sounds":[{"ipa":"[prəstɨˈnʲɵj]"}],"senses":[{"links":[["простыня́","простыня#Russian"]],"glosses":["instrumental singular of простыня́ (prostynjá)"],"tags":["form-of","instrumental","singular"],"form_of":[{"word":"простыня́","extra":"prostynjá"}],"id":"en-простынёй-ru-noun-BAgg9vrw","categories":[{"name":"Pages with 1 entry","kind":"other","parents":[],"source":"w"},{"name":"Pages with entries","kind":"other","parents":[],"source":"w"},{"name":"Russian entries with incorrect language header","kind":"other","parents":["Entries with incorrect language header","Entry maintenance"],"source":"w"}]}]} -{"pos":"verb","head_templates":[{"name":"ru-verb","args":{"1":"побеле́ть","2":"pf","impf":"беле́ть"},"expansion":"побеле́ть • (pobelétʹ) pf (imperfective беле́ть)"}],"forms":[{"form":"побеле́ть","tags":["canonical"]},{"form":"pobelétʹ","tags":["romanization"]},{"form":"беле́ть","tags":["imperfective"]},{"form":"intransitive perfective","source":"conjugation","tags":["table-tags"]},{"form":"ru-conj","source":"conjugation","tags":["inflection-template"]},{"form":"1a perfective intransitive","source":"conjugation","tags":["class"]},{"form":"побеле́ть","tags":["infinitive","perfective"],"source":"conjugation","roman":"pobelétʹ"},{"form":"-","tags":["active","participle","present"],"source":"conjugation"},{"form":"побеле́вший","tags":["active","participle","past"],"source":"conjugation","roman":"pobelévšij"},{"form":"-","tags":["participle","passive","present"],"source":"conjugation"},{"form":"-","tags":["participle","passive","past"],"source":"conjugation"},{"form":"-","tags":["adverbial","participle","present"],"source":"conjugation"},{"form":"побеле́в","tags":["adverbial","participle","past"],"source":"conjugation","roman":"pobelév"},{"form":"побеле́вши","tags":["adverbial","participle","past"],"source":"conjugation","roman":"pobelévši"},{"form":"-","tags":["first-person","present","singular"],"source":"conjugation"},{"form":"побеле́ю","tags":["first-person","future","singular"],"source":"conjugation","roman":"pobeléju"},{"form":"-","tags":["present","second-person","singular"],"source":"conjugation"},{"form":"побеле́ешь","tags":["future","second-person","singular"],"source":"conjugation","roman":"pobeléješʹ"},{"form":"-","tags":["present","singular","third-person"],"source":"conjugation"},{"form":"побеле́ет","tags":["future","singular","third-person"],"source":"conjugation","roman":"pobeléjet"},{"form":"-","tags":["first-person","plural","present"],"source":"conjugation"},{"form":"побеле́ем","tags":["first-person","future","plural"],"source":"conjugation","roman":"pobeléjem"},{"form":"-","tags":["plural","present","second-person"],"source":"conjugation"},{"form":"побеле́ете","tags":["future","plural","second-person"],"source":"conjugation","roman":"pobeléjete"},{"form":"-","tags":["plural","present","third-person"],"source":"conjugation"},{"form":"побеле́ют","tags":["future","plural","third-person"],"source":"conjugation","roman":"pobeléjut"},{"form":"побеле́й","tags":["imperative","singular"],"source":"conjugation","roman":"pobeléj"},{"form":"побеле́йте","tags":["imperative","plural"],"source":"conjugation","roman":"pobeléjte"},{"form":"побеле́л","tags":["masculine","past","singular"],"source":"conjugation","roman":"pobelél"},{"form":"побеле́ли","tags":["masculine","past","plural"],"source":"conjugation","roman":"pobeléli"},{"form":"побеле́ла","tags":["feminine","past","singular"],"source":"conjugation","roman":"pobeléla"},{"form":"побеле́ли","tags":["feminine","past","plural"],"source":"conjugation","roman":"pobeléli"},{"form":"побеле́ло","tags":["neuter","past","singular"],"source":"conjugation","roman":"pobelélo"},{"form":"побеле́ли","tags":["neuter","past","plural"],"source":"conjugation","roman":"pobeléli"}],"inflection_templates":[{"name":"ru-conj","args":{"1":"pf-intr","2":"1a","3":"побеле́ть"}}],"sounds":[{"ipa":"[pəbʲɪˈlʲetʲ]"},{"audio":"LL-Q7737 (rus)-Tatiana Kerbush-побелеть.wav","ogg_url":"https://upload.wikimedia.org/wikipedia/commons/transcoded/2/24/LL-Q7737_%28rus%29-Tatiana_Kerbush-%D0%BF%D0%BE%D0%B1%D0%B5%D0%BB%D0%B5%D1%82%D1%8C.wav/LL-Q7737_%28rus%29-Tatiana_Kerbush-%D0%BF%D0%BE%D0%B1%D0%B5%D0%BB%D0%B5%D1%82%D1%8C.wav.ogg","mp3_url":"https://upload.wikimedia.org/wikipedia/commons/transcoded/2/24/LL-Q7737_%28rus%29-Tatiana_Kerbush-%D0%BF%D0%BE%D0%B1%D0%B5%D0%BB%D0%B5%D1%82%D1%8C.wav/LL-Q7737_%28rus%29-Tatiana_Kerbush-%D0%BF%D0%BE%D0%B1%D0%B5%D0%BB%D0%B5%D1%82%D1%8C.wav.mp3"}],"etymology_text":"по- (po-) + беле́ть (belétʹ)","etymology_templates":[{"name":"af","args":{"1":"ru","2":"по-","3":"беле́ть"},"expansion":"по- (po-) + беле́ть (belétʹ)"}],"word":"побелеть","lang":"Russian","lang_code":"ru","related":[{"roman":"bélyj","word":"бе́лый","_dis1":"0 0 0 0"},{"roman":"belítʹ","word":"бели́ть","_dis1":"0 0 0 0"},{"roman":"belíla","word":"бели́ла","_dis1":"0 0 0 0"},{"roman":"beljósyj","word":"белёсый","_dis1":"0 0 0 0"},{"roman":"belovátyj","word":"белова́тый","_dis1":"0 0 0 0"},{"roman":"belók","word":"бело́к","_dis1":"0 0 0 0"}],"senses":[{"links":[["turn","turn"],["white","white"],["whiten","whiten"]],"synonyms":[{"word":"побледне́ть"}],"glosses":["to turn white, (intransitive) to whiten"],"id":"en-побелеть-ru-verb-CmNvFNei","categories":[{"name":"Russian terms prefixed with по-","kind":"other","parents":[],"source":"w+disamb","_dis":"27 23 27 23"}]},{"links":[["turn","turn"],["gray","gray"]],"synonyms":[{"word":"поседе́ть"}],"raw_glosses":["(colloquial) to turn gray"],"glosses":["to turn gray"],"tags":["colloquial"],"id":"en-побелеть-ru-verb-5awyn6OU","categories":[{"name":"Russian terms prefixed with по-","kind":"other","parents":[],"source":"w+disamb","_dis":"27 23 27 23"}]},{"links":[["brighter","brighter"]],"synonyms":[{"word":"посветле́ть"}],"raw_glosses":["(colloquial) to become brighter"],"glosses":["to become brighter"],"tags":["colloquial"],"id":"en-побелеть-ru-verb-srmrilyR","categories":[{"name":"Pages with 1 entry","kind":"other","parents":[],"source":"w+disamb","_dis":"12 7 74 7"},{"name":"Pages with entries","kind":"other","parents":[],"source":"w+disamb","_dis":"7 6 81 6"},{"name":"Russian entries with incorrect language header","kind":"other","parents":["Entries with incorrect language header","Entry maintenance"],"source":"w+disamb","_dis":"13 5 77 5"},{"name":"Russian terms prefixed with по-","kind":"other","parents":[],"source":"w+disamb","_dis":"27 23 27 23"}]},{"links":[["dawn","dawn"]],"synonyms":[{"word":"рассвести́"},{"word":"свести́"}],"raw_glosses":["(colloquial, impersonal) to dawn"],"glosses":["to dawn"],"tags":["colloquial","impersonal"],"id":"en-побелеть-ru-verb-DJUq4lnp","categories":[{"name":"Russian terms prefixed with по-","kind":"other","parents":[],"source":"w+disamb","_dis":"27 23 27 23"}]}]} \ No newline at end of file +{"pos":"verb","head_templates":[{"name":"ru-verb","args":{"1":"побеле́ть","2":"pf","impf":"беле́ть"},"expansion":"побеле́ть • (pobelétʹ) pf (imperfective беле́ть)"}],"forms":[{"form":"побеле́ть","tags":["canonical"]},{"form":"pobelétʹ","tags":["romanization"]},{"form":"беле́ть","tags":["imperfective"]},{"form":"intransitive perfective","source":"conjugation","tags":["table-tags"]},{"form":"ru-conj","source":"conjugation","tags":["inflection-template"]},{"form":"1a perfective intransitive","source":"conjugation","tags":["class"]},{"form":"побеле́ть","tags":["infinitive","perfective"],"source":"conjugation","roman":"pobelétʹ"},{"form":"-","tags":["active","participle","present"],"source":"conjugation"},{"form":"побеле́вший","tags":["active","participle","past"],"source":"conjugation","roman":"pobelévšij"},{"form":"-","tags":["participle","passive","present"],"source":"conjugation"},{"form":"-","tags":["participle","passive","past"],"source":"conjugation"},{"form":"-","tags":["adverbial","participle","present"],"source":"conjugation"},{"form":"побеле́в","tags":["adverbial","participle","past"],"source":"conjugation","roman":"pobelév"},{"form":"побеле́вши","tags":["adverbial","participle","past"],"source":"conjugation","roman":"pobelévši"},{"form":"-","tags":["first-person","present","singular"],"source":"conjugation"},{"form":"побеле́ю","tags":["first-person","future","singular"],"source":"conjugation","roman":"pobeléju"},{"form":"-","tags":["present","second-person","singular"],"source":"conjugation"},{"form":"побеле́ешь","tags":["future","second-person","singular"],"source":"conjugation","roman":"pobeléješʹ"},{"form":"-","tags":["present","singular","third-person"],"source":"conjugation"},{"form":"побеле́ет","tags":["future","singular","third-person"],"source":"conjugation","roman":"pobeléjet"},{"form":"-","tags":["first-person","plural","present"],"source":"conjugation"},{"form":"побеле́ем","tags":["first-person","future","plural"],"source":"conjugation","roman":"pobeléjem"},{"form":"-","tags":["plural","present","second-person"],"source":"conjugation"},{"form":"побеле́ете","tags":["future","plural","second-person"],"source":"conjugation","roman":"pobeléjete"},{"form":"-","tags":["plural","present","third-person"],"source":"conjugation"},{"form":"побеле́ют","tags":["future","plural","third-person"],"source":"conjugation","roman":"pobeléjut"},{"form":"побеле́й","tags":["imperative","singular"],"source":"conjugation","roman":"pobeléj"},{"form":"побеле́йте","tags":["imperative","plural"],"source":"conjugation","roman":"pobeléjte"},{"form":"побеле́л","tags":["masculine","past","singular"],"source":"conjugation","roman":"pobelél"},{"form":"побеле́ли","tags":["masculine","past","plural"],"source":"conjugation","roman":"pobeléli"},{"form":"побеле́ла","tags":["feminine","past","singular"],"source":"conjugation","roman":"pobeléla"},{"form":"побеле́ли","tags":["feminine","past","plural"],"source":"conjugation","roman":"pobeléli"},{"form":"побеле́ло","tags":["neuter","past","singular"],"source":"conjugation","roman":"pobelélo"},{"form":"побеле́ли","tags":["neuter","past","plural"],"source":"conjugation","roman":"pobeléli"}],"inflection_templates":[{"name":"ru-conj","args":{"1":"pf-intr","2":"1a","3":"побеле́ть"}}],"sounds":[{"ipa":"[pəbʲɪˈlʲetʲ]"},{"audio":"LL-Q7737 (rus)-Tatiana Kerbush-побелеть.wav","ogg_url":"https://upload.wikimedia.org/wikipedia/commons/transcoded/2/24/LL-Q7737_%28rus%29-Tatiana_Kerbush-%D0%BF%D0%BE%D0%B1%D0%B5%D0%BB%D0%B5%D1%82%D1%8C.wav/LL-Q7737_%28rus%29-Tatiana_Kerbush-%D0%BF%D0%BE%D0%B1%D0%B5%D0%BB%D0%B5%D1%82%D1%8C.wav.ogg","mp3_url":"https://upload.wikimedia.org/wikipedia/commons/transcoded/2/24/LL-Q7737_%28rus%29-Tatiana_Kerbush-%D0%BF%D0%BE%D0%B1%D0%B5%D0%BB%D0%B5%D1%82%D1%8C.wav/LL-Q7737_%28rus%29-Tatiana_Kerbush-%D0%BF%D0%BE%D0%B1%D0%B5%D0%BB%D0%B5%D1%82%D1%8C.wav.mp3"}],"etymology_text":"по- (po-) + беле́ть (belétʹ)","etymology_templates":[{"name":"af","args":{"1":"ru","2":"по-","3":"беле́ть"},"expansion":"по- (po-) + беле́ть (belétʹ)"}],"word":"побелеть","lang":"Russian","lang_code":"ru","related":[{"roman":"bélyj","word":"бе́лый","_dis1":"0 0 0 0"},{"roman":"belítʹ","word":"бели́ть","_dis1":"0 0 0 0"},{"roman":"belíla","word":"бели́ла","_dis1":"0 0 0 0"},{"roman":"beljósyj","word":"белёсый","_dis1":"0 0 0 0"},{"roman":"belovátyj","word":"белова́тый","_dis1":"0 0 0 0"},{"roman":"belók","word":"бело́к","_dis1":"0 0 0 0"}],"senses":[{"links":[["turn","turn"],["white","white"],["whiten","whiten"]],"synonyms":[{"word":"побледне́ть"}],"glosses":["to turn white, (intransitive) to whiten"],"id":"en-побелеть-ru-verb-CmNvFNei","categories":[{"name":"Russian terms prefixed with по-","kind":"other","parents":[],"source":"w+disamb","_dis":"27 23 27 23"}]},{"links":[["turn","turn"],["gray","gray"]],"synonyms":[{"word":"поседе́ть"}],"raw_glosses":["(colloquial) to turn gray"],"glosses":["to turn gray"],"tags":["colloquial"],"id":"en-побелеть-ru-verb-5awyn6OU","categories":[{"name":"Russian terms prefixed with по-","kind":"other","parents":[],"source":"w+disamb","_dis":"27 23 27 23"}]},{"links":[["brighter","brighter"]],"synonyms":[{"word":"посветле́ть"}],"raw_glosses":["(colloquial) to become brighter"],"glosses":["to become brighter"],"tags":["colloquial"],"id":"en-побелеть-ru-verb-srmrilyR","categories":[{"name":"Pages with 1 entry","kind":"other","parents":[],"source":"w+disamb","_dis":"12 7 74 7"},{"name":"Pages with entries","kind":"other","parents":[],"source":"w+disamb","_dis":"7 6 81 6"},{"name":"Russian entries with incorrect language header","kind":"other","parents":["Entries with incorrect language header","Entry maintenance"],"source":"w+disamb","_dis":"13 5 77 5"},{"name":"Russian terms prefixed with по-","kind":"other","parents":[],"source":"w+disamb","_dis":"27 23 27 23"}]},{"links":[["dawn","dawn"]],"synonyms":[{"word":"рассвести́"},{"word":"свести́"}],"raw_glosses":["(colloquial, impersonal) to dawn"],"glosses":["to dawn"],"tags":["colloquial","impersonal"],"id":"en-побелеть-ru-verb-DJUq4lnp","categories":[{"name":"Russian terms prefixed with по-","kind":"other","parents":[],"source":"w+disamb","_dis":"27 23 27 23"}]}]} +{"pos":"adj","head_templates":[{"name":"ru-adj","args":{"1":"зи́мний"},"expansion":"зи́мний • (zímnij)"}],"forms":[{"form":"зи́мний","tags":["canonical"]},{"form":"zímnij","tags":["romanization"]},{"form":"no-table-tags","source":"declension","tags":["table-tags"]},{"form":"ru-decl-adj","source":"declension","tags":["inflection-template"]},{"form":"a","source":"declension","tags":["class"]},{"form":"зи́мний","tags":["masculine","nominative"],"source":"declension","roman":"zímnij"},{"form":"зи́мнее","tags":["neuter","nominative"],"source":"declension","roman":"zímneje"},{"form":"зи́мняя","tags":["feminine","nominative"],"source":"declension","roman":"zímnjaja"},{"form":"зи́мние","tags":["nominative","plural"],"source":"declension","roman":"zímnije"},{"form":"зи́мнего","tags":["genitive","masculine","neuter"],"source":"declension","roman":"zímnevo"},{"form":"зи́мней","tags":["feminine","genitive"],"source":"declension","roman":"zímnej"},{"form":"зи́мних","tags":["genitive","plural"],"source":"declension","roman":"zímnix"},{"form":"зи́мнему","tags":["dative","masculine","neuter"],"source":"declension","roman":"zímnemu"},{"form":"зи́мней","tags":["dative","feminine"],"source":"declension","roman":"zímnej"},{"form":"зи́мним","tags":["dative","plural"],"source":"declension","roman":"zímnim"},{"form":"зи́мнего","tags":["accusative","animate","masculine"],"source":"declension","roman":"zímnevo"},{"form":"зи́мнее","tags":["accusative","neuter"],"source":"declension","roman":"zímneje"},{"form":"зи́мнюю","tags":["accusative","feminine"],"source":"declension","roman":"zímnjuju"},{"form":"зи́мних","tags":["accusative","animate","plural"],"source":"declension","roman":"zímnix"},{"form":"зи́мний","tags":["accusative","inanimate","masculine"],"source":"declension","roman":"zímnij"},{"form":"зи́мние","tags":["accusative","inanimate","plural"],"source":"declension","roman":"zímnije"},{"form":"зи́мним","tags":["instrumental","masculine","neuter"],"source":"declension","roman":"zímnim"},{"form":"зи́мней","tags":["feminine","instrumental"],"source":"declension","roman":"zímnej"},{"form":"зи́мнею","tags":["feminine","instrumental"],"source":"declension","roman":"zímneju"},{"form":"зи́мними","tags":["instrumental","plural"],"source":"declension","roman":"zímnimi"},{"form":"зи́мнем","tags":["masculine","neuter","prepositional"],"source":"declension","roman":"zímnem"},{"form":"зи́мней","tags":["feminine","prepositional"],"source":"declension","roman":"zímnej"},{"form":"зи́мних","tags":["plural","prepositional"],"source":"declension","roman":"zímnix"},{"form":"-","tags":["masculine","short-form"],"source":"declension"},{"form":"зи́мне","tags":["neuter","short-form"],"source":"declension","roman":"zímne"},{"form":"зи́мня","tags":["feminine","short-form"],"source":"declension","roman":"zímnja"},{"form":"зи́мни","tags":["plural","short-form"],"source":"declension","roman":"zímni"},{"form":"no-table-tags","source":"declension","tags":["table-tags"]},{"form":"ru-decl-adj","source":"declension","tags":["inflection-template"]},{"form":"a","source":"declension","tags":["class"]},{"form":"зи́мній","tags":["dated","masculine","nominative"],"source":"declension","roman":"zímnij"},{"form":"зи́мнее","tags":["dated","neuter","nominative"],"source":"declension","roman":"zímneje"},{"form":"зи́мняя","tags":["dated","feminine","nominative"],"source":"declension","roman":"zímnjaja"},{"form":"зи́мніе","tags":["dated","masculine","nominative","plural"],"source":"declension","roman":"zímnije"},{"form":"зи́мнія","tags":["dated","feminine","neuter","nominative","plural"],"source":"declension","roman":"zímnija"},{"form":"зи́мняго","tags":["dated","genitive","masculine","neuter"],"source":"declension","roman":"zímnjago"},{"form":"зи́мней","tags":["dated","feminine","genitive"],"source":"declension","roman":"zímnej"},{"form":"зи́мнихъ","tags":["dated","feminine","genitive","masculine","neuter"],"source":"declension","roman":"zímnix"},{"form":"зи́мнему","tags":["dated","dative","masculine","neuter"],"source":"declension","roman":"zímnemu"},{"form":"зи́мней","tags":["dated","dative","feminine"],"source":"declension","roman":"zímnej"},{"form":"зи́мнимъ","tags":["dated","dative","feminine","masculine","neuter"],"source":"declension","roman":"zímnim"},{"form":"зи́мняго","tags":["accusative","animate","dated","masculine"],"source":"declension","roman":"zímnjago"},{"form":"зи́мнее","tags":["accusative","dated","neuter"],"source":"declension","roman":"zímneje"},{"form":"зи́мнюю","tags":["accusative","dated","feminine"],"source":"declension","roman":"zímnjuju"},{"form":"зи́мнихъ","tags":["accusative","animate","dated","feminine","masculine","neuter"],"source":"declension","roman":"zímnix"},{"form":"зи́мній","tags":["accusative","dated","inanimate","masculine"],"source":"declension","roman":"zímnij"},{"form":"зи́мніе","tags":["accusative","dated","inanimate","masculine","plural"],"source":"declension","roman":"zímnije"},{"form":"зи́мнія","tags":["accusative","dated","feminine","inanimate","neuter","plural"],"source":"declension","roman":"zímnija"},{"form":"зи́мнимъ","tags":["dated","instrumental","masculine","neuter"],"source":"declension","roman":"zímnim"},{"form":"зи́мней","tags":["dated","feminine","instrumental"],"source":"declension","roman":"zímnej"},{"form":"зи́мнею","tags":["dated","feminine","instrumental"],"source":"declension","roman":"zímneju"},{"form":"зи́мними","tags":["dated","feminine","instrumental","masculine","neuter"],"source":"declension","roman":"zímnimi"},{"form":"зи́мнемъ","tags":["dated","masculine","neuter","prepositional"],"source":"declension","roman":"zímnem"},{"form":"зи́мней","tags":["dated","feminine","prepositional"],"source":"declension","roman":"zímnej"},{"form":"зи́мнихъ","tags":["dated","feminine","masculine","neuter","prepositional"],"source":"declension","roman":"zímnix"},{"form":"-","tags":["dated","masculine","short-form"],"source":"declension"},{"form":"зи́мне","tags":["dated","neuter","short-form"],"source":"declension","roman":"zímne"},{"form":"зи́мня","tags":["dated","feminine","short-form"],"source":"declension","roman":"zímnja"},{"form":"зи́мни","tags":["dated","feminine","masculine","neuter","short-form"],"source":"declension","roman":"zímni"}],"inflection_templates":[{"name":"ru-decl-adj","args":{"1":"зи́мний","2":"a","short_m":"-"}},{"name":"ru-decl-adj","args":{"1":"зи́мній","2":"a","old":"1","short_m":"-"}}],"sounds":[{"ipa":"[ˈzʲimnʲɪj]"},{"audio":"Ru-зимний.ogg","ogg_url":"https://upload.wikimedia.org/wikipedia/commons/4/44/Ru-%D0%B7%D0%B8%D0%BC%D0%BD%D0%B8%D0%B9.ogg","mp3_url":"https://upload.wikimedia.org/wikipedia/commons/transcoded/4/44/Ru-%D0%B7%D0%B8%D0%BC%D0%BD%D0%B8%D0%B9.ogg/Ru-%D0%B7%D0%B8%D0%BC%D0%BD%D0%B8%D0%B9.ogg.mp3"}],"etymology_text":"Inherited from Proto-Slavic *zimьnъ. By surface analysis, зима́ (zimá) + -ний (-nij).","etymology_templates":[{"name":"glossary","args":{"1":"Inherited"},"expansion":"Inherited"},{"name":"inh","args":{"1":"ru","2":"sla-pro","3":"*zimьnъ","4":"","5":"","lit":"","pos":"","tr":"","ts":"","id":"","sc":"","g":"","g2":"","g3":"","nocat":"","sort":""},"expansion":"Proto-Slavic *zimьnъ"},{"name":"inh+","args":{"1":"ru","2":"sla-pro","3":"*zimьnъ"},"expansion":"Inherited from Proto-Slavic *zimьnъ"},{"name":"surf","args":{"1":"ru","2":"зима́","3":"-ний"},"expansion":"By surface analysis, зима́ (zimá) + -ний (-nij)"}],"word":"зимний","lang":"Russian","lang_code":"ru","synonyms":[{"roman":"zimá","word":"зима́","_dis1":"0 0"},{"roman":"zímnik","word":"зи́мник","_dis1":"0 0"},{"roman":"zimóvʹje","word":"зимо́вье","_dis1":"0 0"},{"roman":"zimóvka","word":"зимо́вка","_dis1":"0 0"},{"roman":"zimovátʹ","word":"зимова́ть","_dis1":"0 0"},{"roman":"po-zímnemu","word":"по-зи́мнему","_dis1":"0 0"}],"related":[{"roman":"vesénnij","word":"весе́нний","_dis1":"0 0"},{"roman":"létnij","word":"ле́тний","_dis1":"0 0"},{"roman":"osénnij","word":"осе́нний","_dis1":"0 0"},{"roman":"zímnij sad","english":"conservatory","word":"зи́мний сад","_dis1":"0 0"}],"senses":[{"examples":[{"text":"Зи́мний дворе́ц","english":"Winter palace","type":"example","roman":"Zímnij dvoréc"}],"links":[["winter","winter"]],"raw_glosses":["(relational) winter"],"glosses":["winter"],"tags":["relational"],"id":"en-зимний-ru-adj-MMVGH8J7","categories":[]},{"links":[["wintry","wintry"],["hibernal","hibernal"]],"glosses":["wintry, hibernal"],"id":"en-зимний-ru-adj-jqxvDNew","categories":[{"name":"Pages with 2 entries","kind":"other","parents":[],"source":"w+disamb","_dis":"0 100"},{"name":"Pages with entries","kind":"other","parents":[],"source":"w+disamb","_dis":"0 100"},{"name":"Russian adjectives with short accent pattern a","kind":"other","parents":[],"source":"w+disamb","_dis":"37 63"},{"name":"Russian entries with incorrect language header","kind":"other","parents":["Entries with incorrect language header","Entry maintenance"],"source":"w+disamb","_dis":"2 98"},{"name":"Russian terms suffixed with -ний","kind":"other","parents":[],"source":"w+disamb","_dis":"0 100"}]}]} \ No newline at end of file diff --git a/data/test/tidy/ru-en-forms-0.json b/data/test/tidy/ru-en-forms-0.json index f246c3b..f999267 100644 --- a/data/test/tidy/ru-en-forms-0.json +++ b/data/test/tidy/ru-en-forms-0.json @@ -442,6 +442,233 @@ ] ] } + ], + [ + "зи́мний", + { + "_type": "map", + "map": [ + [ + "зи́мнее", + { + "_type": "map", + "map": [ + [ + "adj", + [ + "accusative neuter", + "nominative neuter" + ] + ] + ] + } + ], + [ + "зи́мняя", + { + "_type": "map", + "map": [ + [ + "adj", + [ + "nominative feminine" + ] + ] + ] + } + ], + [ + "зи́мние", + { + "_type": "map", + "map": [ + [ + "adj", + [ + "accusative plural inanimate", + "nominative plural" + ] + ] + ] + } + ], + [ + "зи́мнего", + { + "_type": "map", + "map": [ + [ + "adj", + [ + "accusative masculine animate", + "genitive masculine neuter" + ] + ] + ] + } + ], + [ + "зи́мней", + { + "_type": "map", + "map": [ + [ + "adj", + [ + "dative feminine", + "genitive feminine", + "instrumental feminine", + "prepositional feminine" + ] + ] + ] + } + ], + [ + "зи́мних", + { + "_type": "map", + "map": [ + [ + "adj", + [ + "accusative plural animate", + "genitive plural", + "prepositional plural" + ] + ] + ] + } + ], + [ + "зи́мнему", + { + "_type": "map", + "map": [ + [ + "adj", + [ + "dative masculine neuter" + ] + ] + ] + } + ], + [ + "зи́мним", + { + "_type": "map", + "map": [ + [ + "adj", + [ + "instrumental masculine neuter", + "dative plural" + ] + ] + ] + } + ], + [ + "зи́мнюю", + { + "_type": "map", + "map": [ + [ + "adj", + [ + "accusative feminine" + ] + ] + ] + } + ], + [ + "зи́мнею", + { + "_type": "map", + "map": [ + [ + "adj", + [ + "instrumental feminine" + ] + ] + ] + } + ], + [ + "зи́мними", + { + "_type": "map", + "map": [ + [ + "adj", + [ + "instrumental plural" + ] + ] + ] + } + ], + [ + "зи́мнем", + { + "_type": "map", + "map": [ + [ + "adj", + [ + "prepositional masculine neuter" + ] + ] + ] + } + ], + [ + "зи́мне", + { + "_type": "map", + "map": [ + [ + "adj", + [ + "neuter short-form" + ] + ] + ] + } + ], + [ + "зи́мня", + { + "_type": "map", + "map": [ + [ + "adj", + [ + "feminine short-form" + ] + ] + ] + } + ], + [ + "зи́мни", + { + "_type": "map", + "map": [ + [ + "adj", + [ + "plural short-form" + ] + ] + ] + } + ] + ] + } ] ] } \ No newline at end of file diff --git a/data/test/tidy/ru-en-lemmas.json b/data/test/tidy/ru-en-lemmas.json index 85b71ab..f4dabde 100644 --- a/data/test/tidy/ru-en-lemmas.json +++ b/data/test/tidy/ru-en-lemmas.json @@ -168,5 +168,63 @@ } } } + }, + "зи́мний": { + "зи́мний": { + "adj": { + "0": { + "ipa": [ + { + "ipa": "[ˈzʲimnʲɪj]", + "tags": [] + } + ], + "glossTree": { + "_type": "map", + "map": [ + [ + "(relational) winter", + { + "_type": "map", + "map": [ + [ + "_tags", + [ + "relational" + ] + ], + [ + "_examples", + [ + { + "text": "Зи́мний дворе́ц", + "translation": "Winter palace" + } + ] + ] + ] + } + ], + [ + "wintry, hibernal", + { + "_type": "map", + "map": [ + [ + "_tags", + [] + ], + [ + "_examples", + [] + ] + ] + } + ] + ] + } + } + } + } } } \ No newline at end of file From 7d98101bdd0d08d94881d7d5f951f69c2e4b0af7 Mon Sep 17 00:00:00 2001 From: seth-js <83692925+seth-js@users.noreply.github.com> Date: Fri, 20 Dec 2024 03:39:57 -0600 Subject: [PATCH 4/5] improve noun gender coloring Changed masculine, feminine, and neuter tag coloring to something more visually pleasing. --- data/language/target-language-tags/en/tag_styles.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/data/language/target-language-tags/en/tag_styles.json b/data/language/target-language-tags/en/tag_styles.json index cf739b0..5f4ec88 100644 --- a/data/language/target-language-tags/en/tag_styles.json +++ b/data/language/target-language-tags/en/tag_styles.json @@ -1,5 +1,5 @@ { - "masc": ".tag[data-details='masculine'] .tag-label{background-color: blue;}", - "fem": ".tag[data-details='feminine'] .tag-label{background-color: red;}", - "neut": ".tag[data-details='neuter'] .tag-label{background-color: green;}" + "masc": ".tag[data-details='masculine'] .tag-label{background-color: #4d82e8;}", + "fem": ".tag[data-details='feminine'] .tag-label{background-color: #ca4d93;}", + "neut": ".tag[data-details='neuter'] .tag-label{background-color: #40ac65;}" } \ No newline at end of file From 21cebbedc5e76d4af5bc73a3bf21cee70e8a0c23 Mon Sep 17 00:00:00 2001 From: seth-js <83692925+seth-js@users.noreply.github.com> Date: Fri, 20 Dec 2024 03:53:58 -0600 Subject: [PATCH 5/5] add missing noun gender and verb aspect Extracts noun gender and verb aspect from headword `expansion`. Should work excellent for Slavic languages, and even Latin. --- 3-tidy-up.js | 27 ++++++++++++++++++++++++++- data/test/dict/la/en/tag_bank_1.json | 21 ++++++++++++++------- data/test/dict/la/en/term_bank_1.json | 4 ++-- data/test/dict/ru/en/tag_bank_1.json | 21 +++++++++++++++++++++ data/test/dict/ru/en/term_bank_1.json | 12 ++++++------ data/test/tidy/la-en-lemmas.json | 15 ++++++++++----- data/test/tidy/ru-en-lemmas.json | 26 +++++++++++++++++++------- types.ts | 1 + 8 files changed, 99 insertions(+), 28 deletions(-) diff --git a/3-tidy-up.js b/3-tidy-up.js index 9ea0d13..7365196 100644 --- a/3-tidy-up.js +++ b/3-tidy-up.js @@ -132,7 +132,7 @@ function handleLine(parsedLine) { processForms(forms, word, pos); - const {senses} = parsedLine; + const {senses, head_templates} = parsedLine; if (!senses) return; /** @type {IpaInfo[]} */ @@ -165,6 +165,31 @@ function handleLine(parsedLine) { tags.push(...sense.raw_tags); } + if (head_templates && targetIso === 'en') { + const tagMatch = [ + ['pf', 'perfective'], + ['impf', 'imperfective'], + ['m', 'masculine'], + ['f', 'feminine'], + ['n', 'neuter'], + ['inan', 'inanimate'], + ['anim', 'animate'], + ]; + + for (const entry of head_templates) { + if (entry.expansion) { + for (const [match, tag] of tagMatch) { + if ( + entry.expansion.replace(/\(.+?\)/g, '').split(' ').includes(match) && + !tags.includes(tag) + ) { + tags.push(tag); + } + } + } + } + } + return {...sense, glossesArray, tags}; })); diff --git a/data/test/dict/la/en/tag_bank_1.json b/data/test/dict/la/en/tag_bank_1.json index c8538dd..bfd363e 100644 --- a/data/test/dict/la/en/tag_bank_1.json +++ b/data/test/dict/la/en/tag_bank_1.json @@ -6,6 +6,13 @@ "declension-1", 0 ], + [ + "fem", + "", + -1, + "feminine", + 1 + ], [ "n", "partOfSpeech", @@ -48,6 +55,13 @@ "declension-2", 0 ], + [ + "neut", + "", + -1, + "neuter", + 1 + ], [ "not-comp", "", @@ -76,13 +90,6 @@ "declension-4", 0 ], - [ - "fem", - "", - -1, - "feminine", - 1 - ], [ "irreg", "", diff --git a/data/test/dict/la/en/term_bank_1.json b/data/test/dict/la/en/term_bank_1.json index 56c6989..7a82a41 100644 --- a/data/test/dict/la/en/term_bank_1.json +++ b/data/test/dict/la/en/term_bank_1.json @@ -2,7 +2,7 @@ [ "fama", "fāma", - "decl-1 n", + "decl-1 fem n", "n", 0, [ @@ -324,7 +324,7 @@ [ "lilium", "līlium", - "decl-2 n", + "decl-2 neut n", "n", 0, [ diff --git a/data/test/dict/ru/en/tag_bank_1.json b/data/test/dict/ru/en/tag_bank_1.json index c972c2f..a024656 100644 --- a/data/test/dict/ru/en/tag_bank_1.json +++ b/data/test/dict/ru/en/tag_bank_1.json @@ -1,4 +1,18 @@ [ + [ + "masc", + "", + -1, + "masculine", + 1 + ], + [ + "inanim", + "", + 0, + "inanimate", + 0 + ], [ "n", "partOfSpeech", @@ -20,6 +34,13 @@ "slang", 0 ], + [ + "pf", + "", + 0, + "perfective", + 0 + ], [ "v", "partOfSpeech", diff --git a/data/test/dict/ru/en/term_bank_1.json b/data/test/dict/ru/en/term_bank_1.json index 7c33042..c1b4097 100644 --- a/data/test/dict/ru/en/term_bank_1.json +++ b/data/test/dict/ru/en/term_bank_1.json @@ -2,7 +2,7 @@ [ "снег", "", - "n", + "masc inanim n", "n", 0, [ @@ -80,7 +80,7 @@ [ "снег", "", - "fig n", + "fig masc inanim n", "n", 0, [ @@ -102,7 +102,7 @@ [ "снег", "", - "sl n", + "sl masc inanim n", "n", 0, [ @@ -124,7 +124,7 @@ [ "побелеть", "побеле́ть", - "v", + "pf v", "v", 0, [ @@ -146,7 +146,7 @@ [ "побелеть", "побеле́ть", - "col v", + "col pf v", "v", 0, [ @@ -179,7 +179,7 @@ [ "побелеть", "побеле́ть", - "col impers v", + "col impers pf v", "v", 0, [ diff --git a/data/test/tidy/la-en-lemmas.json b/data/test/tidy/la-en-lemmas.json index 3ddace5..b065f0e 100644 --- a/data/test/tidy/la-en-lemmas.json +++ b/data/test/tidy/la-en-lemmas.json @@ -35,7 +35,8 @@ [ "_tags", [ - "declension-1" + "declension-1", + "feminine" ] ], [ @@ -53,7 +54,8 @@ [ "_tags", [ - "declension-1" + "declension-1", + "feminine" ] ], [ @@ -80,7 +82,8 @@ [ "_tags", [ - "declension-1" + "declension-1", + "feminine" ] ], [ @@ -107,7 +110,8 @@ [ "_tags", [ - "declension-1" + "declension-1", + "feminine" ] ], [ @@ -339,7 +343,8 @@ [ "_tags", [ - "declension-2" + "declension-2", + "neuter" ] ], [ diff --git a/data/test/tidy/ru-en-lemmas.json b/data/test/tidy/ru-en-lemmas.json index f4dabde..a57ca15 100644 --- a/data/test/tidy/ru-en-lemmas.json +++ b/data/test/tidy/ru-en-lemmas.json @@ -19,7 +19,10 @@ "map": [ [ "_tags", - [] + [ + "masculine", + "inanimate" + ] ], [ "_examples", @@ -45,7 +48,9 @@ [ "_tags", [ - "figuratively" + "figuratively", + "masculine", + "inanimate" ] ], [ @@ -63,7 +68,9 @@ [ "_tags", [ - "slang" + "slang", + "masculine", + "inanimate" ] ], [ @@ -99,7 +106,9 @@ "map": [ [ "_tags", - [] + [ + "perfective" + ] ], [ "_examples", @@ -116,7 +125,8 @@ [ "_tags", [ - "colloquial" + "colloquial", + "perfective" ] ], [ @@ -134,7 +144,8 @@ [ "_tags", [ - "colloquial" + "colloquial", + "perfective" ] ], [ @@ -153,7 +164,8 @@ "_tags", [ "colloquial", - "impersonal" + "impersonal", + "perfective" ] ], [ diff --git a/types.ts b/types.ts index 410bae4..5cbf93a 100644 --- a/types.ts +++ b/types.ts @@ -25,6 +25,7 @@ declare global { type HeadTemplate = { name?: string; args?: string[]; + expansion?: string; } type Sound = {