Skip to content

Commit

Permalink
Blacklist romanization (#130)
Browse files Browse the repository at this point in the history
* mvp

* mvp

* multiple readings

* initialize

* handle multiple readings better

* finish

* blacklist romanization inflection
  • Loading branch information
StefanVukovic99 authored Aug 11, 2024
1 parent d496ba0 commit 87102b9
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 374 deletions.
1 change: 1 addition & 0 deletions 3-tidy-up.js
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ const blacklistedTags = [
'obsolete',
'archaic',
'used-in-the-form',
'romanization'
];

const identityTags = [
Expand Down
21 changes: 2 additions & 19 deletions data/test/dict/fa/en/term_bank_2.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[
[
"fârsi",
"فارسی",
"",
"non-lemma",
"",
Expand All @@ -9,24 +9,7 @@
[
"فارْسی",
[
"romanization"
]
]
],
0,
""
],
[
"molk",
"",
"non-lemma",
"",
0,
[
[
"ملک",
[
"romanization"
"فارسی ≈ فارْسی"
]
]
],
Expand Down
188 changes: 23 additions & 165 deletions data/test/dict/ja/en/term_bank_2.json
Original file line number Diff line number Diff line change
@@ -1,33 +1,4 @@
[
[
"tanoshii",
"",
"non-lemma",
"",
0,
[
[
"楽しい",
[
"attributive"
]
],
[
"楽しい",
[
"romanization"
]
],
[
"楽しい",
[
"terminative"
]
]
],
0,
""
],
[
"楽しく",
"",
Expand Down Expand Up @@ -177,6 +148,29 @@
0,
""
],
[
"tanoshii",
"",
"non-lemma",
"",
0,
[
[
"楽しい",
[
"attributive"
]
],
[
"楽しい",
[
"terminative"
]
]
],
0,
""
],
[
"楽しけれ",
"",
Expand Down Expand Up @@ -922,23 +916,6 @@
0,
""
],
[
"suki",
"",
"non-lemma",
"",
0,
[
[
"好き",
[
"romanization"
]
]
],
0,
""
],
[
"好きな",
"",
Expand Down Expand Up @@ -2142,40 +2119,6 @@
0,
""
],
[
"tanuki",
"",
"non-lemma",
"",
0,
[
[
"",
[
"romanization"
]
]
],
0,
""
],
[
"hashiru",
"",
"non-lemma",
"",
0,
[
[
"走る",
[
"romanization"
]
]
],
0,
""
],
[
"走り",
"",
Expand Down Expand Up @@ -3505,40 +3448,6 @@
0,
""
],
[
"goshiki",
"",
"non-lemma",
"",
0,
[
[
"五色",
[
"romanization"
]
]
],
0,
""
],
[
"gosiki",
"",
"non-lemma",
"",
0,
[
[
"五色",
[
"romanization"
]
]
],
0,
""
],
[
"ごしき",
"",
Expand All @@ -3557,40 +3466,6 @@
0,
""
],
[
"goshoku",
"",
"non-lemma",
"",
0,
[
[
"五色",
[
"romanization"
]
]
],
0,
""
],
[
"gosyoku",
"",
"non-lemma",
"",
0,
[
[
"五色",
[
"romanization"
]
]
],
0,
""
],
[
"ごしよく",
"",
Expand All @@ -3608,22 +3483,5 @@
],
0,
""
],
[
"onaka ga suita",
"",
"non-lemma",
"",
0,
[
[
"お腹が空いた",
[
"romanization"
]
]
],
0,
""
]
]
47 changes: 1 addition & 46 deletions data/test/tidy/fa-en-forms-0.json
Original file line number Diff line number Diff line change
@@ -1,49 +1,4 @@
{
"_type": "map",
"map": [
[
"ملک",
{
"_type": "map",
"map": [
[
"molk",
{
"_type": "map",
"map": [
[
"noun",
[
"romanization"
]
]
]
}
]
]
}
],
[
"فارْسی",
{
"_type": "map",
"map": [
[
"fârsi",
{
"_type": "map",
"map": [
[
"noun",
[
"romanization"
]
]
]
}
]
]
}
]
]
"map": []
}
Loading

0 comments on commit 87102b9

Please sign in to comment.