Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add etymology entries to definitions #183

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 37 additions & 2 deletions 3-tidy-up.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
const { writeFileSync } = require('fs');
const { writeFileSync, readdirSync, unlinkSync } = require('fs');

const LineByLineReader = require('line-by-line');

Expand Down Expand Up @@ -125,7 +125,7 @@ lr.on('line', (line) => {
* @param {KaikkiLine} parsedLine
*/
function handleLine(parsedLine) {
const { pos, sounds, forms, etymology_number = 0 } = parsedLine;
const { pos, sounds, forms, etymology_number = 0, etymology_text } = parsedLine;
if(!pos) return;
const word = getCanonicalWordForm(parsedLine);
if (!word) return;
Expand Down Expand Up @@ -209,6 +209,20 @@ function handleLine(parsedLine) {
saveIpaResult(word, readings, pos, String(etymology_number), ipaObj);
}

for (const reading of readings) {
if (etymology_text) {
const breakdown = breakdownEtymology(etymology_text);

if (
targetIso === 'en' && breakdown
) {
lemmaDict[word][reading][pos][etymology_number].breakdown_text = breakdown;
}

lemmaDict[word][reading][pos][etymology_number].etymology_text = etymology_text;
}
}

const glossTree = getGlossTree(sensesWithoutInflectionGlosses);

for (const reading of readings) {
Expand All @@ -229,6 +243,21 @@ function handleLine(parsedLine) {

}

/**
 * Extracts the first fragment of an etymology text that looks like a
 * morphological breakdown (i.e. contains ' + ').
 *
 * The text is cut at every `;`, `.` and `*`, each fragment is trimmed, and the
 * first fragment that contains ' + ' but does not contain 'Proto' is returned
 * with its first 'By surface analysis, ' occurrence removed.
 *
 * @param {string} text
 * @returns {string} The matching fragment, or '' when no fragment qualifies.
 */
function breakdownEtymology(text) {
  const fragments = text.split(/;|\.|\*/g).map((fragment) => fragment.trim());
  const candidate = fragments.find(
    (fragment) => fragment.includes(' + ') && !fragment.includes('Proto'),
  );

  if (candidate === undefined) {
    return '';
  }

  return candidate.replace('By surface analysis, ', '');
}

/**
* @param {Example} example
* @returns {StandardizedExample}
Expand Down Expand Up @@ -638,6 +667,12 @@ lr.on('end', () => {
clearConsoleLine();
process.stdout.write(`Processed ${lineCount} lines...\n`);

for (const file of readdirSync(writeFolder)) {
if (file.includes(`${sourceIso}-${targetIso}`)) {
unlinkSync(`${writeFolder}/${file}`);
}
}

const lemmasFilePath = `${writeFolder}/${sourceIso}-${targetIso}-lemmas.json`;
consoleOverwrite(`3-tidy-up.js: Writing lemma dict to ${lemmasFilePath}...`);
writeFileSync(lemmasFilePath, JSON.stringify(lemmaDict, mapJsonReplacer));
Expand Down
69 changes: 69 additions & 0 deletions 4-make-yomitan.js
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,61 @@ function getStructuredExamples(examples) {
});
}

/**
 * Wraps content in a structured-content `span` node whose `data.content`
 * attribute is set to the given value.
 *
 * @param {String} attribute Value stored under `data.content`.
 * @param {import('types').TermBank.StructuredContent} content Content placed inside the span.
 * @returns {import('types').TermBank.StructuredContent}
 */
function getStructuredEtymSpan(attribute, content) {
  const data = { content: attribute };

  // Key order (tag, data, content) is kept so serialised output is unchanged.
  return { tag: 'span', data, content };
}

/**
 * Builds the structured-content node that displays an etymology.
 *
 * When `breakdown` is non-empty and occurs inside `etymology`, the entry is
 * emitted as a list of spans: a '📝 ' marker, the text before the breakdown
 * ('normal-text'), the breakdown itself ('target-text') and the text after it
 * ('normal-text'). Empty before/after segments are omitted. Otherwise the
 * entry content is the plain string `📝 ${etymology}`.
 *
 * @param {String} etymology Full etymology text.
 * @param {String} breakdown Fragment of `etymology` to highlight; '' for none.
 * @returns {import('types').TermBank.StructuredContent}
 */
function getStructuredEtym(etymology, breakdown = '') {
  /** @type {import('types').TermBank.StructuredContentNode[]} */
  const result = [];

  // Only the FIRST occurrence is highlighted. Slicing around its index (rather
  // than destructuring `etymology.split(breakdown)`) keeps the whole remainder
  // of the text even when the breakdown appears again later on.
  const start = breakdown ? etymology.indexOf(breakdown) : -1;

  if (start !== -1) {
    const before = etymology.slice(0, start);
    const after = etymology.slice(start + breakdown.length);

    result.push(getStructuredEtymSpan('target-text', '📝 '));

    if (before) {
      result.push(getStructuredEtymSpan('normal-text', before));
    }

    result.push(getStructuredEtymSpan('target-text', breakdown));

    if (after) {
      result.push(getStructuredEtymSpan('normal-text', after));
    }
  }

  return {
    tag: 'div',
    data: {
      content: 'extra-info',
    },
    content: {
      tag: 'div',
      data: {
        content: 'etymology-entry',
      },
      content: result.length > 0 ? result : `📝 ${etymology}`,
    },
  };
}

/**
* @param {GlossTwig} glossTwig
* @param {string[]} senseTags
Expand Down Expand Up @@ -315,6 +370,20 @@ let lastTermBankIndex = 0;

debug(entries);
for (const [tags, entry] of Object.entries(entries)) {
if (info.etymology_text) {
const lastDef = entry[5][entry[5].length -1];

if (
lastDef &&
typeof lastDef === 'object' &&
'type' in lastDef &&
lastDef.type === 'structured-content' &&
Array.isArray(lastDef.content)
) {
lastDef.content.push(getStructuredEtym(info.etymology_text, info.breakdown_text));
}
}

ymtLemmas.push(entry);
}
}
Expand Down
11 changes: 9 additions & 2 deletions data/styles.css
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
div[data-sc-content="extra-info"] {
margin-left: 0.5em;
}
div[data-sc-content="example-sentence"] {
div[data-sc-content="example-sentence"],
div[data-sc-content="etymology-entry"] {
background-color: color-mix(in srgb, var(--text-color, var(--fg, #333)) 5%, transparent);
border-color: var(--text-color, var(--fg, #333));
border-style: none none none solid;
Expand All @@ -15,6 +16,12 @@ div[data-sc-content="example-sentence-a"] {
font-size: 1.1em;
font-style: italic;
}
div[data-sc-content="example-sentence-b"] {
div[data-sc-content="example-sentence-b"],
div[data-sc-content="etymology-entry"] {
font-size: 0.8em;
}
div[data-sc-content="etymology-entry"] {
opacity: 0.8;
border-color: #c1c1c1;
display: none;
}
39 changes: 39 additions & 0 deletions data/test/dict/cs/en/term_bank_1.json
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,19 @@
}
}
]
},
{
"tag": "div",
"data": {
"content": "extra-info"
},
"content": {
"tag": "div",
"data": {
"content": "etymology-entry"
},
"content": "📝 Deverbal from zpravit."
}
}
]
}
Expand Down Expand Up @@ -187,6 +200,19 @@
}
}
]
},
{
"tag": "div",
"data": {
"content": "extra-info"
},
"content": {
"tag": "div",
"data": {
"content": "etymology-entry"
},
"content": "📝 Inherited from Old Czech pro, from Proto-Slavic *pro."
}
}
]
}
Expand All @@ -209,6 +235,19 @@
"content": [
"(reflexive with se) to dispute"
]
},
{
"tag": "div",
"data": {
"content": "extra-info"
},
"content": {
"tag": "div",
"data": {
"content": "etymology-entry"
},
"content": "📝 Inherited from Old Czech přieti, from Proto-Slavic *pьrěti."
}
}
]
}
Expand Down
26 changes: 26 additions & 0 deletions data/test/dict/de/de/term_bank_1.json
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,19 @@
}
}
]
},
{
"tag": "div",
"data": {
"content": "extra-info"
},
"content": {
"tag": "div",
"data": {
"content": "etymology-entry"
},
"content": "📝 von arabisch/persisch ruh, roh entlehnt, = arabisch: الرُخّ (ar-ruchch, aus dem Persischen)"
}
}
]
}
Expand Down Expand Up @@ -259,6 +272,19 @@
}
}
]
},
{
"tag": "div",
"data": {
"content": "extra-info"
},
"content": {
"tag": "div",
"data": {
"content": "etymology-entry"
},
"content": "📝 seit dem 20. Jahrhundert bezeugte Entlehnung aus gleichbedeutendem französisch garage ^(→ fr) m, eigentlich „(das) Ausweichen, Ausweichstelle“; dieses ist eine deverbative Ableitung von französisch garer ^(→ fr) „in eine sichere Verwahrstelle bringen; in Sicherheit bringen; ausweichen“, das seinerseits aus okzitanisch garar ^(→ oc) „Acht geben, bewahren“ übernommen wurde; dieses entstammt entweder mit Übergang von w- zu g- der (nicht belegbaren, aber rekonstruierten) germanischen Form *war-ō- „beachten“ (vergleiche »wahren«) oder dem lateinischen varāre ^(→ la) „ausweichen“ (zu lateinisch vārus ^(→ la) „auseinandergebogen“)"
}
}
]
}
Expand Down
Loading
Loading