yomidevs · StefanVukovic99 · Jan 3, 2025 · Jan 2, 2025 · Jan 2, 2025 · Jan 3, 2025
diff --git a/.gitignore b/.gitignore
@@ -16,6 +16,7 @@
 !data/test/ipa/**/*.json
 
 *.zip
+*.gz
 data/**/*.css
 !data/styles.css
 

diff --git a/3-tidy-up.js b/3-tidy-up.js
@@ -1,4 +1,4 @@
-const { writeFileSync } = require('fs');
+const { writeFileSync, readdirSync, unlinkSync } = require('fs');
 
 const LineByLineReader = require('line-by-line');
 
@@ -125,7 +125,7 @@ lr.on('line', (line) => {
  * @param {KaikkiLine} parsedLine 
  */
 function handleLine(parsedLine) {
-    const { pos, sounds, forms, etymology_number = 0 } = parsedLine;
+    const { pos, sounds, forms, etymology_number = 0, etymology_text} = parsedLine;
     if(!pos) return;
     const word = getCanonicalWordForm(parsedLine);
     if (!word) return;
@@ -209,6 +209,33 @@ function handleLine(parsedLine) {
         saveIpaResult(word, readings, pos, String(etymology_number), ipaObj);
     }
 
+    for (const reading of readings) {
+        const currentEntry = lemmaDict[word][reading][pos][etymology_number];
+
+        if (etymology_text) {
+            const morphemeText = getMorphemes(etymology_text);
+
+            if (targetIso === 'en' && morphemeText) {
+                if (morphemeText === etymology_text) {
+                    currentEntry.morpheme_text = morphemeText;
+                } else {
+                    currentEntry.etymology_text = etymology_text;
+                    currentEntry.morpheme_text = morphemeText;
+                }
+            } else {
+                currentEntry.etymology_text = etymology_text;
+            }
+        }
+
+        if (head_templates) {
+            const headInfo = getHeadInfo(head_templates);
+
+            if (headInfo) {
+                lemmaDict[word][reading][pos][etymology_number].head_info_text = headInfo;
+            }
+        }
+    }
+
     const glossTree = getGlossTree(sensesWithoutInflectionGlosses);
 
     for (const reading of readings) {
@@ -229,6 +256,32 @@ function handleLine(parsedLine) {
 
 }
 
+/** Picks the first segment of `text` that contains ' + ' and matches neither 'Proto' nor 'Inherited from'.
+ * @param {string} text - split right after every '.', with each segment trimmed before it is tested
+ * @returns {string} the first qualifying segment, or '' when there is none
+ * */
+function getMorphemes(text) {
+    for (const part of text.split(/(?<=\.)/g).map(item => item.trim())) {
+        if (part.includes(' + ') && !/Proto|Inherited from/.test(part)) { return part; }
+    }
+
+    return '';
+}
+
+/** Finds the first entry whose truthy `expansion` has at least one character between a '(' and a ')'.
+ * @param {HeadTemplate[]} head_templates - entries are checked in order; only their `expansion` is read
+ * @returns {string} the whole matching expansion (not only the parenthesised part), or '' when none qualifies
+ * */
+function getHeadInfo(head_templates) {
+    for (const entry of head_templates) {
+        if (entry.expansion) {
+            if (/(?<=\().+?(?=\))/.test(entry.expansion)) return entry.expansion;
+        }
+    }
+
+    return '';
+}
+
 /**
  * @param {Example} example
  * @returns {StandardizedExample}
@@ -638,6 +691,12 @@ lr.on('end', () => {
     clearConsoleLine();
     process.stdout.write(`Processed ${lineCount} lines...\n`);
 
+    for (const file of readdirSync(writeFolder)) {
+        if (file.includes(`${sourceIso}-${targetIso}`)) {
+            unlinkSync(`${writeFolder}/${file}`);
+        }
+    }
+
     const lemmasFilePath = `${writeFolder}/${sourceIso}-${targetIso}-lemmas.json`;
     consoleOverwrite(`3-tidy-up.js: Writing lemma dict to ${lemmasFilePath}...`);
     writeFileSync(lemmasFilePath, JSON.stringify(lemmaDict, mapJsonReplacer));

diff --git a/4-make-yomitan.js b/4-make-yomitan.js
@@ -145,6 +145,66 @@ function getStructuredExamples(examples) {
     });
 }
 
+/** Builds a `details` node holding a `summary` labelled with `type`, followed by a `div` that carries `content`.
+ * @param {string} type - used as the summary text and inside the `details-entry-${type}` and `${type}-content` data names
+ * @param {string} content - placed unchanged as the content of the inner div
+ * @returns {import('types').TermBank.StructuredContent}
+ */
+function buildDetailsEntry(type, content) {
+    return {
+        "tag": "details",
+        "data": {
+            "content": `details-entry-${type}`
+        },
+        "content": [
+            {
+                "tag": "summary",
+                "data": {
+                    "content": "summary-entry"
+                },
+                "content": type
+            },
+            {
+                "tag": "div",
+                "data": {
+                    "content": `${type}-content`
+                },
+                "content": content
+            }
+        ]
+    };
+}
+
+/**
+ * @param {LemmaInfo} info 
+ * @returns {import('types').TermBank.StructuredContent}
+ */
+function getStructuredDetails(info) {
+    const result = [];
+
+    const {
+        etymology_text: etymology,
+        morpheme_text: morphemes,
+        head_info_text: headInfo
+    } = info;
+
+    for (const [title, content] of [
+        ['mophemes', morphemes],
+        ['etymology', etymology],
+        ['head-info', headInfo],
+    ]) {
+        if (title && content) result.push(buildDetailsEntry(title, content));
+    }
+
+    return {
+        "tag": "div",
+        "data": {
+            "content": "details-section"
+        },
+        "content": [...result]
+    };
+}
+
 /**
  * @param {GlossTwig} glossTwig
  * @param {string[]} senseTags
@@ -315,6 +375,20 @@ let lastTermBankIndex = 0;
 
                     debug(entries);
                     for (const [tags, entry] of Object.entries(entries)) {
+                        if (info.etymology_text || info.head_info_text || info.morpheme_text) {
+                            const lastDef = entry[5][entry[5].length - 1];
+
+                            if (
+                                lastDef &&
+                                typeof lastDef === 'object' &&
+                                'type' in lastDef &&
+                                lastDef.type === 'structured-content' &&
+                                Array.isArray(lastDef.content)
+                            ) {
+                                lastDef.content.push(getStructuredDetails(info));
+                            }
+                        }
+
                         ymtLemmas.push(entry);
                     }
                 }

diff --git a/data/styles.css b/data/styles.css
@@ -17,4 +17,32 @@ div[data-sc-content="example-sentence-a"] {
 }
 div[data-sc-content="example-sentence-b"] {
     font-size: 0.8em;
+}
+div[data-sc-content="details-section"] { /* wrapper div around the details entries */
+    margin: 0.25em 0;
+}
+details[data-sc-content^="details-entry"] { /* prefix match: applies to every details-entry-* element */
+    padding-left: 0;
+}
+summary[data-sc-content="summary-entry"] {
+    user-select: none;
+    width: max-content;
+}
+ul.gloss-list[data-count="1"] summary[data-sc-content="summary-entry"] {
+    list-style-position: inside;
+}
+summary[data-sc-content="summary-entry"]::marker {
+    color: var(--checkbox-disabled-color);
+}
+summary[data-sc-content="summary-entry"] { /* NOTE(review): same selector as the user-select/width rule above; the two could be merged */
+    color: var(--text-color-light4);
+}
+details[data-sc-content^="details-entry"][open=""] summary[data-sc-content="summary-entry"] { /* open entry: summary switches to --text-color */
+    color: var(--text-color);
+}
+summary[data-sc-content="summary-entry"]:hover {
+    cursor: pointer;
+}
+summary[data-sc-content="summary-entry"] ~ div {
+    margin: 0.5em 0;
 }
diff --git a/data/test/dict/cs/en/term_bank_1.json b/data/test/dict/cs/en/term_bank_1.json
@@ -137,6 +137,36 @@
                 }
               }
             ]
+          },
+          {
+            "tag": "div",
+            "data": {
+              "content": "details-section"
+            },
+            "content": [
+              {
+                "tag": "details",
+                "data": {
+                  "content": "details-entry-etymology"
+                },
+                "content": [
+                  {
+                    "tag": "summary",
+                    "data": {
+                      "content": "summary-entry"
+                    },
+                    "content": "etymology"
+                  },
+                  {
+                    "tag": "div",
+                    "data": {
+                      "content": "etymology-content"
+                    },
+                    "content": "Deverbal from zpravit."
+                  }
+                ]
+              }
+            ]
           }
         ]
       }
@@ -187,6 +217,36 @@
                 }
               }
             ]
+          },
+          {
+            "tag": "div",
+            "data": {
+              "content": "details-section"
+            },
+            "content": [
+              {
+                "tag": "details",
+                "data": {
+                  "content": "details-entry-etymology"
+                },
+                "content": [
+                  {
+                    "tag": "summary",
+                    "data": {
+                      "content": "summary-entry"
+                    },
+                    "content": "etymology"
+                  },
+                  {
+                    "tag": "div",
+                    "data": {
+                      "content": "etymology-content"
+                    },
+                    "content": "Inherited from Old Czech pro, from Proto-Slavic *pro."
+                  }
+                ]
+              }
+            ]
           }
         ]
       }
@@ -209,6 +269,36 @@
             "content": [
               "(reflexive with se) to dispute"
             ]
+          },
+          {
+            "tag": "div",
+            "data": {
+              "content": "details-section"
+            },
+            "content": [
+              {
+                "tag": "details",
+                "data": {
+                  "content": "details-entry-etymology"
+                },
+                "content": [
+                  {
+                    "tag": "summary",
+                    "data": {
+                      "content": "summary-entry"
+                    },
+                    "content": "etymology"
+                  },
+                  {
+                    "tag": "div",
+                    "data": {
+                      "content": "etymology-content"
+                    },
+                    "content": "Inherited from Old Czech přieti, from Proto-Slavic *pьrěti."
+                  }
+                ]
+              }
+            ]
           }
         ]
       }
-Original file line number
+Diff line change
@@ Expand Up / @@ -16,6 +16,7 @@ @@
     !data/test/ipa/**/*.json
     *.zip
+    *.gz
     data/**/*.css
     !data/styles.css
@@ Expand Down @@