Skip to content

Commit

Permalink
[prakriya] Increase Kaumudi test coverage
Browse files Browse the repository at this point in the history
Quality:
- Substantially increase test coverage in Kaumudi 43 - 52.
- Add around 70 new tests.
- Model varttikas explicitly and consistently with the `Varttika`
  variant.
- Add varttikas and Dhatupatha gana-sutras to `data/`.
- Fix various small typos in `dhatupatha.tsv`.
- Increase performance by around 10%. We did this mainly by reducing use
  of `CompactString` in routines that focus on string manipulation and
  avoiding an unnecessary allocation in the `it_samjna` rules.
- Add broad support for kaNDvAdi-dhAtus.
- Add better support for iT-Agama rules with liT.

API:
- Upgrade wasm API to more richly model the effects of a rule.
- Update web frontend to hide empty terms and show varttika text.

Development:
- Add `create_all_tinantas` script to generate all tinantas and write
  them to one file.
  • Loading branch information
akprasad committed Dec 11, 2023
1 parent b39418f commit 2eb4434
Show file tree
Hide file tree
Showing 81 changed files with 3,612 additions and 1,148 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

119 changes: 113 additions & 6 deletions vidyut-lipi/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,17 +1,19 @@
//! Hacky transliteration functions that other crates might need.
#![doc = include_str!("../README.md")]
#![deny(missing_docs)]
#![deny(clippy::unwrap_used)]

//! Hacky transliteration functions that other crates might need.
use std::cmp;

/// Defines the available transliteration schemes.
///
/// The type is a plain fieldless enum, so it is cheap to copy and can be used as a key in hashed
/// collections.
#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)]
pub enum Scheme {
    /// SLP1 transliteration.
    Slp1,
    /// IAST transliteration.
    Iast,
    /// Devanagari.
    Devanagari,
}

fn map_char(cur: &str) -> Option<&'static str> {
Expand Down Expand Up @@ -87,13 +89,118 @@ fn iast_to_slp1(input: &str) -> String {
ret
}

/// Transliterates SLP1 text to Devanagari.
///
/// The input is processed one `char` at a time:
///
/// - A consonant is written as its Devanagari letter followed by a virama.
/// - A vowel that directly follows a virama replaces that virama with the vowel's dependent sign
///   (for `a` the sign is empty, so the virama is simply dropped). Otherwise the vowel is written
///   in its independent form.
/// - `~`, `M`, and `H` are written as candrabindu, anusvara, and visarga respectively.
/// - Any other character is copied to the output unchanged.
fn slp1_to_devanagari(text: &str) -> String {
    const VIRAMA: char = '\u{094D}';

    // Devanagari code points take three bytes in UTF-8, so reserve roughly that much per input
    // byte. This is only a capacity hint; the string still grows if needed.
    let mut ret = String::with_capacity(text.len() * 3);

    for c in text.chars() {
        // Vowels as (independent letter, dependent sign).
        let vowel = match c {
            'a' => Some(("अ", "")),
            'A' => Some(("आ", "\u{093E}")),
            'i' => Some(("इ", "\u{093F}")),
            'I' => Some(("ई", "\u{0940}")),
            'u' => Some(("उ", "\u{0941}")),
            'U' => Some(("ऊ", "\u{0942}")),
            'f' => Some(("ऋ", "\u{0943}")),
            'F' => Some(("ॠ", "\u{0944}")),
            'x' => Some(("ऌ", "\u{0962}")),
            'X' => Some(("ॡ", "\u{0963}")),
            'e' => Some(("ए", "\u{0947}")),
            'E' => Some(("ऐ", "\u{0948}")),
            'o' => Some(("ओ", "\u{094B}")),
            'O' => Some(("औ", "\u{094C}")),
            _ => None,
        };
        if let Some((independent, sign)) = vowel {
            if ret.ends_with(VIRAMA) {
                // The vowel attaches to the preceding consonant: swap the virama for the sign.
                ret.pop();
                ret.push_str(sign);
            } else {
                ret.push_str(independent);
            }
            continue;
        }

        let consonant = match c {
            'k' => Some("क"),
            'K' => Some("ख"),
            'g' => Some("ग"),
            'G' => Some("घ"),
            'N' => Some("ङ"),
            'c' => Some("च"),
            'C' => Some("छ"),
            'j' => Some("ज"),
            'J' => Some("झ"),
            'Y' => Some("ञ"),
            'w' => Some("ट"),
            'W' => Some("ठ"),
            'q' => Some("ड"),
            'Q' => Some("ढ"),
            'R' => Some("ण"),
            't' => Some("त"),
            'T' => Some("थ"),
            'd' => Some("द"),
            'D' => Some("ध"),
            'n' => Some("न"),
            'p' => Some("प"),
            'P' => Some("फ"),
            'b' => Some("ब"),
            'B' => Some("भ"),
            'm' => Some("म"),
            'y' => Some("य"),
            'r' => Some("र"),
            'l' => Some("ल"),
            'v' => Some("व"),
            'S' => Some("श"),
            'z' => Some("ष"),
            's' => Some("स"),
            'h' => Some("ह"),
            'L' => Some("ळ"),
            _ => None,
        };
        if let Some(letter) = consonant {
            // Always emit the virama; a following vowel removes it again.
            ret.push_str(letter);
            ret.push(VIRAMA);
            continue;
        }

        match c {
            '~' => ret.push('\u{0901}'),
            'M' => ret.push('\u{0902}'),
            'H' => ret.push('\u{0903}'),
            // Not an SLP1 sound: pass through unchanged.
            other => ret.push(other),
        }
    }
    ret
}

/// Transliterates the given input text.
///
/// Only the IAST -> SLP1 mapping is defined. All other mappings will panic.
/// ### Panics
///
/// Only the IAST -> SLP1 and SLP1 -> Devanagari mappings are defined. All other mappings will
/// panic.
pub fn transliterate(input: &str, from: Scheme, to: Scheme) -> String {
assert!(from == Scheme::Iast);
assert!(to == Scheme::Slp1);
iast_to_slp1(input)
use Scheme::*;
if from == Iast && to == Slp1 {
iast_to_slp1(input)
} else if from == Slp1 && to == Devanagari {
slp1_to_devanagari(input)
} else {
panic!("Unsupported scheme combination: {from:?} -> {to:?}")
}
}

#[cfg(test)]
Expand Down
1 change: 1 addition & 0 deletions vidyut-prakriya/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ console_error_panic_hook = "0.1.7"

[dev-dependencies]
test_utils = { path = "test_utils" }
vidyut-lipi = { path = "../vidyut-lipi" }

[lib]
crate-type = ["cdylib", "rlib"]
24 changes: 12 additions & 12 deletions vidyut-prakriya/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -53,40 +53,40 @@ test_tinantas:
cargo build --release
../target/release/test_tinantas \
--test-cases test-files/tinantas-basic-kartari.csv \
--hash "ff57ac933f1a44a21285448f1b064d1b4464a7aee650981cb30244c8ee3ed111"
--hash "28293a6aa746f04dff1a5c3b822396c236df269460d691bd137ba01a2c80557f"
../target/release/test_tinantas \
--test-cases test-files/tinantas-nic-kartari.csv \
--hash "263fa8bc08eeb912ffe4daa876452a1bea1a4117620d4e3d86aa876f03795140"
--hash "5d508a8b3a52ebe92a1524b0b51bbed90e27301f265669aa8e7a887d55bf1eff"
../target/release/test_tinantas \
--test-cases test-files/tinantas-san-kartari.csv \
--hash "c9c2c2e79eecf4a6bbe105537ceb64f77004ce3ff3b53a07a4ac410f0131de0e"
--hash "ef278531cd45ba1df9af60e5b4b8ef582fdbe7a7eb31478fe35b9600c42380fc"
../target/release/test_tinantas \
--test-cases test-files/tinantas-yan-kartari.csv \
--hash "585c83fa6a241d7f31d2b12e7382e65d05b3f7703edaa1609ee3617810aa74d2"
--hash "be90fccc6f3907862cf820969094fd958b44b58a447a7a6369edbecde75d8cd4"
../target/release/test_tinantas \
--test-cases test-files/tinantas-yan-luk-kartari.csv \
--hash "9168fb70bce7903708004da407dfe1c14aedc199ab770c2ee524b7f573233cfc"
--hash "aec7b4524b159e195e38ce4b0e5e1335940b60cb41606ba31d1b9fbd010f7d11"
../target/release/test_tinantas \
--test-cases test-files/tinantas-basic-karmani.csv \
--hash "63d076bf9d8b88d7a03f224626410ec2be751f2b50ef0f0777c5cddcecad3dfd"
--hash "b3621fe5bf2295f2f1d15270aa26b9fc973450126cd4a551e53f91571e4d531c"
../target/release/test_tinantas \
--test-cases test-files/tinantas-nic-karmani.csv \
--hash "2b9aa1158d26ce7a9da28f7ec8685ce61554daa3f8434770aec3e16e000eefe2"
--hash "f4760cd6da0496f6db82f1812ba168a18e8cde4e5dffafee815e1e88d19abb36"
../target/release/test_tinantas \
--test-cases test-files/tinantas-san-karmani.csv \
--hash "e60669056aec2124c27c8d60f3cebe19b75e11d193e87522458be06291db221f"
--hash "089160604d7a5a40f8f71e1234cccc3667135b3e2654c4ac346677727e5aae80"
../target/release/test_tinantas \
--test-cases test-files/tinantas-yan-karmani.csv \
--hash "7e0ed1e074eff8eb104dc88b8cf7525d1e28b8686bb82dccb9d036a4696caed7"
--hash "260a684b78e4e66a68f89366c07850d1680a72424e2934392678359289a1916b"

# Builds in release mode, then runs the `test_krdantas` binary once per test
# file, passing that file's `--hash` value.
test_krdantas:
	cargo build --release
	../target/release/test_krdantas \
		--test-cases test-files/krdantas-ktvA.csv \
		--hash "59470473bec6cb2d2254217d604a2302c6ee8926571441b3281004a30abc81ce"
	../target/release/test_krdantas \
		--test-cases test-files/krdantas-kta.csv \
		--hash "2ce641122f4c6ac243acb348d376f35586c5f960ea338b9b1f7b79f0f72e3c12"

test_subantas:
cargo run --bin test_subantas -- \
Expand All @@ -102,7 +102,7 @@ check_rule_coverage:
# Profiles the program's execution time on OSX. This command will probably not
# work on other operating systems.
#
# The profiled example's stdout is discarded.
profile-time-osx:
	cargo instruments -t time --release --example create_all_tinantas > /dev/null


# Other
Expand Down
18 changes: 18 additions & 0 deletions vidyut-prakriya/data/dhatupatha-ganasutras.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
01.0933_GawAdayo mitaH
01.0934_janIjFzkanasuraYjo'mantASca
01.0935_jvalahvalahmalanamAmanupasargAdvA
01.0936_glAsnAvanuvamAM ca
01.0937_na kamyamicamAm
01.0938_Samo darSane
01.0939_yamo'parivezaRe
01.0940_sKadiravapariByAM ca
02.0076_carkarItaM ca
04.0162_svAdaya oditaH
10.0493_jYapAdayo mitaH
10.0494_nAnye mito'hetO
10.0495_kusma nAmno vA
10.0496_A kusmAdAtmanepadinaH
10.0497_A garvAdAtmanepadinaH
10.0498_A DfzAdvA
10.0499_A svadaH sakarmakAt
10.0500_hantyarTASca
10 changes: 5 additions & 5 deletions vidyut-prakriya/data/dhatupatha.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -781,7 +781,7 @@ code dhatu artha
01.0780 Iza~ uYCe
01.0781 kaza~ hiMsAyAm
01.0782 Kaza~ hiMsAyAm
01.0783 Siza~ hiMsAyAm
01.0783 Si\\za~ hiMsAyAm
01.0784 jaza~ hiMsAyAm
01.0785 Jaza~ hiMsAyAm
01.0786 Saza~ hiMsAyAm
Expand Down Expand Up @@ -1246,7 +1246,7 @@ code dhatu artha
03.0002 YiBI\ Baye
03.0003 hrI\ lajjAyAm
03.0004 pF pAlanapUraRayoH
03.0005 pf pAlanapUraRayoH
03.0005 pf\ pAlanapUraRayoH
03.0006 quBf\Y DAraRapozaRayoH
03.0007 mA\N mAne Sabde ca
03.0008 o~hA\N gatO
Expand Down Expand Up @@ -1785,7 +1785,7 @@ code dhatu artha
10.0034 kuwwa~ CedanaBartsanayoH
10.0035 puwwa~ alpIBAve
10.0036 cuwwa~ alpIBAve
10.0037 awwa~ anAdare
10.0037 adwa~ anAdare
10.0038 zuwwa~ anAdare
10.0039 lunwa~ steye
10.0040 lunWa~ steye
Expand All @@ -1810,7 +1810,7 @@ code dhatu artha
10.0059 Sliza~ SlezaRe
10.0060 paTi~ gatO
10.0061 piCa~ kuwwane
10.0062 Cada~ saMvaraRe
10.0062 Cadi~ saMvaraRe
10.0063 SraRa~ dAne
10.0064 taqa~ AGAte
10.0065 Kaqa~ Bedane
Expand Down Expand Up @@ -2086,7 +2086,7 @@ code dhatu artha
10.0335 ruja~ hiMsAyAm
10.0336 zvada~ AsvAdane
10.0337 svAda~ AsvAdane
10.0338 yuja~ saMyamane
10.0338 yu\ja~ saMyamane
10.0339 pfca~ saMyamane
10.0340 arca~ pUjAyAm
10.0341 zaha~ marzaRe
Expand Down
93 changes: 93 additions & 0 deletions vidyut-prakriya/data/varttikas.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
1.1.33.1 viBAzAprakaraRe tIyasya NitsUpasaNKyAnam
1.2.1.1 vyaceH kuwAditvamanasIti vaktavyam
1.2.6.1 SranTigranTidamBisvaYjInAmiti vaktavyam
1.3.3.1 ira itsaMjJA vAcyA
1.3.22.1 ANaH pratijYAyAm
1.3.29.1 dfSeSceti vaktavyam
1.3.30.1 upasargAdasyatyUhrorvA vacanam
1.4.60.3 duraH zatvaRatvayorupasargatvapratizeDo vaktavyaH
1.4.65.1 antaHSabdasya aNkiviDiRatvezu upasargasaMjYA vaktavyA
2.1.37.1 BayaBItaBItiBIBiriti vaktavyam
2.4.45.1 iRvadika iti vaktavyam
2.4.54.1 varjane pratizeDo vaktavyaH
2.4.56.2 valAdAvArDaDAtuke vezyate
3.1.11.1 ojaso'psaraso nityamitarezAM viBAzayA
3.1.14.1 satrakazwakakzakfcCragahaneByaH kaRvacikIrzAyAm iti vaktavyam
3.1.15.2 tapasaH parasmEpadaM ca
3.1.16.1 PenAcceti vaktavyam
3.1.17.1 sudinadurdinanIhareByaSceti vaktavyam
3.1.17.2 awAwwASIkAkowApowAsowApruzwApluzwAgrahaRaM kartavyam
3.1.22.1 sUcisUtramUtryawyartyaSUrRotInAM grahaRaM
3.1.25.1 arTavedasatyAnAmApugvaktavyaH
3.1.35.1 kAsyanekAca iti vaktavyam
3.1.36.1 UrRoteSca pratizeDo vaktavyaH
3.1.44.1 spfSamfSakfzatfpadfpAM cleH sijvA vaktavyaH
3.1.48.1 RiSridrusruzu kamerupasaMKyAnaM kartavyam
3.1.96.1 vasestavyat kartari Ricca
3.1.96.2 kelimara upasaNKyAnam
3.1.97.1 takiSasicatiyatijanInAmupasaNKyAnam
3.1.97.2 hano vA vaDa ca
3.1.100.1 carerANi cAgurO
3.2.8.1 pibateH surASIDvoriti vaktavyam
3.2.9.1 SaktilANgalANkuSayazwitomaraGawaGawIDanuzzu graherupasaNKyAnam
3.2.9.2 sUtre ca DAryarTe
3.2.60.1 samAnAnyayoSceti vaktavyam
3.2.174.1 krukannapi vaktavyaH
3.2.178.1 kvibvacipracCAyatastukawaprujuSrIRAM dIrGo'saMprasAraRaM ca
3.3.125.1 KanerqaqarekekavakA vAcyAH
4.1.6.1 DAtorugitaH pratizeDaH
4.1.68.1 SvaSurasyokArAkAralopaSca
4.1.97.1 vyAsavaruqanizAdacaRqAlabimbAnAm iti vaktavyam
4.1.128.1 cawakAc ceti vaktavyam
4.2.28.1 CaprakaraRe pENgAkzIputrAdiBya upasaNKyAnam
4.2.28.2 SatarudrAcCaSca GaSca
4.2.40.1 gaRikAyASca yaY vaktavyaH
4.2.42.1 pfzWAdupasaNKyAnam
4.2.43.1 gajAcceti vaktavyam
4.2.93.1 avArapArAdvigfhItAdapi viparItAcceti vaktavyam
4.4.41.1 aDarmAcceti vaktavyam
4.4.49.1 narAcceti vaktavyam
5.1.6.1 yatprakaraRe raTAcca
5.1.33.1 kevalAyASceti vaktavyam
5.1.38.2 saMnipAtAcceti vaktavyam
5.1.74.1 kroSaSatayojanaSatayorupasaNKyAnam
5.1.77.1 AhftaprakaraRe vArijaNgalasTalkAntArapUrvapadAdupasaNKyAnam
5.1.77.2 ajapaTaSaNkupaTAByAM copasaNKyAnam
5.1.77.3 maDukamaricayoraR sTalAt
5.2.29.1 alAbUtilomABaNgAByo rajasyupasaNKyAnam
5.2.112.1 anyeByo'pi dfSyata iti vaktavyam
5.4.3.1 caYcabfhayorupasaNKyAnam
5.4.78.1 palyarAjaByAM ca
6.1.3.3 Irzyates tftIyasya dve Bavata iti vaktavyam
6.1.64.1 subDAtuzWivuzvazkatInAM satvapratizeDo vaktavyaH
6.1.64.2 zWivu ityasya dvitIyasTakArazWakAraSca izyate
6.3.11.1 antAcca
6.3.118.1 anutsAhaBrAtfpitFRAm ityeva
6.4.16.1 gameriNAdeSasyeti vaktavyam
6.4.24.2 raYjerRO mfgaramaRa upasaNKyAnaM kartavyam
6.4.24.3 GinuRi ca raYjerupasaNKyAnaM kartavyam
6.4.34.1 kvO ca SAsa ittvaM BavatIti vaktavyam
6.4.34.2 kvippratyaye tu tasya api BavatIti vaktavyam
6.4.84.1 dfnkarapunaH pUrvasya Buvo yaRvaktavyaH
6.4.114.1 daridrAterArDaDAtuke lopo vaktavyaH
6.4.114.2 adyatanyAM veti vaktavyam
6.4.120.1 damBeretvaM vaktavyam
6.4.122.1 SranTeSceti vaktavyam
6.4.144.1 nAnatasya wilope sabrahmacAripIWasarpikalApikuTumitEtilijAjalilANgaliSilAliSiKaRqisUkarasadmasuparvaRAmupasaNKyAnaM kartavyam
6.4.148.1 yasyetyONaH SyAM pratizeDo vaktavyaH
6.4.155.1 RAvizWavat prAtipadikasya kAryaM BavatIti vaktavyam
7.1.26.1 ekatarAtpratizeDo vaktavyaH
7.1.59.1 Se tfmPAdInAmupasaNKyAnaM kartavyam
7.1.90.1 oto Riditi vAcyam
7.2.49.1 tanipatidaridrARAmupasaNKyAnam
7.2.68.1 dfSeSceti vaktavyam
7.3.34.1 anAcamikamivamInAm iti vaktavyam
7.3.37.1 lugAgamastu tasya vaktavyaH
7.3.37.2 DUYprIYornugvaktavyaH
7.4.3.1 kARyAdInAM ceti vaktavyam
7.4.30.1 hanterhisAyAM yaNi GnIBAvo vaktavyaH
7.4.54.1 sani rADo hiMsAyAmaca is vaktavyaH
7.4.90.1 rIgftvata iti vaktavyam
8.2.8.1 NAvuttarapade pratizeDasya pratizeDo vaktavyaH
8.2.8.3 vA napuMsakAnAm iti vaktavyam
8.3.118.1 svaYjerupasaNKyAnaM kartavyam
Loading

0 comments on commit 2eb4434

Please sign in to comment.