-
-
Notifications
You must be signed in to change notification settings - Fork 21
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[prakriya] Use the full Ganapatha with metadata
This resolves GitHub issues #160, #161, and #162. CONTEXT Various rules in the Ashtadhyayi refer to one or more *ganas*, collections of stems and other linguistic data that enumerate the items that can trigger the rule. Our previous approach used a small number of manually curated ganas that we then used in the appropriate rules. But this gana data was ultimately just copied from the data on ashtadhyayi.com, perhaps with a few tweaks along the way. Given this partial treatment, I've felt that it would be worthwhile to properly and maturely model all of these ganas. TECHNICAL DECISIONS Given the number and size of these ganas, it seems simplest to couple tightly with ashtadhyayi.com's data as a source of truth then derive the API we want programmatically. If there are errors in the upstream, fixing them will fix our data as well and improve the ecosystem overall. The main technical decision here was this: do we store the data in the code itself, or do we rely instead on an external data file? Both positions have merits. Using an external data file means that users can specify precisely which gana they want to use when passing some linguistic item into the system. Doing so also follows the pattern we use for the Dhatupatha, which is likewise stored as an external file. Despite these merits, I opted to store the Ganapatha data directly in the code itself. I am not convinced that this approach is right, but I chose it for the following reasons: - The Ganapatha and the Dhatupatha follow different usage patterns. Dhatus are almost always part of exactly one gana, whereas items in the Ganapatha are often in multiple ganas. - Dhatu ganas are common knowledge and familiar, and almost anyone who knows some dhatu will also know its gana. Therefore, there is less friction in defining the gana explicitly (e.g. through a data file) then passing it in. This is not the case for items in the Ganapatha. 
- My hunch is that there is generally more variance in Dhatupathas, which motivates giving the user extra control by modeling it as an external file. I don't think this is the case for the Ganapatha.
- Loading branch information
Showing
28 changed files
with
8,225 additions
and
2,185 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
uv.lock |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
3.13 |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,304 @@ | ||
"""Generates ganapatha.rs based on data from ashtadhyayi.com. | ||
Usage: | ||
uv run create_ganapatha.py > ../src/ganapatha.rs | ||
""" | ||
|
||
import json | ||
import urllib.request | ||
from vidyut.lipi import transliterate, Scheme | ||
|
||
|
||
# Manual corrections applied on top of the upstream Ganapatha data.
#
# Unless noted otherwise, each table is keyed by the gana name in SLP1
# (including the trailing visarga "H"), exactly as `main` computes it.

# gana name -> {upstream item: corrected item}
REPLACE: dict[str, dict[str, str]] = {
    "pfTvAdiH": {"Uru": "uru"},
    "gahAdiH": {"antasTa": "antaHsTa"},
}

# upstream sutra code -> corrected sutra code
CODE_REPLACE: dict[str, str] = {"3.1.70": "2.1.70"}

# gana name -> {item: extra items to emit immediately after that item}
INSERT_AFTER: dict[str, dict[str, list[str]]] = {
    "BOrikyAdiH": {
        "BOriki": ["vEpeya"],
    },
}

# gana name -> {entry that is emitted only as a comment: items to emit after
# that comment}. `main` consults this table for entries that start with "<" or
# contain a space.
EXPAND_COMMENT: dict[str, dict[str, list[str]]] = {
    "sarvAdiH": {
        "<<pUrvaparAvaradakziRottarAparADarARivyavasTAyAmasaMjYAyAm>>": [
            "pUrva",
            "para",
            "avara",
            "dakziRa",
            "uttara",
            "apara",
            "aDara",
        ],
        "<<svamajYAtiDanAKyAyAm>>": ["sva"],
        "<<antaraM bahiryogopasaMvyAnayoH>>": ["antara"],
    },
    "kASyAdiH": {
        "<<ApadAdipUrvapadAtkAlAntAt>>": [
            "ApatkAla",
            "UrDvakAla",
            "tatkAla",
        ],
    },
}

# gana name -> items to drop from the generated list
DELETE: dict[str, set[str]] = {
    "kASyAdiH": {"Apad", "UrDva", "tat"},
    # TODO: check on this. marIci, if added here, seems to block mArIca.
    "bAhvAdiH": {"marIci"},
}
|
||
|
||
def load_ganapatha() -> dict:
    """Download and parse the Ganapatha data from the ashtadhyayi.com repo.

    Returns:
        The decoded JSON document. `main` iterates over its ``"data"`` key.

    Raises:
        urllib.error.URLError: if the download fails.
        json.JSONDecodeError: if the response body is not valid JSON.
    """
    url = "https://raw.githubusercontent.com/ashtadhyayi-com/data/refs/heads/master/ganapath/data.txt"
    # Use the response as a context manager so the HTTP connection is closed
    # even if JSON decoding fails.
    with urllib.request.urlopen(url) as response:
        return json.load(response)
|
||
|
||
def load_sutrapatha() -> dict:
    """Load the sutrapatha as a mapping from sutra code to sutra text.

    Reads ``../data/sutrapatha.tsv`` relative to the current working
    directory. Each non-blank line is expected to be ``<code>\\t<text>``.

    Returns:
        A dict mapping each code (first column) to its text (rest of the line).

    Raises:
        FileNotFoundError: if the TSV file does not exist.
        ValueError: if a non-blank line contains no tab separator.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open("../data/sutrapatha.tsv", encoding="utf-8") as f:
        contents = f.read()

    sutras = {}
    for raw_line in contents.splitlines():
        line = raw_line.strip()
        if not line:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        # Split on the first tab only so a stray tab in the text does not
        # break the two-way unpacking.
        code, text = line.split("\t", 1)
        sutras[code] = text
    return sutras
|
||
|
||
# SLP1 letters whose ASCII spelling in a constant name differs from simply
# upper-casing the letter. Built once at import time. The mapping is lossy by
# design: for example, both "Y" and "R" become "N".
_SLP1_TO_CONST = str.maketrans(
    {
        "f": "R",
        "F": "R",
        "x": "L",
        "X": "L",
        "e": "E",
        "E": "AI",
        "o": "O",
        "O": "AU",
        "K": "KH",
        "G": "GH",
        "C": "CH",
        "J": "JH",
        "Y": "N",
        "w": "T",
        "W": "TH",
        "q": "D",
        "Q": "DH",
        "R": "N",
        "T": "TH",
        "D": "DH",
        "P": "PH",
        "B": "BH",
        "S": "SH",
        "z": "SH",
    }
)


def _to_const_name(slp_name: str) -> str:
    """Convert an SLP1 gana name into an upper-case ASCII constant name.

    Args:
        slp_name: the gana name in SLP1, e.g. ``"pfTvAdiH"``.

    Returns:
        The constant name, e.g. ``"PRTHVADI"``. If the name ends in ``"iH"``,
        the final ``"H"`` is dropped.
    """
    # Hard-coded special case that doubles the first vowel.
    if slp_name == "bAhvAdiH":
        return "BAAHVADI"

    # `str.translate` substitutes in a single pass, so a replacement such as
    # "f" -> "R" is never re-mapped by the "R" -> "N" rule.
    const_name = slp_name.translate(_SLP1_TO_CONST)
    if slp_name.endswith("iH"):
        # "H" maps to itself, so the last output character is that "H".
        const_name = const_name[:-1]
    return const_name.upper()
|
||
|
||
def _normalize_gana_name(name: str) -> str:
    """Apply the script's hard-coded spelling fixes to an SLP1 gana name."""
    if name == "ugavAdiH":
        return "gavAdiH"
    if name == "kattryAdiH":
        return "katryAdiH"
    # For every other name, rewrite "M" before k / d / D as "N" / "n" / "n".
    return name.replace("Mk", "Nk").replace("Md", "nd").replace("MD", "nD")


def main():
    """Print the generated ``ganapatha.rs`` module to stdout.

    Loads the local sutrapatha and the upstream Ganapatha JSON, then prints:

    1. a fixed Rust prelude defining ``GanapathaEntry`` and ``GanaKind``,
    2. one ``pub(crate) const`` per gana, after applying the manual
       corrections in ``REPLACE``, ``CODE_REPLACE``, ``INSERT_AFTER``,
       ``EXPAND_COMMENT`` and ``DELETE``,
    3. an ``all_sutras()`` function listing every constant in input order.

    Raises:
        KeyError: if an upstream entry lacks an expected key or its sutra
            code is missing from the sutrapatha.
    """
    sutrapatha = load_sutrapatha()
    ganapatha = load_ganapatha()

    print("""// Autogenerated by scripts/create_ganapatha.py
//! Implements rules from the *Gaṇapāṭha*.
//!
//! This module is largely auto-generated from the Ganapatha data on <https://ashtadhyayi.com>.
//! We have made a few minor corrections after the fact.
//!
//! For the source data, see <https://ashtadhyayi.com/ganapath>.
#![allow(unused)]
/// Models a *gaṇa-sūtra* from the *Gaṇapāṭha*.
#[derive(Copy, Clone, Debug, Hash, Eq, Ord, PartialEq, PartialOrd)]
pub struct GanapathaEntry {
name: &'static str,
number: u16,
code: &'static str,
items: &'static [&'static str],
kind: GanaKind,
varttika: Option<&'static str>
}
/// Models the *gaṇa* type.
#[derive(Copy, Clone, Debug, Hash, Eq, Ord, PartialEq, PartialOrd)]
pub enum GanaKind {
/// A basic *gaṇa* whose members are all listed.
Basic,
/// A *gaṇa* whose members form an incomplete set. The full set can be known only by
/// observing actual usage.
Akrti,
}
impl GanapathaEntry {
pub(crate) const fn basic(
name: &'static str,
number: u16,
code: &'static str,
items: &'static [&'static str],
) -> Self {
Self {
name,
number,
code,
items,
kind: GanaKind::Basic,
varttika: None,
}
}
pub(crate) const fn akrti(
name: &'static str,
number: u16,
code: &'static str,
items: &'static [&'static str],
) -> Self {
Self {
name,
number,
code,
items,
kind: GanaKind::Akrti,
varttika: None,
}
}
pub(crate) const fn with_varttika(mut self: GanapathaEntry, varttika: &'static str) -> Self {
self.varttika = Some(varttika);
self
}
/// The name of this *gaṇa* in SLP1 transliteration.
pub fn name(&self) -> &str {
self.name
}
/// The number of this *gaṇa* relative to other *gaṇa*s in the Ganapatha.
pub fn number(&self) -> u16 {
self.number
}
/// The string ID of the Ashtadhyayi rule that first uses this *gaṇa*.
pub fn code(&self) -> &str {
self.code
}
/// All items in the *gaṇa*.
pub fn items(&self) -> &[&str] {
self.items
}
/// The type of this *gaṇa*.
pub fn kind(&self) -> GanaKind {
self.kind
}
/// A *vārttika* associated with this *gaṇa*, if one exists.
pub fn varttika(&self) -> Option<&str> {
self.varttika
}
}
""")

    seen = set()
    ordered_names = []
    for sutra in ganapatha["data"]:
        index = sutra["ind"]
        name = _normalize_gana_name(
            transliterate(sutra["name"], Scheme.Devanagari, Scheme.Slp1)
        )

        # Resolve this entry's sutra code BEFORE building the constant name.
        # The disambiguation suffix below reads `code[0]`; assigning `code`
        # afterwards made the suffix come from the previous entry's sutra.
        code = sutra["sutra"]
        code = CODE_REPLACE.get(code, code)

        const_name = _to_const_name(name)
        if const_name in seen:
            # Disambiguate with the adhyAya number.
            const_name += "_" + code[0]
        seen.add(const_name)
        ordered_names.append(const_name)

        words = transliterate(sutra["words"], Scheme.Devanagari, Scheme.Slp1)
        words = [x.strip() for x in words.split(".") if x.strip()]

        code_text = transliterate(sutrapatha[code], Scheme.Slp1, Scheme.Iso15919)
        varttika_text = transliterate(sutra["vartika"], Scheme.Devanagari, Scheme.Slp1)
        is_akrti = sutra["type"] == "A"

        # Drop the trailing "H" of the SLP1 name for the doc comment.
        name_iso = transliterate(name[:-1], Scheme.Slp1, Scheme.Iso15919)
        print(f"/// *{name_iso}-gaṇa* ({index}), first used in the following *sūtra*:")
        print("///")
        print(f"/// > {code} *{code_text}*")
        if varttika_text:
            print("/// The *sūtra* has the following *vārttika*:")
            print("///")
            print(f"/// > *{varttika_text}*")

        constructor = "akrti" if is_akrti else "basic"
        print(f"pub(crate) const {const_name}: GanapathaEntry = GanapathaEntry::{constructor}(")
        print(f'"{name}", {index}, "{code}", &[')

        # The correction tables are keyed by gana name, so look them up once
        # per gana rather than once per word.
        deleted = DELETE.get(name, set())
        replacements = REPLACE.get(name, {})
        insertions = INSERT_AFTER.get(name, {})
        expansions = EXPAND_COMMENT.get(name, {})

        for w in words:
            if w in deleted:
                continue

            w = replacements.get(w, w)
            extras = insertions.get(w, [])

            if w.startswith("<") or " " in w:
                # Emit such entries as a Rust comment, followed by any
                # manually supplied expansion.
                print(f" // {w}")
                for e in expansions.get(w, []):
                    print(f' "{e}",')
            else:
                print(f' "{w}",')

            for e in extras:
                print(f' "{e}",')

        if varttika_text:
            print(f']).with_varttika("{varttika_text}");\n')
        else:
            print("]);\n")

    print("""
/// Returns an ordered iterator over all *gaṇa-sūtra*s.
pub fn all_sutras() -> impl Iterator<Item = &'static GanapathaEntry> {
const SUTRAS: &[GanapathaEntry] = &[""")

    for const in ordered_names:
        print(f" {const},")

    print("""];
SUTRAS.iter()
}""")
|
||
|
||
if __name__ == "__main__": | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
[project] | ||
name = "scripts" | ||
version = "0.1.0" | ||
description = "Add your description here" | ||
readme = "README.md" | ||
requires-python = ">=3.13" | ||
dependencies = [ | ||
"vidyut>=0.3.1", | ||
] | ||
|
||
[dependency-groups] | ||
dev = [ | ||
"ruff>=0.9.1", | ||
] |
Oops, something went wrong.