Skip to content

Commit

Permalink
[prakriya] Use the full Ganapatha with metadata
Browse files Browse the repository at this point in the history
This resolves GitHub issues #160, #161, and #162.

CONTEXT

Various rules in the Ashtadhyayi refer to one or more *ganas*,
collections of stems and other linguistic data that enumerate the
items that can trigger the rule.

Our previous approach used a small number of manually curated ganas
that we then used in the appropriate rules. But this gana data was
ultimately just copied from the data on ashtadhyayi.com, perhaps with
a few tweaks along the way.

Given this partial treatment, I've felt that it would be worthwhile
to properly and maturely model all of these ganas.

TECHNICAL DECISIONS

Given the number and size of these ganas, it seems simplest to couple
tightly with ashtadhyayi.com's data as a source of truth and then derive
the API we want programmatically. If there are errors upstream, fixing
them there will fix our data as well and improve the ecosystem overall.

The main technical decision here was this: do we store the data in the
code itself, or do we rely instead on an external data file?

Both positions have merits. Using an external data file means that users
can specify precisely which gana they want to use when passing some
linguistic item into the system. Doing so also follows the pattern we
use for the Dhatupatha, which is likewise stored as an external file.

Despite these merits, I opted to store the Ganapatha data directly in
the code itself. I am not convinced that this approach is right, but I
chose it for the following reasons:

- The Ganapatha and the Dhatupatha follow different usage patterns.
  Dhatus are almost always part of exactly one gana, whereas items in
  the Ganapatha are often in multiple ganas.

- Dhatu ganas are common knowledge and familiar, and almost anyone who
  knows some dhatu will also know its gana. Therefore, there is less
  friction in defining the gana explicitly (e.g. through a data file)
  and then passing it in. This is not the case for items in the Ganapatha.

- My hunch is that there is generally more variance in Dhatupathas,
  which motivates giving the user extra control by modeling it as an
  external file. I don't think this is the case for the Ganapatha.
  • Loading branch information
akprasad committed Jan 16, 2025
1 parent a90eb98 commit aa423e7
Show file tree
Hide file tree
Showing 28 changed files with 8,225 additions and 2,185 deletions.
1 change: 1 addition & 0 deletions vidyut-prakriya/scripts/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
uv.lock
1 change: 1 addition & 0 deletions vidyut-prakriya/scripts/.python-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3.13
Empty file.
2 changes: 1 addition & 1 deletion vidyut-prakriya/scripts/check_kaumudi_coverage.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,6 @@
continue

if i in tested_rules:
pass # print(f"{RULE_OK} {i}")
pass # print(f"{RULE_OK} {i}")
else:
print(f"{RULE_MISSING} {i}")
8 changes: 5 additions & 3 deletions vidyut-prakriya/scripts/check_rule_coverage.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
RULE_UNTESTED = "⚠️ "
RULE_MISSING = "❌"


def print_legend():
print("===== Legend ======")
print(f"{RULE_OK}\t\tSutra is tested.")
Expand All @@ -17,14 +18,15 @@ def print_legend():
print("These statuses are heuristics. Verify them by checking the underlying code.")
print("===================")


base = Path(__file__).parent.parent
src = base / "src"
tests = base / "tests"

all_rules = []
with open(base / "data/sutrapatha.tsv") as f:
for line in f:
code, text = line.split('\t')
code, text = line.split("\t")
all_rules.append(code)


Expand All @@ -40,7 +42,7 @@ def print_legend():
with open(tests / path) as f:
for line in f:
for match in re.findall(r"(\d+_\d+_\d+)", line):
tested_rules.add(match.replace('_', '.'))
tested_rules.add(match.replace("_", "."))

had_ok = False
for rule in all_rules:
Expand All @@ -64,7 +66,7 @@ def print_legend():
pada_tested = Counter()
pada_missing = Counter()
for rule in all_rules:
ap, _, sutra = rule.rpartition('.')
ap, _, sutra = rule.rpartition(".")
pada_total[ap] += 1
if rule in tested_rules:
pada_tested[ap] += 1
Expand Down
6 changes: 3 additions & 3 deletions vidyut-prakriya/scripts/check_unadi_coverage.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,14 @@
all_rules = []
with open(base / "data/unadipatha.tsv") as f:
for line in f:
code, text = line.strip().split('\t')
code, text = line.strip().split("\t")
all_rules.append(code)

tested_rules = set()
with open(base / "tests/kaumudi_67.rs") as f:
for line in f:
for match in re.findall(r'unadi_(\d+_\d+)', line):
tested_rules.add(match.replace('_', '.'))
for match in re.findall(r"unadi_(\d+_\d+)", line):
tested_rules.add(match.replace("_", "."))


num_ok = 0
Expand Down
304 changes: 304 additions & 0 deletions vidyut-prakriya/scripts/create_ganapatha.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,304 @@
"""Generates ganapatha.rs based on data from ashtadhyayi.com.
Usage:
uv run create_ganapatha.py > ../src/ganapatha.rs
"""

import json
import urllib.request
from vidyut.lipi import transliterate, Scheme


# Manual corrections applied on top of the upstream Ganapatha data while
# generating the Rust source. Unless noted otherwise, each table is keyed by
# the gana's SLP1 name.

# Per-gana word substitutions: an upstream word (inner key) is emitted as the
# corrected word (inner value).
REPLACE = {
"pfTvAdiH": {
"Uru": "uru",
},
"gahAdiH": {
"antasTa": "antaHsTa",
},
}

# Sutra-code substitutions: an upstream sutra code (key) is replaced by the
# code on the right before it is looked up in the Sutrapatha and emitted.
CODE_REPLACE = {
"3.1.70": "2.1.70",
}

# Extra items to emit immediately after a given (post-REPLACE) word of a gana.
INSERT_AFTER = {"BOrikyAdiH": {"BOriki": ["vEpeya"]}}

# Entries that start with "<" or contain a space are emitted as Rust comments
# rather than as items. For the entries listed here, the given items are
# emitted right after that comment.
EXPAND_COMMENT = {
"sarvAdiH": {
"<<pUrvaparAvaradakziRottarAparADarARivyavasTAyAmasaMjYAyAm>>": [
"pUrva",
"para",
"avara",
"dakziRa",
"uttara",
"apara",
"aDara",
],
"<<svamajYAtiDanAKyAyAm>>": ["sva"],
"<<antaraM bahiryogopasaMvyAnayoH>>": ["antara"],
},
"kASyAdiH": {
"<<ApadAdipUrvapadAtkAlAntAt>>": ["ApatkAla", "UrDvakAla", "tatkAla"],
},
}

# Upstream words that are skipped entirely for the given gana. This check is
# made against the raw upstream word, before REPLACE is applied.
DELETE = {
"kASyAdiH": {"Apad", "UrDva", "tat"},
# TODO: check on this. marIci, if added here, seems to block mArIca.
"bAhvAdiH": {"marIci"},
}


def load_ganapatha() -> dict:
    """Download and parse the Ganapatha data published by ashtadhyayi.com.

    The data is fetched over HTTPS from the project's GitHub repository on
    every call, so this function requires network access.

    Returns:
        The decoded JSON document.

    Raises:
        urllib.error.URLError: if the download fails.
        json.JSONDecodeError: if the response body is not valid JSON.
    """
    url = "https://raw.githubusercontent.com/ashtadhyayi-com/data/refs/heads/master/ganapath/data.txt"
    # Use a context manager so the HTTP response is always closed, even if
    # JSON decoding fails.
    with urllib.request.urlopen(url) as f:
        return json.load(f)


def load_sutrapatha() -> dict:
    """Load the Sutrapatha as a mapping from sutra code to sutra text.

    Reads ``../data/sutrapatha.tsv`` relative to the current working
    directory, so the script must be run from the ``scripts`` directory.
    Each non-blank line must contain exactly two tab-separated fields: the
    sutra code and the sutra text.

    Returns:
        A dict mapping each sutra code to its text.

    Raises:
        FileNotFoundError: if the data file does not exist.
        ValueError: if a non-blank line does not have exactly two fields.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open("../data/sutrapatha.tsv", encoding="utf-8") as f:
        raw = f.read()

    sutras = {}
    for line in raw.splitlines():
        line = line.strip()
        if not line:
            # Tolerate blank lines instead of failing to unpack them.
            continue
        code, text = line.split("\t")
        sutras[code] = text
    return sutras


def _to_const_name(slp_name: str) -> str:
if slp_name == "bAhvAdiH":
return "BAAHVADI"

keys = "fFxXeEoOKGCJYwWqQRTDPBSz"
values = [
"R",
"R",
"L",
"L",
"E",
"AI",
"O",
"AU",
"KH",
"GH",
"CH",
"JH",
"N",
"T",
"TH",
"D",
"DH",
"N",
"TH",
"DH",
"PH",
"BH",
"SH",
"SH",
]
assert len(keys) == len(values)
map = dict(zip(keys, values))

buf = []
for c in slp_name:
buf.append(map.get(c, c))
if slp_name.endswith("iH"):
buf.pop()
return "".join(buf).upper()


def main():
    """Print the generated Rust source for the Ganapatha module to stdout.

    Loads the Sutrapatha from disk and the Ganapatha data from the network,
    then prints:

    1. a fixed Rust prelude defining `GanapathaEntry` and `GanaKind`,
    2. one `pub(crate) const` per gana, with the manual corrections from
       `REPLACE`, `CODE_REPLACE`, `INSERT_AFTER`, `EXPAND_COMMENT`, and
       `DELETE` applied, and
    3. an `all_sutras` function that lists every constant in source order.

    Raises:
        KeyError: if a gana refers to a sutra code missing from the
            Sutrapatha, or if an upstream record lacks an expected field.
    """
    sutrapatha = load_sutrapatha()
    ganapatha = load_ganapatha()

    print("""// Autogenerated by scripts/create_ganapatha.py
//! Implements rules from the *Gaṇapāṭha*.
//!
//! This module is largely auto-generated from the Ganapatha data on <https://ashtadhyayi.com>.
//! We have made a few minor corrections after the fact.
//!
//! For the source data, see <https://ashtadhyayi.com/ganapath>.
#![allow(unused)]
/// Models a *gaṇa-sūtra* from the *Gaṇapāṭha*.
#[derive(Copy, Clone, Debug, Hash, Eq, Ord, PartialEq, PartialOrd)]
pub struct GanapathaEntry {
name: &'static str,
number: u16,
code: &'static str,
items: &'static [&'static str],
kind: GanaKind,
varttika: Option<&'static str>
}
/// Models the *gaṇa* type.
#[derive(Copy, Clone, Debug, Hash, Eq, Ord, PartialEq, PartialOrd)]
pub enum GanaKind {
/// A basic *gaṇa* whose members are all listed.
Basic,
/// A *gaṇa* whose members form an incomplete set. The full set can be known only by
/// observing actual usage.
Akrti,
}
impl GanapathaEntry {
pub(crate) const fn basic(
name: &'static str,
number: u16,
code: &'static str,
items: &'static [&'static str],
) -> Self {
Self {
name,
number,
code,
items,
kind: GanaKind::Basic,
varttika: None,
}
}
pub(crate) const fn akrti(
name: &'static str,
number: u16,
code: &'static str,
items: &'static [&'static str],
) -> Self {
Self {
name,
number,
code,
items,
kind: GanaKind::Akrti,
varttika: None,
}
}
pub(crate) const fn with_varttika(mut self: GanapathaEntry, varttika: &'static str) -> Self {
self.varttika = Some(varttika);
self
}
/// The name of this *gaṇa* in SLP1 transliteration.
pub fn name(&self) -> &str {
self.name
}
/// The number of this *gaṇa* relative to other *gaṇa*s in the Ganapatha.
pub fn number(&self) -> u16 {
self.number
}
/// The string ID of the Ashtadhyayi rule that first uses this *gaṇa*.
pub fn code(&self) -> &str {
self.code
}
/// All items in the *gaṇa*.
pub fn items(&self) -> &[&str] {
self.items
}
/// The type of this *gaṇa*.
pub fn kind(&self) -> GanaKind {
self.kind
}
/// A *vārttika* associated with this *gaṇa*, if one exists.
pub fn varttika(&self) -> Option<&str> {
self.varttika
}
}
""")

    seen = set()
    ordered_names = []
    for sutra in ganapatha["data"]:
        index = sutra["ind"]

        # Normalize the upstream gana name: two names are corrected outright,
        # and for all others an anusvara before k/d/D is rewritten as the
        # corresponding nasal.
        name = transliterate(sutra["name"], Scheme.Devanagari, Scheme.Slp1)
        if name == "ugavAdiH":
            name = "gavAdiH"
        elif name == "kattryAdiH":
            name = "katryAdiH"
        else:
            name = name.replace("Mk", "Nk")
            name = name.replace("Md", "nd")
            name = name.replace("MD", "nD")

        # Resolve this sutra's code BEFORE building the constant name. The
        # disambiguation below needs the adhyaya of the *current* sutra;
        # previously `code` was assigned afterwards, so the suffix came from
        # the preceding sutra (and would have raised UnboundLocalError had
        # the very first entry been a duplicate).
        # NOTE(review): regenerating may rename a `_N` constant if a duplicate
        # name happens to fall right after an adhyaya boundary.
        code = sutra["sutra"]
        code = CODE_REPLACE.get(code, code)

        const_name = _to_const_name(name)
        if const_name in seen:
            # Disambiguate with the adhyAya number.
            const_name += "_" + code[0]
        seen.add(const_name)
        ordered_names.append(const_name)

        # Items are separated by "." in the upstream data; drop empty pieces.
        words = transliterate(sutra["words"], Scheme.Devanagari, Scheme.Slp1)
        words = [x.strip() for x in words.split(".") if x.strip()]

        code_text = transliterate(sutrapatha[code], Scheme.Slp1, Scheme.Iso15919)
        varttika_text = transliterate(sutra["vartika"], Scheme.Devanagari, Scheme.Slp1)
        is_akrti = sutra["type"] == "A"

        # Doc comment for the constant. `name[:-1]` drops the final character
        # of the SLP1 name before it is shown in ISO 15919.
        name_iso = transliterate(name[:-1], Scheme.Slp1, Scheme.Iso15919)
        print(f"/// *{name_iso}-gaṇa* ({index}), first used in the following *sūtra*:")
        print("///")
        print(f"/// > {code} *{code_text}*")
        if varttika_text:
            print("/// The *sūtra* has the following *vārttika*:")
            print("///")
            print(f"/// > *{varttika_text}*")

        if is_akrti:
            print(f"pub(crate) const {const_name}: GanapathaEntry = GanapathaEntry::akrti(")
        else:
            print(f"pub(crate) const {const_name}: GanapathaEntry = GanapathaEntry::basic(")
        print(f'"{name}", {index}, "{code}", &[')
        for w in words:
            # DELETE is checked against the raw upstream word.
            if w in DELETE.get(name, set()):
                continue

            w = REPLACE.get(name, {}).get(w, w)
            extras = INSERT_AFTER.get(name, {}).get(w, [])

            if w.startswith("<") or " " in w:
                # Not a plain item: keep it as a Rust comment and emit any
                # manually curated expansion for it.
                print(f" // {w}")
                expanded = EXPAND_COMMENT.get(name, {}).get(w, [])
                for e in expanded:
                    print(f' "{e}",')
            else:
                print(f' "{w}",')

            for e in extras:
                print(f' "{e}",')

        if varttika_text:
            print(f']).with_varttika("{varttika_text}");\n')
        else:
            print("]);\n")

    print("""
/// Returns an ordered iterator over all *gaṇa-sūtra*s.
pub fn all_sutras() -> impl Iterator<Item = &'static GanapathaEntry> {
const SUTRAS: &[GanapathaEntry] = &[""")

    for name in ordered_names:
        print(f" {name},")

    print("""];
SUTRAS.iter()
}""")


# Generate the file only when run as a script (see the module docstring for
# the intended invocation).
if __name__ == "__main__":
    main()
14 changes: 14 additions & 0 deletions vidyut-prakriya/scripts/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
[project]
name = "scripts"
version = "0.1.0"
description = "Helper scripts for vidyut-prakriya: rule coverage checks and Ganapatha code generation"
readme = "README.md"
requires-python = ">=3.13"
dependencies = [
"vidyut>=0.3.1",
]

[dependency-groups]
dev = [
"ruff>=0.9.1",
]
Loading

0 comments on commit aa423e7

Please sign in to comment.