Skip to content

Commit

Permalink
[prakriya] Add sutras for all source types
Browse files Browse the repository at this point in the history
Not all files are complete, but this provides adequate coverage for now.
  • Loading branch information
akprasad committed Jan 22, 2025
1 parent 804b62c commit ac3f15d
Show file tree
Hide file tree
Showing 30 changed files with 463 additions and 68 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion bindings-python/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ create_data:
#
# VIDYUT_DATA_DIR="/path/to/your/dir" make integration_tests
integration_tests: develop
VIDYUT_DATA_DIR="data-0.3.0" uv run pytest test/integration/
VIDYUT_DATA_DIR="../vidyut-data/data/build/vidyut-latest" uv run pytest test/integration/

# Lints all Rust and Python code.
lint:
Expand Down
20 changes: 14 additions & 6 deletions bindings-python/src/prakriya.rs
Original file line number Diff line number Diff line change
Expand Up @@ -267,16 +267,24 @@ impl PyData {
pub fn load_sutras(&self) -> PyResult<Vec<PySutra>> {
let mut sutras = Vec::new();

for (filename, source) in &[
("dhatupatha-ganasutras.tsv", PySource::Dhatupatha),
("sutrapatha.tsv", PySource::Ashtadhyayi),
("varttikas.tsv", PySource::Varttika),
("unadipatha.tsv", PySource::Unadipatha),
for (source, filename) in &[
(PySource::Ashtadhyayi, "sutrapatha.tsv"),
(PySource::Dhatupatha, "dhatupatha-ganasutras.tsv"),
(PySource::Kashika, "kashika.tsv"),
(PySource::Kaumudi, "kaumudi.tsv"),
(PySource::Linganushasana, "linganushasanam.tsv"),
(PySource::Phit, "phit-sutras.tsv"),
(PySource::Unadipatha, "unadipatha.tsv"),
(PySource::Varttika, "varttikas.tsv"),
] {
let path = self.0.join(filename);
let text = std::fs::read_to_string(path)?;

for line in text.lines() {
for (i, line) in text.lines().enumerate() {
if i == 0 {
// Skip headers.
continue;
}
let fields: Vec<_> = line.split('\t').collect();
if let &[code, text] = &fields[..] {
sutras.push(PySutra::new(*source, code.to_string(), text.to_string()))
Expand Down
8 changes: 4 additions & 4 deletions bindings-python/test/integration/test_kosha.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,14 +53,14 @@ def test_basic_subanta(kosha):

def test_basic_avyaya(kosha):
entries = kosha.get("ca")
entries = [e for e in entries if isinstance(e, PadaEntry.Avyaya)]
entries = [e for e in entries if isinstance(e, PadaEntry.Subanta)]

ca = entries[0]
assert ca.lemma == "ca"

assert repr(ca) == (
"PadaEntry.Avyaya(pratipadika_entry="
"PratipadikaEntry.Basic(pratipadika=Pratipadika(text='ca'), lingas=[Linga.Pum]))"
"PadaEntry.Subanta(pratipadika_entry="
"PratipadikaEntry.Basic(pratipadika=Pratipadika(text='ca', is_avyaya=True), lingas=[Linga.Pum]))"
)


Expand Down Expand Up @@ -167,4 +167,4 @@ def test_contains_subanta(kosha, word):
)
def test_contains_avyaya(kosha, word):
entries = kosha.get(word)
assert any(isinstance(e, PadaEntry.Avyaya) for e in entries)
assert any(isinstance(e, PadaEntry.Subanta) for e in entries)
4 changes: 3 additions & 1 deletion bindings-python/vidyut/docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@ Vidyut
======

Welcome! This documentation describes the Python bindings for `Vidyut`_, a Rust toolkit that
provides reliable infrastructure for Sanskrit software.
provides reliable infrastructure for Sanskrit software. (For a quick demo of what Vidyut
can do, see our WebAssembly bindings `here`_.)

Vidyut aims to provide performant and high-quality solutions for the common problems that
Sanskrit programmers face. These problems include:
Expand All @@ -24,6 +25,7 @@ Vidyut is under active development as part of the `Ambuda`_ project. If you want
about Vidyut or get involved, we encourage you to `join our community`_ of Sanskrit enthusiasts.
All are welcome regardless of background.

.. _here: https://ambuda-org.github.io/
.. _Vidyut: https://github.com/ambuda-org/vidyut
.. _Ambuda: https://ambuda.org
.. _join our community: https://github.com/ambuda-org/vidyut?tab=readme-ov-file#community
Expand Down
2 changes: 1 addition & 1 deletion vidyut-cheda/src/chedaka.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ impl Config {
}

pub fn sandhi_rules(&self) -> PathBuf {
self.vidyut_base_path.join("sandhi-rules.csv")
self.vidyut_base_path.join("sandhi/rules.csv")
}

pub fn kosha_path(&self) -> PathBuf {
Expand Down
1 change: 0 additions & 1 deletion vidyut-cheda/src/strict_mode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,6 @@ fn if_not_in_compound_then_linga_match(cur: &Phrase, pool: &TokenPool, s: &Suban
} else if s.is_avyaya() {
true
} else {
dbg!(s.pratipadika_entry());
match s.pratipadika_entry() {
PratipadikaEntry::Basic(b) => b.lingas().contains(&s.linga()),
// Otherwise, any linga is allowed.
Expand Down
1 change: 1 addition & 0 deletions vidyut-data/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ vidyut-cheda = { path = "../vidyut-cheda" }
vidyut-kosha = { path = "../vidyut-kosha" }
vidyut-lipi = { path = "../vidyut-lipi" }
vidyut-prakriya = { path = "../vidyut-prakriya" }
vidyut-sandhi = { path = "../vidyut-sandhi" }
clap.workspace = true
csv = "1.3.1"
fst = "0.4.7"
Expand Down
5 changes: 3 additions & 2 deletions vidyut-data/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,13 @@ create_all_data:
@./scripts/create_all_data.sh

create_sandhi_rules:
mkdir -p data/build/vidyut-latest/sandhi/
RUST_LOG=info cargo run --release --bin create_sandhi_rules -- \
--data-dir data/build/vidyut-latest
--output-path data/build/vidyut-latest/sandhi/rules.csv

# Creates a kosha and write it to disk.
create_kosha:
# cd scripts && uv run fetch_dhatu_metadata.py > ../data/raw/lex/dhatu-metadata.csv
cd scripts && uv run fetch_dhatu_metadata.py > ../data/raw/lex/dhatu-metadata.csv
RUST_BACKTRACE=1 RUST_LOG=info cargo run --release --bin create_kosha -- \
--input-dir data/raw/lex \
--dhatupatha ../vidyut-prakriya/data/dhatupatha.tsv \
Expand Down
51 changes: 51 additions & 0 deletions vidyut-data/scripts/fetch_sutra_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import csv
import json
import urllib.request
from vidyut.lipi import transliterate, Scheme

PHIT = "https://raw.githubusercontent.com/ashtadhyayi-com/data/refs/heads/master/fit/data.txt"
LINGA = "https://raw.githubusercontent.com/ashtadhyayi-com/data/refs/heads/master/linganushasanam/data.txt"


def process_phits():
    """Download the Phit Sutras and write them to `phit-sutras.tsv`.

    Fetches the JSON payload at `PHIT`, builds a `pada.number` code for each
    entry under its "data" key, transliterates the sutra text from Devanagari
    to SLP1, and writes the rows as a 2-column TSV (`code`, `text`) preceded
    by a header row.
    """
    # Use a context manager so the HTTP response is closed once parsed.
    with urllib.request.urlopen(PHIT) as response:
        data = json.load(response)

    rows = []
    for row in data["data"]:
        pada = row["p"]
        number = row["n"]

        code = f"{pada}.{number}"
        text = transliterate(row["s"], Scheme.Devanagari, Scheme.Slp1)
        text = text.strip()
        rows.append((code, text))

    # NOTE: this path is relative to the current working directory.
    out_file = "../../vidyut-prakriya/data/phit-sutras.tsv"
    # `newline=""` lets the csv module control line endings itself (otherwise
    # Windows would emit "\r\r\n"); the encoding is pinned so the output does
    # not depend on the platform default.
    with open(out_file, "w", newline="", encoding="utf-8") as out:
        w = csv.writer(out, delimiter="\t")
        w.writerow(["code", "text"])
        w.writerows(rows)


def process_lingas():
    """Download the Linganushasanam and write it to `linganushasanam.tsv`.

    Fetches the JSON payload at `LINGA`, uses each entry's "id" (as a string)
    as its code, transliterates the "sutra" text from Devanagari to SLP1, and
    writes the rows as a 2-column TSV (`code`, `text`) preceded by a header
    row.
    """
    # Use a context manager so the HTTP response is closed once parsed.
    with urllib.request.urlopen(LINGA) as response:
        data = json.load(response)

    rows = []
    for row in data["data"]:
        code = str(row["id"])
        text = transliterate(row["sutra"], Scheme.Devanagari, Scheme.Slp1)
        text = text.strip()
        rows.append((code, text))

    # NOTE: this path is relative to the current working directory.
    out_file = "../../vidyut-prakriya/data/linganushasanam.tsv"
    # `newline=""` lets the csv module control line endings itself (otherwise
    # Windows would emit "\r\r\n"); the encoding is pinned so the output does
    # not depend on the platform default.
    with open(out_file, "w", newline="", encoding="utf-8") as out:
        w = csv.writer(out, delimiter="\t")
        w.writerow(["code", "text"])
        w.writerows(rows)

if __name__ == "__main__":
    # Only fetch remote data and rewrite the TSV files when executed as a
    # script, so that importing this module has no side effects.
    process_phits()
    process_lingas()
16 changes: 5 additions & 11 deletions vidyut-data/src/bin/create_sandhi_rules.rs
Original file line number Diff line number Diff line change
@@ -1,21 +1,19 @@
//! Generates most of the common sandhi rules that occur between two *pada*s.
/*
use clap::Parser;
use std::error::Error;
use std::path::{Path, PathBuf};
use vidyut_cheda::Config;
use vidyut_cheda::Result;
use vidyut_sandhi::{generate_rules, Rule};

#[derive(Parser, Debug)]
#[command(author, version, about)]
struct Args {
/// Path to the output data directory.
/// Where to write our rules.
#[arg(short, long)]
data_dir: PathBuf,
output_path: PathBuf,
}

fn write_rules(rules: &[Rule], path: &Path) -> Result<()> {
fn write_rules(rules: &[Rule], path: &Path) -> Result<(), Box<dyn Error>> {
let mut w = csv::Writer::from_path(path)?;
w.write_record(["first", "second", "result"])?;
for r in rules {
Expand All @@ -28,13 +26,9 @@ fn write_rules(rules: &[Rule], path: &Path) -> Result<()> {
fn main() {
let args = Args::parse();
let rules = generate_rules();
let config = Config::new(args.data_dir);

if let Err(err) = write_rules(&rules, config.sandhi()) {
if let Err(err) = write_rules(&rules, &args.output_path) {
println!("{}", err);
std::process::exit(1);
}
}
*/

fn main() {}
16 changes: 11 additions & 5 deletions vidyut-kosha/src/entries.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
//! Models the entries stored in the kosha.
//!
//! All entries make heavy use of lifetime annotations to refer to data defined on `Kosha`.
//! To persist this data for your application, clone the specific fields you need.
use crate::errors::{Error, Result};
use serde::{Deserialize, Serialize};
use vidyut_prakriya::args as vp;
Expand All @@ -17,6 +18,9 @@ pub struct DhatuEntry<'a> {
}

/// Metadata for some dhatu.
///
/// We store metadata in its own `struct` so that we avoid bloating `DhatuEntry` and the objects
/// that use `DhatuEntry`, such as `PratipadikaEntry` and `PadaEntry`.
#[derive(Clone, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize, Deserialize)]
pub struct DhatuMeta {
pub(crate) clean_text: String,
Expand Down Expand Up @@ -118,7 +122,8 @@ impl<'a> DhatuEntry<'a> {
self.meta.map_or("", |x| &x.clean_text)
}

/// Returns the Sanskrit meaning of this dhatu's *mūla* as an SLP1 string.
/// Returns the Sanskrit meaning of this dhatu's *mūla* as an SLP1 string. All of these
/// meaning strings come directly from the Dhatupatha.
///
/// We have meaning strings only for the ~2000 *mūla* dhatus from the Dhatupatha. Any roots
/// derived from these ~2000 will share their `artha` with the dhatu they come from.
Expand Down Expand Up @@ -150,7 +155,8 @@ impl<'a> DhatuEntry<'a> {

/// Sets the metadata on this dhatu.
///
/// This method is for libraries building a `Kosha` from scratch.
/// This method is for libraries building a `Kosha` from scratch. Otherwise, prefer using
/// the accessor methods defined on `DhatuEntry`.
pub fn with_meta(mut self, meta: &'a DhatuMeta) -> Self {
self.meta = Some(meta);
self
Expand All @@ -170,7 +176,7 @@ impl<'a> From<DhatuEntry<'a>> for Dhatu {
}

impl DhatuMeta {
/// Returns a builder over this `DhatuEntry`.
/// Returns a builder for some `DhatuMeta` struct.
///
/// This builder is utility code for inserting new `DhatuEntry` objects into a `Kosha`. If you
/// are not building a `Kosha` yourself, you can ignore this method.
Expand Down Expand Up @@ -216,13 +222,13 @@ impl DhatuMetaBuilder {
self
}

/// (Optional) Sets the dhatu pada for this entry.
/// (Optional) Sets the dhatu pada.
pub fn pada(mut self, pada: String) -> Self {
self.pada = Some(pada);
self
}

/// Builds a `DhatuEntry`.
/// Builds a `DhatuMeta`.
pub fn build(self) -> Result<DhatuMeta> {
Ok(DhatuMeta {
clean_text: match self.clean_text {
Expand Down
14 changes: 7 additions & 7 deletions vidyut-prakriya/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -55,19 +55,19 @@ test_krdantas:
cargo run --release --example snapshot_tests validate \
--test-type krdantas \
-f test-files/krdantas-basic.msgpack \
-h "abe607447082b6ca6758018a7457e27f0bf9f5b271ac918d79ad8248e6f5f742" \
-h "a9f8335433aaabf6497a3bd3c8bbfd5bc1f42cc4f3398eb93ce606e825cd9449" \
-f test-files/krdantas-nic.msgpack \
-h "4eedde749a5849b36136e304af9273e03993667d486f02d3bf8323419fe59495" \
-h "c0eeacf5c5d987148aab0d08c5c6c63b5220d69014f2f2cff84d488ffbd4906b" \
-f test-files/krdantas-san.msgpack \
-h "7e08b483201ae98d3c16c130e2c696a5a7ad8c16f7cafce1c2f3910d1d17ef38" \
-h "0a3650fb93ac3d5893d88a1bbd1e517e65ff1c8ac4f87187248e15a9bb689ea7" \
-f test-files/krdantas-yan.msgpack \
-h "ec01fd40625cb40ad75195c713aad8c52ab4d8bb8bfc87e6818637211699efe2" \
-h "e8b244e52be039dd8efaaa5fc274150b4550cc74a09e33b45d0bfd22b70c9b2a" \
-f test-files/krdantas-yan-luk.msgpack \
-h "9b6f271c9ae57bf9ae05472fb4652489b0d8d6fee4d8c499fc63fc7edd1064ae" \
-h "62a0366f77543a2a17b5aa0806b42c1855ee1e85fe33e011c3284e87eecbe4a7" \
-f test-files/krdantas-san-nic.msgpack \
-h "ec38109021286d0ffb6ed5343f472ab8a6ffe8179aa6f76cb11449b3fb650c4c" \
-h "e28a61e8c3e078c4d2ddae93e78ab5fb8ed44d700f2ae55ca72d9ae9544b6f58" \
-f test-files/krdantas-nic-san.msgpack \
-h "de4a2869943599d21ce8a760bf2861b116aeee4bf6c78e3ac416b7bbbdcf1011"
-h "292154fceae7d2821420076949d9df89564f6f7fc9e38a00298363f9116cef63"

test_dhatus:
cargo run --release --example snapshot_tests validate \
Expand Down
6 changes: 5 additions & 1 deletion vidyut-prakriya/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,11 @@ for p in prakriyas {
println!("{}", p.text());
println!("---------------------------");
for step in p.history() {
let terms: Vec<_> = step.result().iter().map(|x| x.text()).filter(|x| !x.is_empty()).collect();
let terms: Vec<_> = step.result()
.iter()
.map(|x| x.text())
.filter(|x| !x.is_empty())
.collect();
let result = terms.join(" + ");
println!("{:<10} | {}", step.rule().code(), result);
}
Expand Down
35 changes: 25 additions & 10 deletions vidyut-prakriya/data/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,21 @@ Data
====

We have designed `vidyut-prakriya` so that it can run without any side data.
But in practice, it is useful to have a dhatupatha available to generate
various words and stems. For this reason, the `vidyut-prakriya` crate includes
`dhatupatha.tsv`, a comprehensive Dhatupatha that we also use in our exhaustive
test suite.
But in practice, it is useful to have side data available to generate words
and show the text of specific rules.

Most of the data files here were sourced from [ashtadhyayi.com][a-com],
and the author of ashtadhyayi.com has graciously agreed to share these files
with us under an MIT license.

Creating the data file
----------------------
[a-com]: https://ashtadhyayi.com


`dhatupatha.tsv`
----------------

`dhatupatha.tsv` was sourced from [ashtadhyayi.com][a-com], and the author
of ashtadhyayi.com has graciously agreed to share their dhatupatha with us
under an MIT license.
`dhatupatha.tsv` is a comprehensive Dhatupatha that we also use in our
exhaustive test suite.

This dhatupatha is a superset of traditional dhatupathas from five different
sources:
Expand Down Expand Up @@ -46,6 +49,18 @@ create `dhatupatha.tsv`, we downloaded this file and made the following changes:
[sanskrit-verb]: https://github.com/drdhaval2785/SanskritVerb


[a-com]: https://ashtadhyayi.com
Sutra files
-----------

Our sutras are split into the following 5 files:

- `dhatupatha-ganasutras.tsv` contains gana-sutras from the Dhatupatha.
- `linganushasanam.tsv` contains sutras from the Linganushasanam.
- `phit-sutras.tsv` contains sutras from the Phit Sutras.
- `sutrapatha.tsv` contains the Ashtadhyayi.
- `unadipatha.tsv` contains the Unadipatha.

All sutra files are 2-column TSV files with the following columns:

- `code`, a short identifier for this sutra.
- `text`, the sutra text encoded in SLP1.
1 change: 1 addition & 0 deletions vidyut-prakriya/data/dhatupatha-ganasutras.tsv
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
code text
01.0933 GawAdayo mitaH
01.0934 janIjFzkanasuraYjo'mantASca
01.0935 jvalahvalahmalanamAmanupasargAdvA
Expand Down
10 changes: 10 additions & 0 deletions vidyut-prakriya/data/kashika.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
code text
3.2.93 karmaRIti vartamAne punaH karmagrahaRaM kartuH kutsAnimitte karmaRi yaTA syAt, karmamAtre mA BUt
3.2.138 cakAro'nuktasamuccayArTaH
4.4.13 krayavikrayagrahaRaM saMGAtavigfhItArTam
4.4.101 Rapratyayo'pyatrezyate
6.1.3 kecidAhurvyaYjanasyeti
6.1.64 zWivu ityasya dvitIyasTakArazWakAraScezyate
7.2.16 cakAro'nuktasamuccayArTaH
7.3.78 sartervegitAyAM gatO DAvAdeSamicCanti
7.4.71 fkArEkadeSo rePo halgrahaRena gfhyate
Loading

0 comments on commit ac3f15d

Please sign in to comment.