Skip to content

Commit

Permalink
[prakriya] Add nyap constructor and bindings
Browse files Browse the repository at this point in the history
This lets us more easily derive words like *nadI*.

In addition, tweak various docs for clarity.
  • Loading branch information
akprasad committed Jan 21, 2025
1 parent 1426587 commit 188ffec
Show file tree
Hide file tree
Showing 11 changed files with 134 additions and 110 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,5 @@ docs:
cargo doc --all --no-deps --open --document-private-items

# Runs documentation tests for all crates in the repository.
test_doc:
test_docs:
cargo test --all --doc
28 changes: 16 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,18 +1,17 @@
<div align="center">
<h1>विद्युत्</h1>
<p><i>Reliable infrastructure for Sanskrit software</i></p>
</div>

Vidyut provides reliable infrastructure for Sanskrit software.

Specifically, Vidyut aims to provide performant and high-quality solutions for the
common problems that Sanskrit programmers face. Some of these problems include:

- *Transliteration*, or conversion of Sanskrit text from one script to another. (भू → bhū)
Vidyut aims to provide performant and high-quality solutions for the common problems
that Sanskrit programmers face. Some of these problems include:

- *Word generation*, or converting bases and suffixes into complete words. (भू → भवति)

- *Word lookup*, or mapping a complete word back to its bases and suffixes. (भवति → भू)

- *Transliteration*, or conversion of Sanskrit text from one script to another. (भू → bhū)

- *Metrical analysis*, or understanding the meter used by a piece of Sanskrit text.

- *Sandhi changes*, or applying and undoing the sound changes that occur between pieces of
Expand Down Expand Up @@ -100,20 +99,25 @@ Python project.
Once your setup is ready, you can install the `vidyut` package:

```shell
# With pip
$ pip install vidyut

# With uv
$ uv add vidyut

# With pip
$ pip install vidyut
````

You can also install directly from this repository. Doing so compiles the repository
from scratch and might take several minutes, so we strongly suggest using our latest
[PyPI release][pypi] instead.

```
# The command is very slow, so pass `--verbose` to monitor its status.
pip install -e "git+https://github.com/ambuda-org/vidyut.git#egg=vidyut&subdirectory=bindings-python" --verbose
```shell
# Building from scratch is slow, so we pass `--verbose` to monitor its status.
# With uv
$ uv add "git+https://github.com/ambuda-org/vidyut.git#subdirectory=bindings-python" --verbose
# With pip
$ pip install -e "git+https://github.com/ambuda-org/vidyut.git#egg=vidyut&subdirectory=bindings-python" --verbose
```

We recommend using our pre-built linguistic data, which is available as a ZIP file
Expand Down
38 changes: 32 additions & 6 deletions bindings-python/src/prakriya/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1193,15 +1193,15 @@ impl PyPratipadika {
impl PyPratipadika {
pub fn __repr__(&self) -> String {
match &self.pratipadika {
Pratipadika::Basic(_) => format!(
"Pratipadika(text='{}', is_avyaya={})",
self.text,
Pratipadika::Basic(_) => {
if self.pratipadika.is_avyaya() {
"True"
format!("Pratipadika(text='{}', is_avyaya=True)", self.text)
} else if self.pratipadika.is_nyap() {
format!("Pratipadika(text='{}', is_nyap=True)", self.text)
} else {
"False"
format!("Pratipadika(text='{}')", self.text)
}
),
}
_ => "Pratipadika(...)".to_string(),
}
}
Expand Down Expand Up @@ -1230,6 +1230,26 @@ impl PyPratipadika {
})
}

/// Builds a pratipadika that is treated as ending in a nyAp-pratyaya.
///
/// `text` must be a valid SLP1 string. If it is not, a `ValueError` is returned.
#[staticmethod]
#[pyo3(signature = (text))]
pub fn nyap(text: String) -> PyResult<Self> {
    // Validate a copy so that the original `text` can still be stored on the wrapper.
    let safe = Slp1String::from(text.clone())
        .map_err(|_| PyValueError::new_err(format!("{text} must be an SLP1 string.")))?;

    Ok(Self {
        pratipadika: Pratipadika::nyap(safe),
        text,
    })
}

/// Create a new pratipadika that is a krdanta.
#[staticmethod]
#[pyo3(signature = (dhatu, krt))]
Expand Down Expand Up @@ -1266,6 +1286,12 @@ impl PyPratipadika {
pub fn is_avyaya(&self) -> bool {
self.pratipadika.is_avyaya()
}

/// Whether or not this pratipadika should be treated as a *nyAp-anta*.
///
/// Exposed as a getter; the value is read from the wrapped `Pratipadika`.
#[getter]
pub fn is_nyap(&self) -> bool {
self.pratipadika.is_nyap()
}
}

impl From<Pratipadika> for PyPratipadika {
Expand Down
4 changes: 2 additions & 2 deletions bindings-python/test/unit/kosha/test_entries.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def test_pratipadika_entry__dunders():

# __repr__
assert repr(rama_entry) == (
"PratipadikaEntry.Basic(pratipadika=Pratipadika(text='rAma', is_avyaya=False), "
"PratipadikaEntry.Basic(pratipadika=Pratipadika(text='rAma'), "
"lingas=[Linga.Pum])"
)

Expand Down Expand Up @@ -198,7 +198,7 @@ def test_pada_entry__dunders():
assert repr(rama_pada) == (
"PadaEntry.Subanta("
"pratipadika_entry=PratipadikaEntry.Basic("
"pratipadika=Pratipadika(text='rAma', is_avyaya=False), lingas=[Linga.Pum]), "
"pratipadika=Pratipadika(text='rAma'), lingas=[Linga.Pum]), "
"linga=Linga.Pum, vibhakti=Vibhakti.Prathama, vacana=Vacana.Eka)"
)

Expand Down
4 changes: 2 additions & 2 deletions bindings-python/test/unit/prakriya/test_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ def test_dhatu__dunders():

def test_pratipadika_new():
p = Pratipadika.basic("deva")
assert repr(p) == "Pratipadika(text='deva', is_avyaya=False)"
assert repr(p) == "Pratipadika(text='deva')"


def test_pratipadika_new__fails_if_no_args():
Expand All @@ -156,7 +156,7 @@ def test_pratipadika__dunders():
_ = sorted([deva, eva])

# __repr__
assert repr(deva) == "Pratipadika(text='deva', is_avyaya=False)"
assert repr(deva) == "Pratipadika(text='deva')"


def test_subanta():
Expand Down
12 changes: 12 additions & 0 deletions bindings-python/test/unit/prakriya/test_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,18 @@ def test_derive_subantas():
assert expected == actual


def test_derive_subantas_with_nyap():
    """A stem built with `Pratipadika.nyap` derives the expected subanta text."""
    vyakarana = Vyakarana()
    nadi = Pratipadika.nyap("nadI")
    pada = Pada.Subanta(nadi, Linga.Stri, Vibhakti.Prathama, Vacana.Eka)

    # Compare as sets so the check does not depend on the order of the results.
    derived = {prakriya.text for prakriya in vyakarana.derive(pada)}
    assert {"nadI"} == derived


@pytest.mark.parametrize(
"code,expected",
[
Expand Down
35 changes: 19 additions & 16 deletions vidyut-prakriya/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,19 @@
[paper]: https://iscls.github.io/assets/files/proceedings/2024.iscls.7.pdf

`vidyut-prakriya` generates Sanskrit words with their prakriyās (derivations)
according to the rules of Paninian grammar and currently implements around
2,000 rules. Our long-term goal is to provide a complete implementation of the
Ashtadhyayi.
according to the rules of traditional Sanskrit grammar. It currently implements
more than 2,000 rules from the *Aṣṭādhyāyī*, the core text of the grammatical
tradition. Our long-term goal is to provide a complete implementation of the
*Aṣṭādhyāyī*.

This [crate][crate] is under active development as part of the [Ambuda][ambuda]
project. If you enjoy our work and wish to contribute to it, please see the
[Contributing](#contributing) section below. We also encourage you to [join our
Discord server][discord], where you can meet other Sanskrit programmers and
enthusiasts.

An online demo is available [here][demo].
An online demo, which also demonstrates this crate's WebAssembly bindings, is
available [here][demo].

- [Overview](#overview)
- [Usage](#usage)
Expand All @@ -39,30 +41,31 @@ Overview
`vidyut-prakriya` has three distinguishing qualities:

1. *Fidelity*. We follow the rules of Paninian grammar as closely as possible.
Each word we return can optionally include a prakriyā that lists each rule
Each word we return can optionally include a *prakriyā* that lists each rule
that was used as well as its result.

2. *Speed*. On my laptop (a 2.4GHz 8-core CPU with 64 GB of DDR4 RAM), this
crate generates almost 50,000 words per second on a single thread. All else
equal, a fast program is easier to run and test, which means that we can
produce a larger word list at a higher standard of quality.
2. *Speed*. We have paid special attention to overall performance, especially
by caching partial results. These kinds of changes make `vidyut-prakriya`
several orders of magnitude faster than other word generators.

3. *Portability*. This crate compiles to fast native code and can be bound to
most other programming languages with a bit of effort. In particular, this
crate can be compiled to WebAssembly, which means that it can run in a
modern web browser.
most other programming languages with a bit of effort. We provide first-class
support for Python bindings through our [vidyut][vidyut-py] Python package,
and we also maintain bindings for WebAssembly.

`vidyut-prakriya` has excellent support for Sanskrit's basic word types,
including *subanta*s, *tiṅanta*s, *kṛdanta*s, and *taddhitānta*s. It has
moderate support for *samāsa*s and weak support for accent rules.

[vidyut-py]: https://vidyut.readthedocs.io/en/latest/


Usage
-----

`vidyut-prakriya` supports two modes of use:

### Command-line use
### As a binary

The first way to use `vidyut-prakriya` is as a command-line tool for generating
Sanskrit words. For example, you can generate all basic *tiṅanta*s in *kartari
Expand All @@ -80,10 +83,10 @@ compile and complete within a few seconds.
You can find other example commands by exploring the `Makefile` and in
particular the various invocations in `create_test_files`.

### Programmatic use
### As a library

The second way to use `vidyut-prakriya` is programmatically. For example, we
can generate simple verbs like so:
The second way to use `vidyut-prakriya` is as a library in your own binaries.
For example, we can generate simple verbs like so:

```rust
use vidyut_prakriya::Vyakarana;
Expand Down
12 changes: 12 additions & 0 deletions vidyut-prakriya/src/args/krt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -668,6 +668,18 @@ impl Krdanta {
self.require.as_ref()
}

/// Returns this krdanta with its prayoga set to `prayoga`.
///
/// Takes `self` by value and hands it back, so the call can be chained builder-style.
pub fn with_prayoga(self, prayoga: Prayoga) -> Self {
    let mut krdanta = self;
    krdanta.prayoga = Some(prayoga);
    krdanta
}

/// Returns this krdanta with its lakara set to `lakara`.
///
/// Takes `self` by value and hands it back, so the call can be chained builder-style.
pub fn with_lakara(self, lakara: Lakara) -> Self {
    let mut krdanta = self;
    krdanta.lakara = Some(lakara);
    krdanta
}

/// Sets the required value for this krdanta.
pub fn with_require(mut self, s: impl AsRef<str>) -> Self {
self.require = Some(s.as_ref().to_string());
Expand Down
14 changes: 14 additions & 0 deletions vidyut-prakriya/src/args/pratipadika.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@ impl BasicPratipadika {
pub fn is_avyaya(&self) -> bool {
self.is_avyaya
}

/// Returns whether this pratipadika should be treated as ending in a *nyAp pratyaya*.
///
/// This reports the `is_nyap` flag stored on the struct.
pub fn is_nyap(&self) -> bool {
self.is_nyap
}
}

/// A nominal stem.
Expand Down Expand Up @@ -85,6 +90,15 @@ impl Pratipadika {
_ => false,
}
}

/// Returns whether the pratipadika should be treated as ending in a *nyAp pratyaya*.
///
/// Delegates to the inner value for `Basic` stems and is `false` for every other variant.
pub fn is_nyap(&self) -> bool {
    match self {
        Self::Basic(b) => b.is_nyap(),
        // Krdantas and all remaining variants are never treated as nyAp-antas here.
        _ => false,
    }
}
}

impl TryFrom<&str> for Pratipadika {
Expand Down
12 changes: 5 additions & 7 deletions vidyut-prakriya/src/dhatupatha.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
/*!
Utility functions for working with the Dhatupatha file included in this crate. For details, see the
comments on the `Dhatupatha` struct.
*/
//! Utility functions for working with the Dhatupatha file included in this crate.
//! For details, see the comments on the `Dhatupatha` struct.
use crate::args::{Antargana, Dhatu, Gana};
use crate::core::errors::*;
Expand Down Expand Up @@ -119,7 +117,7 @@ pub fn create_dhatu(aupadeshika: impl AsRef<str>, gana: Gana, number: u16) -> Re
}

impl Dhatupatha {
/// Loads a dhatupatha from the provided TSV.
/// Loads a dhatupatha from a TSV file.
///
/// This function expects a TSV with headers and at least two columns. The first column is a
/// short numeric code associated with the dhatu (e.g. `"01.0001"`), and the second column is
Expand All @@ -139,7 +137,7 @@ impl Dhatupatha {
Self::from_text(&content)
}

/// Loads a dhatupatha from the input text string.
/// Loads a dhatupatha from a TSV string.
///
/// This function is best suited for environments that don't have access to an underlying file
/// system, such as when running with WebAssembly.
Expand Down Expand Up @@ -196,7 +194,7 @@ impl Dhatupatha {
}
}

/// Returns an iterator over this dhatupatha's contents.
/// Returns an iterator over all dhatus in the Dhatupatha.
pub fn iter(&self) -> std::slice::Iter<Entry> {
self.0.iter()
}
Expand Down
Loading

0 comments on commit 188ffec

Please sign in to comment.