From 7993d6d321a0d3467eb9899ea90020b8e6b205e6 Mon Sep 17 00:00:00 2001 From: Aleksandr Date: Tue, 5 Nov 2024 13:16:13 +0100 Subject: [PATCH] bump version, fix readme and add value error --- README.md | 20 ++++++++++++++++---- linguaf/__init__.py | 2 +- linguaf/readability.py | 4 ++++ linguaf/syntactical_complexity.py | 2 ++ setup.py | 2 +- 5 files changed, 24 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 1e32ab6..1915c9e 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ Example: from linguaf import descriptive_statistics as ds -ds.words_per_sentence(documents) +ds.avg_words_per_sentence(documents) # Output: 15 ``` @@ -64,7 +64,7 @@ from linguaf import syntactical_complexity as sc sc.mean_dependency_distance(documents) -# Output: 2.307306255835668 +# Output: 2.375 ``` ### Lexical Diversity @@ -83,7 +83,7 @@ from linguaf import lexical_diversity as ld ld.log_type_token_ratio(documents) -# Output: 94.03574963462502 +# Output: 0.9403574963462502 ``` ### Readability @@ -125,7 +125,19 @@ pip install . ## Language Support -At the moment, library supports English and Russian languages for all the methods. +At the moment, library supports the following languages: +* English 🇬🇧 (`en`): full support +* Russian 🇷🇺 (`ru`): full support +* German 🇩🇪 (`de`) +* French 🇫🇷 (`fr`) +* Spanish 🇪🇸 (`es`) +* Chinese 🇨🇳 (`zh`) +* Lithuanian 🇱🇹 (`lt`) +* Belarusian 🇧🇾 (`be`) +* Ukrainian 🇺🇦 (`uk`) +* Armenian 🇦🇲 (`hy`) + +**Important:** not every method is implemented for every language. If you use a particular method that does not support the input language, you'll get a `ValueError`. 
## Citation diff --git a/linguaf/__init__.py b/linguaf/__init__.py index cad6c0d..032c211 100644 --- a/linguaf/__init__.py +++ b/linguaf/__init__.py @@ -2,7 +2,7 @@ SUPPORTED_LANGS = ['en', 'ru', 'de', 'fr', 'es', 'zh', # stopwords from nltk 'lt', 'be', 'uk', 'hy'] # stopwords from other sources -__version__ = '0.1.1' +__version__ = '0.1.2' def __load_json(filepath): diff --git a/linguaf/readability.py b/linguaf/readability.py index cc6c320..6130c40 100644 --- a/linguaf/readability.py +++ b/linguaf/readability.py @@ -26,6 +26,8 @@ def flesch_reading_ease(documents: list, lang: str = 'en', remove_stopwords: boo return 206.835 - 1.015*asl - 84.6*(syl_total/len(words)) elif lang == 'ru': return 206.835 - 1.3*asl - 60.1*(syl_total/len(words)) # coefficients for russian + else: + raise ValueError("Syllable counting is currently not supported for the language " + lang + "!") def flesch_kincaid_grade(documents: list, lang: str = 'en', remove_stopwords: bool = False) -> float: @@ -51,6 +53,8 @@ def flesch_kincaid_grade(documents: list, lang: str = 'en', remove_stopwords: bo return 0.39*asl + 11.8*(syl_total/len(words)) - 15.59 elif lang == 'ru': return 0.5*asl + 8.4*(syl_total/len(words)) - 15.59 # coefficients for russian + else: + raise ValueError("Syllable counting is currently not supported for the language " + lang + "!") def automated_readability_index(documents: list, lang: str = 'en', remove_stopwords: bool = False) -> float: diff --git a/linguaf/syntactical_complexity.py b/linguaf/syntactical_complexity.py index 3c2c570..cd051db 100644 --- a/linguaf/syntactical_complexity.py +++ b/linguaf/syntactical_complexity.py @@ -41,5 +41,7 @@ def mean_dependency_distance(documents: list, lang: str = 'en') -> float: doc = nlp(text) for token in doc: dd += abs(token.head.i - token.i) + else: + raise ValueError("Syllable counting is currently not supported for the language " + lang + "!") return dd/(len(words) - len(sentences)) diff --git a/setup.py b/setup.py index 
dd34811..5b062b6 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ def read_requirements(): setuptools.setup( name="linguaf", - version="0.1.0", + version="0.1.2", author="Aleksandr Perevalov", author_email="perevalovproduction@gmail.com", description="Python package for calculating famous measures in computational linguistics",