diff --git a/MANIFEST.in b/MANIFEST.in index 13b954b..dd0eb38 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,11 +1,17 @@ +include CITATION +include LICENSE +include README.md +include pyproject.toml +include requirements.txt +include setup.py +include tests/*.py +prune .ipynb_checkpoints # added by check-manifest include *.md include *.py include *.txt include *.yaml include *.yml -include CITATION -include LICENSE include pylintrc recursive-include bin *.py recursive-include bin *.sh diff --git a/README.md b/README.md index fd95ad4..643f2cf 100644 --- a/README.md +++ b/README.md @@ -132,7 +132,7 @@ Source code for **PyTextRank** plus its logo, documentation, and examples have an [MIT license](https://spdx.org/licenses/MIT.html) which is succinct and simplifies use in commercial applications. -All materials herein are Copyright © 2016-2023 Derwen, Inc. +All materials herein are Copyright © 2016-2024 Derwen, Inc. ## Attribution diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..567d8ec --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,14 @@ +# Security Policy + +## Supported Versions + +Versions which are currently being supported with security updates: + +| Version | Supported | +| ------- | ------------------ | +| > 0.2 | :white_check_mark: | + +## Reporting a Vulnerability + +To report a vulnerability, please create a new [*issue*](https://github.com/DerwenAI/pytextrank/issues). +We will be notified immediately, and will attempt to respond on the reported issue immediately. 
diff --git a/bin/nb_md.sh b/bin/nb_md.sh index 30c220b..531df43 100755 --- a/bin/nb_md.sh +++ b/bin/nb_md.sh @@ -1,4 +1,4 @@ -#!/bin/bash -e +#!/bin/bash -e -x for notebook_path in examples/*.ipynb; do [ -e "$notebook_path" ] || continue @@ -8,6 +8,6 @@ for notebook_path in examples/*.ipynb; do cp $notebook_path docs/$notebook jupyter nbconvert docs/$notebook --to markdown - python bin/vis_doc.py docs/"$stem".md + python3 bin/vis_doc.py docs/"$stem".md rm docs/$notebook -done \ No newline at end of file +done diff --git a/bin/preview.py b/bin/preview.py index 28aeefe..9793610 100755 --- a/bin/preview.py +++ b/bin/preview.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -from flask import Flask, redirect, send_from_directory, url_for # pylint: disable=E0401 +from flask import Flask, redirect, send_from_directory, url_for # pylint: disable=E0401 from pathlib import PurePosixPath import os diff --git a/bin/push_pypi.sh b/bin/push_pypi.sh index 4127f80..5b6cec7 100755 --- a/bin/push_pypi.sh +++ b/bin/push_pypi.sh @@ -1,8 +1,10 @@ -#!/bin/bash -e +#!/bin/bash -e -x -## debugging the uploaded README: -# pandoc README.md --from markdown --to rst -s -o README.rst +rm -rf dist build pytextrank.egg-info +python3 -m build +twine check dist/* -rm -rf dist -python setup.py sdist bdist_wheel -twine upload --verbose dist/* \ No newline at end of file +# this assumes the use of `~/.pypirc` +# https://packaging.python.org/en/latest/specifications/pypirc/ + +twine upload ./dist/* --verbose diff --git a/docs/ack.md b/docs/ack.md index d13eb91..c35e2fe 100644 --- a/docs/ack.md +++ b/docs/ack.md @@ -47,8 +47,7 @@ Computer Science (advisor: [Douglas Lenat](https://en.wikipedia.org/wiki/Douglas with additional work in Design and Linguistics. 
His business experience includes: Director, VP, and CTO positions leading data teams and machine learning projects; -former CTO/Board member at two publicly-traded tech firms on NASDAQ OTC:BB; -and an equity partner at [Amplify Partners](https://derwen.ai/s/hcxhybks9nbh). +former CTO/Board member at two publicly-traded tech firms on NASDAQ OTC:BB. Cited in 2015 as one of the [Top 30 People in Big Data and Analytics](http://www.kdnuggets.com/2015/02/top-30-people-big-data-analytics.html) by Innovation Enterprise. @@ -67,16 +66,15 @@ by Innovation Enterprise. [Cascading](https://www.cascading.org/) * consultant to enterprise organizations for [data strategy](../glossary/#data-strategy); advisor to several AI start-ups, including - [Recognai](https://derwen.ai/s/hk4g), + [Argilla](https://derwen.ai/s/mz2xj9bsz2jx), [KUNGFU.AI](https://derwen.ai/s/rwg8prbgqp36), - [Primer](https://derwen.ai/s/tm9jxzcm67hc) + [DataSpartan](https://derwen.ai/s/hxsfttck3dkx) As an author/speaker/instructor, Paco has taught many people (+9000) in industry across a range of topics – [*data science*](../glossary/#data-science), [*natural language*](../glossary/#natural-language), [*cloud computing*](../glossary/#cloud-computing), -[*reinforcement learning*](../glossary/#reinforcement-learning), [*computable content*](../glossary/#computable-content), etc. – and through guest lectures at @@ -115,9 +113,9 @@ Source code for **pytextrank** plus its logo, documentation, and examples have an [MIT license](https://spdx.org/licenses/MIT.html) which is succinct and simplifies use in commercial applications. -All materials herein are Copyright © 2016-2023 Derwen, Inc. +All materials herein are Copyright © 2016-2024 Derwen, Inc. 
-[![logo for Derwen, Inc.](https://derwen.ai/static/block_logo.png)](https://derwen.ai/) +[![logo for Derwen, Inc.](https://derwen.ai/static/design/block_logo.png)](https://derwen.ai/) ## Production Use Cases diff --git a/docs/index.md b/docs/index.md index 4f2e95f..c2f154f 100644 --- a/docs/index.md +++ b/docs/index.md @@ -78,8 +78,6 @@ complementary, hybrid AI solutions. Links for other open source community resources: * [Issue Tracker](https://github.com/DerwenAI/pytextrank/issues) - * [Project Board](https://github.com/DerwenAI/pytextrank/projects/1) - * [Milestones](https://github.com/DerwenAI/pytextrank/milestones) * [spaCy uniVerse](https://spacy.io/universe/project/spacy-pytextrank) Other good ways to help troubleshoot issues: @@ -94,7 +92,7 @@ get help about **pytextrank** and related topics. * [community Slack](https://knowledgegraphconf.slack.com/ssb/redirect) – specifically on the `#ask` channel - * [*Graph-Based Data Science*](https://www.linkedin.com/groups/6725785/) group on LinkedIn – join to receive related updates, news, conference coupons, etc. + * [*Graph Data Science*](https://www.linkedin.com/groups/6725785/) group on LinkedIn – join to receive related updates, news, conference coupons, etc. For related course materials and training, please check for calendar updates in the article diff --git a/docs/ref.md b/docs/ref.md index cd13bbc..2351880 100644 --- a/docs/ref.md +++ b/docs/ref.md @@ -1,4 +1,7 @@ # Reference: `pytextrank` package +Package definitions for the `pytextrank` library. 
+ + ## [`BaseTextRankFactory` class](#BaseTextRankFactory) A factory class that provides the document with its instance of @@ -118,7 +121,7 @@ list of ranked phrases, in descending order --- #### [`get_personalization` method](#pytextrank.BaseTextRank.get_personalization) -[*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/base.py#L376) +[*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/base.py#L385) ```python get_personalization() @@ -136,7 +139,7 @@ Defaults to a no-op for the base *TextRank* algorithm. --- #### [`get_unit_vector` method](#pytextrank.BaseTextRank.get_unit_vector) -[*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/base.py#L638) +[*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/base.py#L649) ```python get_unit_vector(limit_phrases) @@ -158,7 +161,7 @@ the unit vector, as a list of `VectorElem` objects --- #### [`calc_sent_dist` method](#pytextrank.BaseTextRank.calc_sent_dist) -[*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/base.py#L682) +[*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/base.py#L693) ```python calc_sent_dist(limit_phrases) @@ -176,7 +179,7 @@ a list of sentence distance measures --- #### [`segment_paragraphs` method](#pytextrank.BaseTextRank.segment_paragraphs) -[*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/base.py#L731) +[*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/base.py#L742) ```python segment_paragraphs(sent_dist) @@ -193,7 +196,7 @@ a list of Paragraph data objects --- #### [`summary` method](#pytextrank.BaseTextRank.summary) -[*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/base.py#L786) +[*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/base.py#L797) ```python summary(limit_phrases=10, limit_sentences=4, preserve_order=False, level="sentence") @@ -221,7 +224,7 @@ 
texts for sentences, in order --- #### [`write_dot` method](#pytextrank.BaseTextRank.write_dot) -[*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/base.py#L860) +[*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/base.py#L871) ```python write_dot(path="graph.dot") @@ -235,7 +238,7 @@ path for the output file; defaults to `"graph.dot"` --- #### [`plot_keyphrases` method](#pytextrank.BaseTextRank.plot_keyphrases) -[*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/base.py#L890) +[*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/base.py#L901) ```python plot_keyphrases() @@ -257,7 +260,7 @@ A factory class that provides the document with its instance of --- #### [`__init__` method](#pytextrank.TopicRankFactory.__init__) -[*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/topicrank.py#L31) +[*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/topicrank.py#L32) ```python __init__(edge_weight=1.0, pos_kept=None, token_lookback=3, scrubber=None, stopwords=None, threshold=0.25, method="average") @@ -268,7 +271,7 @@ Constructor for the factory class. --- #### [`__call__` method](#pytextrank.TopicRankFactory.__call__) -[*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/topicrank.py#L58) +[*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/topicrank.py#L59) ```python __call__(doc) @@ -315,7 +318,7 @@ Algorithm Overview: --- #### [`__init__` method](#pytextrank.TopicRank.__init__) -[*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/topicrank.py#L120) +[*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/topicrank.py#L121) ```python __init__(doc, edge_weight, pos_kept, token_lookback, scrubber, stopwords, threshold, method) @@ -347,7 +350,7 @@ clustering method used in *TopicRank* candidate clustering: see [`scipy.cluster. 
--- #### [`calc_textrank` method](#pytextrank.TopicRank.calc_textrank) -[*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/topicrank.py#L307) +[*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/topicrank.py#L318) ```python calc_textrank() @@ -364,7 +367,7 @@ list of ranked phrases, in descending order --- #### [`reset` method](#pytextrank.TopicRank.reset) -[*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/topicrank.py#L367) +[*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/topicrank.py#L378) ```python reset() @@ -677,6 +680,21 @@ the filtered text representing as a list of lines +--- +#### [`get_repo_version` function](#pytextrank.get_repo_version) +[*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/version.py#L49) + +```python +get_repo_version() +``` +Access the Git repository information and return items to identify +the version/commit running in production. + + * *returns* : `typing.Tuple[str, str]` +version tag and commit hash + + + --- #### [`groupby_apply` function](#pytextrank.groupby_apply) [*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/util.py#L14) diff --git a/pkg_doc.cfg b/pkg_doc.cfg new file mode 100644 index 0000000..5c9cc82 --- /dev/null +++ b/pkg_doc.cfg @@ -0,0 +1,20 @@ +{ + "src_url": "https://github.com/DerwenAI/pytextrank/blob/main", + + "module": "pytextrank", + + "classes": [ + "BaseTextRankFactory", + "BaseTextRank", + "TopicRankFactory", + "TopicRank", + "PositionRankFactory", + "PositionRank", + "BiasedTextRankFactory", + "BiasedTextRank", + "Lemma", + "Phrase", + "Sentence", + "VectorElem" + ] +} diff --git a/pkg_doc.py b/pkg_doc.py index 566aeb6..e7eb3ef 100755 --- a/pkg_doc.py +++ b/pkg_doc.py @@ -1,48 +1,43 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -import pyfixdoc +""" +Generate the `apidocs` markdown needed for the package reference. 
+""" + +import importlib +import json import sys +import pyfixdoc + ###################################################################### ## main entry point if __name__ == "__main__": + ref_md_file: str = sys.argv[1] + # NB: `inspect` is picky about paths and current working directory # this only works if run from the top-level directory of the repo sys.path.insert(0, "../") - # customize the following, per use case - import pytextrank # pylint: disable=W0611 - - class_list = [ - "BaseTextRankFactory", - "BaseTextRank", - "TopicRankFactory", - "TopicRank", - "PositionRankFactory", - "PositionRank", - "BiasedTextRankFactory", - "BiasedTextRank", - "Lemma", - "Phrase", - "Sentence", - "VectorElem", - ] - - pkg_doc = pyfixdoc.PackageDoc( - "pytextrank", - "https://github.com/DerwenAI/pytextrank/blob/main", - class_list, + with open("pkg_doc.cfg", "r", encoding="utf-8") as fp: + config: dict = json.load(fp) + + importlib.import_module(config["module"]) + + pkg_doc: pyfixdoc.PackageDoc = pyfixdoc.PackageDoc( + config["module"], + config["src_url"], + config["classes"], ) - # NB: uncomment to analyze/troubleshoot the results of `inspect` - #pkg_doc.show_all_elements(); sys.exit(0) + # NB: uncomment to analyze/troubleshoot the results of `inspect` + #pkg_doc.show_all_elements(); sys.exit(0) - # build the apidocs markdown - pkg_doc.build() + # build the apidocs markdown + pkg_doc.build() - # output the apidocs markdown - ref_md_file = sys.argv[1] - pkg_doc.write_markdown(ref_md_file) + # output the apidocs markdown + pkg_doc.write_markdown(ref_md_file) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..7774cb6 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,121 @@ +[build-system] + +build-backend = "setuptools.build_meta" + +requires = [ + "setuptools >= 69.0", + "setuptools_scm[toml] >= 6.2", + "wheel >= 0.42", +] + + +[tool.setuptools] + +packages = [ "pytextrank" ] + + +[tool.setuptools_scm] + +# required section; empty contents is fine + + 
+[project.urls] + +home = "https://derwen.ai/docs/ptr/" +DOI = "https://doi.org/10.5281/zenodo.4637885" +code = "http://github.com/DerwenAI/pytextrank" +forum = "https://www.linkedin.com/groups/6725785/" +issues = "https://github.com/DerwenAI/pytextrank/issues" +spaCy_uniVerse = "https://spacy.io/universe/project/spacy-pytextrank" +StackOverflow = "https://stackoverflow.com/search?q=pytextrank" +citations = "https://scholar.google.com/scholar?q=related:5tl6J4xZlCIJ:scholar.google.com/&scioq=&hl=en&as_sdt=0,5" + + +[project] + +name = "pytextrank" +dynamic = ["version"] + +authors = [ + { name = "derwen.ai", email = "info@derwen.ai" }, +] + +description = "Python implementation of TextRank as a spaCy pipeline extension, for graph-based natural language work plus related knowledge graph practices; used for phrase extraction of text documents." + +readme = "README.md" +license = { file = "LICENSE" } + +requires-python = ">=3.7" + +classifiers = [ + "Development Status :: 5 - Production/Stable", + "License :: OSI Approved :: MIT License", + "Natural Language :: English", + "Intended Audience :: Developers", + "Intended Audience :: Education", + "Intended Audience :: Information Technology", + "Intended Audience :: Science/Research", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "Topic :: Scientific/Engineering :: Human Machine Interfaces", + "Topic :: Scientific/Engineering :: Information Analysis", + "Topic :: Scientific/Engineering :: Visualization", + "Topic :: Software Development :: Libraries :: Python Modules", + "Topic :: Text Processing :: General", + "Topic :: Text Processing :: Indexing", + "Topic :: Text Processing :: Linguistic", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3", +] + +dependencies = [ + "GitPython >= 3.1", + "graphviz >= 0.13", + "icecream >= 2.1", + "networkx[default] >= 2.6", + "pygments >= 2.7.4", + "scipy >= 1.7", + "spacy >= 3.0", +] + + +[project.optional-dependencies] + +dev = [ 
+ "bandit >= 1.7.7", + "build >= 1.0", + "check-manifest >= 0.48", + "codespell", + "flask >= 3.0", + "grayskull", + "jupyter-server >= 2.11.2", + "jupyterlab >= 3.1.4", + "mistune", + "mkdocs-git-revision-date-plugin >= 0.3", + "mkdocs-material >= 9.5", + "mknotebooks >= 0.8", + "mkrefs >= 0.2.0", + "mypy", + "nbconvert >= 6.4", + "nbmake >= 1.0", + "notebook >= 6.1.5", + "pipdeptree", + "pre-commit >= 3.5", + "pylint >= 2.7.0", + "pymdown-extensions", + "selenium >= 4.16", + "tornado >= 6.3.3", + "twine >= 4.0", + "werkzeug >= 3.0.1", +] + +test = [ + "coverage", + "pytest >= 7.4", +] + +demo = [ + "ipywidgets >= 8.1", + "jupyterlab_execute_time >= 3.1", + "jupyterlab >= 4.0", + "watermark >= 2.4", +] diff --git a/pytextrank/__init__.py b/pytextrank/__init__.py index c6425c6..856fad0 100644 --- a/pytextrank/__init__.py +++ b/pytextrank/__init__.py @@ -2,17 +2,27 @@ # -*- coding: utf-8 -*- # see license https://github.com/DerwenAI/pytextrank#license-and-copyright +""" +Package definitions for the `pytextrank` library. 
+""" + import pathlib import typing from spacy.language import Language # type: ignore # pylint: disable=E0401 from .base import BaseTextRankFactory, BaseTextRank, Lemma, Paragraph, Phrase, Sentence, VectorElem, StopWordsLike + from .biasedrank import BiasedTextRankFactory, BiasedTextRank + from .positionrank import PositionRankFactory, PositionRank + from .topicrank import TopicRankFactory, TopicRank + from .util import groupby_apply, default_scrubber, maniacal_scrubber, split_grafs, filter_quotes -from .version import MIN_PY_VERSION, _versify, _check_version, __version__ + +from .version import get_repo_version, \ + __version__, __version_major__, __version_minor__, __version_patch__ ###################################################################### diff --git a/pytextrank/version.py b/pytextrank/version.py index 8319fe2..8e8f46d 100644 --- a/pytextrank/version.py +++ b/pytextrank/version.py @@ -2,43 +2,57 @@ # -*- coding: utf-8 -*- # see license https://github.com/DerwenAI/pytextrank#license-and-copyright -import sys +""" +Describe the GitHub repo version tags and commit hash for +the `pytextrank` library. +""" + +from os.path import dirname, abspath +import pathlib import typing +from git import Repo # pylint: disable=E0401 # type: ignore -###################################################################### -## Python version checking -MIN_PY_VERSION: typing.Tuple = (3, 7,) -__version__: str = "3.2.5" +## use the local Git info for version info, if available +REPO_HASH: str = "xxxxxxxxx" # default/placeholder +REPO_TAGS: str = "refs/tags/v1.0.0" # default/placeholder +try: + repo_path: pathlib.Path = pathlib.Path(dirname(abspath(__file__))) + repo: Repo = Repo(repo_path.parents[0]) -def _versify ( - py_version_info: typing.Tuple - ) -> str: - """ -Semiprivate helper function to convert Python version to a point release (a string). 
+ REPO_HASH = str(repo.head.commit) + REPO_TAGS = repo.tags +except Exception as ex: # pylint: disable=W0703 + print(ex) - py_version_info: -Python version info as a named tuple from the operating system, e.g., from -[`sys.version_info[:2]`](https://docs.python.org/3/library/sys.html#sys.version_info) - returns: -Python version info in [*semantic versioning*](https://semver.org/) format - """ - return ".".join([ str(x) for x in py_version_info ]) +# parse the last repo tag into a version string plus integer major/minor/patch parts +try: + v_seq: typing.List[ str ] = str(REPO_TAGS[-1]).replace("v", "").split(".")[:3] + __version__ = ".".join(v_seq) # this is the OpenAPI documentation version -def _check_version () -> None: - """ -Semiprivate helper function to check the Python version info versus -the minimum required for **pytextrank**. + __version_major__ = int(v_seq[0]) + __version_minor__ = int(v_seq[1]) + __version_patch__ = int(v_seq[2]) +except IndexError: + # the code above may fail in Github Actions workflow + __version__ = "0.0+test" -Throws a `RuntimeError` if the installed Python interpreter is out of -date. + __version_major__ = 0 + __version_minor__ = 0 + __version_patch__ = 0 + + +def get_repo_version ( + ) -> typing.Tuple[ str, str ]: """ - py_version_info: typing.Tuple = sys.version_info[:2] +Access the Git repository information and return items to identify +the version/commit running in production. 
- if py_version_info < MIN_PY_VERSION: - error_msg = "This version of pytextrank requires Python {} or later ({} detected)\n" - raise RuntimeError(error_msg.format(_versify(MIN_PY_VERSION), _versify(py_version_info))) + returns: +version tag and commit hash + """ + return __version__, REPO_HASH diff --git a/requirements-dev.txt b/requirements-dev.txt index 34da7a8..9a37bce 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,4 +1,4 @@ -bandit +bandit >= 1.7.7 check-manifest >= 0.48 codespell coverage @@ -20,9 +20,9 @@ pre-commit pylint >= 2.7.0 pytest pymdown-extensions -selenium -setuptools >= 65.5.1 +selenium >= 4.16 +setuptools >= 69.0 twine tornado >= 6.3.3 # not directly required, pinned by Snyk to avoid a vulnerability werkzeug >= 3.0.1 # not directly required, pinned by Snyk to avoid a vulnerability -wheel >= 0.38.0 \ No newline at end of file +wheel >= 0.42 diff --git a/requirements.txt b/requirements.txt index c0ffc85..467b50a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +GitPython >= 3.1 graphviz >= 0.13 icecream >= 2.1 networkx[default] >= 2.6 diff --git a/setup.py b/setup.py index 9656307..a97e118 100644 --- a/setup.py +++ b/setup.py @@ -1,100 +1,7 @@ -# type: ignore +""" +PyPi legacy support +https://setuptools.pypa.io/en/latest/userguide/quickstart.html#setup-py +""" -import importlib.util -import pathlib -import setuptools -import typing - - -KEYWORDS = [ - "biased textrank", - "entity linking", - "extractive summarization", - "graph algorithms", - "knowledge graph", - "natural language processing", - "nlp", - "parsing", - "phrase extraction", - "pipeline component", - "positionrank", - "spacy", - "text analytics", - "textgraphs", - "textrank", - "topicrank", - ] - - -def parse_requirements_file (filename: str) -> typing.List: - """read and parse a Python `requirements.txt` file, returning as a list of str""" - results: list = [] - - with pathlib.Path(filename).open() as f: - for l in f.readlines(): - 
results.append(l.strip().replace(" ", "").split("#")[0]) - - return results - - -if __name__ == "__main__": - spec = importlib.util.spec_from_file_location("pytextrank.version", "pytextrank/version.py") - pytr_version = importlib.util.module_from_spec(spec) - spec.loader.exec_module(pytr_version) - pytr_version._check_version() # pylint: disable=W0212 - - base_packages = parse_requirements_file("requirements.txt") - docs_packages = parse_requirements_file("requirements-dev.txt") - viz_packages = parse_requirements_file("requirements-viz.txt") - - setuptools.setup( - name="pytextrank", - version = pytr_version.__version__, - - python_requires = ">=" + pytr_version._versify(pytr_version.MIN_PY_VERSION), # pylint: disable=W0212 - packages = setuptools.find_packages(exclude=[ "docs", "examples" ]), - install_requires = base_packages, - extras_require = { - "base": base_packages, - "docs": docs_packages, - "viz": viz_packages, - }, - - author="Paco Nathan", - author_email="paco@derwen.ai", - license="MIT", - - description="Python implementation of TextRank as a spaCy pipeline extension, for graph-based natural language work plus related knowledge graph practices; used for for phrase extraction and lightweight extractive summarization of text documents.", - long_description = pathlib.Path("README.md").read_text(), - long_description_content_type = "text/markdown", - - keywords = ", ".join(KEYWORDS), - classifiers = [ - "Programming Language :: Python :: 3", - "License :: OSI Approved :: MIT License", - "Operating System :: OS Independent", - "Development Status :: 5 - Production/Stable", - "Intended Audience :: Developers", - "Intended Audience :: Education", - "Intended Audience :: Information Technology", - "Intended Audience :: Science/Research", - "Topic :: Scientific/Engineering :: Artificial Intelligence", - "Topic :: Scientific/Engineering :: Human Machine Interfaces", - "Topic :: Scientific/Engineering :: Information Analysis", - "Topic :: Text Processing :: 
General", - "Topic :: Text Processing :: Indexing", - "Topic :: Text Processing :: Linguistic", - ], - - url = "https://derwen.ai/docs/ptr/", - project_urls = { - "Source": "http://github.com/DerwenAI/pytextrank", - "spaCy uniVerse": "https://spacy.io/universe/project/spacy-pytextrank", - "Issue Tracker": "https://github.com/DerwenAI/pytextrank/issues", - "Discussion Forum": "https://www.linkedin.com/groups/6725785/", - "StackOverflow": "https://stackoverflow.com/search?q=pytextrank", - "Citations": "https://scholar.google.com/scholar?q=related:5tl6J4xZlCIJ:scholar.google.com/&scioq=&hl=en&as_sdt=0,5", - }, - - zip_safe=False, - ) +from setuptools import setup +setup()