From 205c543ab06f0a77ac1154ff9aec068f53a696fc Mon Sep 17 00:00:00 2001 From: Karan Goel Date: Thu, 2 Jul 2020 16:49:37 -0700 Subject: [PATCH] Initial commit --- .coveragerc | 13 + .flake8 | 11 + .github/workflows/ci.yml | 124 + .gitignore | 147 + .isort.cfg | 7 + .pre-commit-config.yaml | 14 + .readthedocs.yml | 22 + LICENSE.md | 201 + Makefile | 29 + README.md | 134 + docs/Makefile | 20 + docs/make.bat | 35 + docs/requirements.txt | 5 + docs/source/apidocs/modules.rst | 7 + .../apidocs/robustnessgym.cachedops.allen.rst | 45 + .../apidocs/robustnessgym.cachedops.rst | 77 + docs/source/apidocs/robustnessgym.core.rst | 125 + docs/source/apidocs/robustnessgym.rst | 21 + .../robustnessgym.slicebuilders.attacks.rst | 29 + .../apidocs/robustnessgym.slicebuilders.rst | 63 + ...stnessgym.slicebuilders.subpopulations.rst | 93 + ...tnessgym.slicebuilders.transformations.rst | 53 + docs/source/apidocs/robustnessgym.tasks.rst | 29 + docs/source/conf.py | 78 + docs/source/getting-started/install.md | 58 + docs/source/getting-started/main.md | 0 docs/source/getting-started/nutshell.md | 203 + docs/source/getting-started/quickstart.rst | 257 + docs/source/index.rst | 46 + poetry.lock | 4791 +++++++++++++++++ pyproject.toml | 74 + robustnessgym/__init__.py | 202 + robustnessgym/cachedops/__init__.py | 16 + robustnessgym/cachedops/allen/__init__.py | 0 .../cachedops/allen/allen_predictor.py | 35 + .../cachedops/allen/constituency_parser.py | 17 + .../cachedops/allen/dependency_parser.py | 10 + .../cachedops/allen/semantic_role_labeler.py | 10 + robustnessgym/cachedops/bootleg.py | 122 + robustnessgym/cachedops/similarity.py | 128 + robustnessgym/cachedops/spacy.py | 215 + robustnessgym/cachedops/stanza.py | 102 + robustnessgym/cachedops/strip_text.py | 21 + robustnessgym/cachedops/textblob.py | 45 + robustnessgym/core/__init__.py | 0 robustnessgym/core/cachedops.py | 587 ++ robustnessgym/core/constants.py | 9 + robustnessgym/core/dataset.py | 617 +++ robustnessgym/core/dataset_to_task.py | 9 + robustnessgym/core/decorators.py | 78 + robustnessgym/core/identifier.py | 69 + robustnessgym/core/metrics.py | 99 + robustnessgym/core/model.py | 369 ++ robustnessgym/core/operation.py | 264 + robustnessgym/core/report.py | 486 ++ robustnessgym/core/slice.py | 64 + robustnessgym/core/storage.py | 15 + robustnessgym/core/testbench.py | 611 +++ robustnessgym/core/tools.py | 190 + robustnessgym/core/version.py | 86 + robustnessgym/slicebuilders/__init__.py | 4 + robustnessgym/slicebuilders/attack.py | 21 + .../slicebuilders/attacks/__init__.py | 2 + .../slicebuilders/attacks/morpheus.py | 118 + .../slicebuilders/attacks/textattack.py | 93 + robustnessgym/slicebuilders/curator.py | 23 + robustnessgym/slicebuilders/slicebuilder.py | 694 +++ robustnessgym/slicebuilders/subpopulation.py | 369 ++ .../slicebuilders/subpopulations/__init__.py | 0 .../subpopulations/constituency_overlap.py | 136 + .../subpopulations/entity_frequency.py | 92 + .../slicebuilders/subpopulations/hans.py | 494 ++ .../slicebuilders/subpopulations/length.py | 62 + .../subpopulations/lexical_overlap.py | 39 + .../slicebuilders/subpopulations/phrase.py | 426 ++ .../slicebuilders/subpopulations/position.py | 61 + .../slicebuilders/subpopulations/score.py | 284 + .../subpopulations/similarity.py | 243 + .../slicebuilders/subpopulations/wordlists.py | 74 + robustnessgym/slicebuilders/transformation.py | 204 + .../slicebuilders/transformations/__init__.py | 3 + .../slicebuilders/transformations/_eda.py | 349 ++ .../slicebuilders/transformations/eda.py | 87 
+ .../slicebuilders/transformations/fairseq.py | 160 + .../slicebuilders/transformations/gpt3.py | 25 + .../slicebuilders/transformations/nlpaug.py | 69 + .../transformations/similarity.py | 65 + robustnessgym/tasks/__init__.py | 0 robustnessgym/tasks/schema.py | 69 + robustnessgym/tasks/task.py | 376 ++ tests/__init__.py | 0 tests/cached_ops/__init__.py | 0 tests/cached_ops/test_spacy.py | 53 + tests/cached_ops/test_stanza.py | 35 + tests/core/__init__.py | 0 tests/core/test_cachedops.py | 183 + tests/core/test_dataset.py | 171 + tests/core/test_decorators.py | 15 + tests/core/test_identifier.py | 75 + tests/core/test_report.py | 314 ++ tests/core/test_slice.py | 22 + tests/core/test_testbench.py | 188 + tests/core/test_tools.py | 31 + tests/slicebuilders/__init__.py | 0 .../slicebuilders/subpopulations/__init__.py | 0 .../test_constituency_overlap.py | 52 + .../subpopulations/test_length.py | 27 + .../slicebuilders/transformations/__init__.py | 0 .../slicebuilders/transformations/test_eda.py | 60 + .../transformations/test_fairseq.py | 45 + .../transformations/test_nlpaug.py | 65 + tests/testbeds.py | 87 + 112 files changed, 17352 insertions(+) create mode 100644 .coveragerc create mode 100644 .flake8 create mode 100644 .github/workflows/ci.yml create mode 100644 .gitignore create mode 100644 .isort.cfg create mode 100644 .pre-commit-config.yaml create mode 100644 .readthedocs.yml create mode 100644 LICENSE.md create mode 100644 Makefile create mode 100644 README.md create mode 100644 docs/Makefile create mode 100644 docs/make.bat create mode 100644 docs/requirements.txt create mode 100644 docs/source/apidocs/modules.rst create mode 100644 docs/source/apidocs/robustnessgym.cachedops.allen.rst create mode 100644 docs/source/apidocs/robustnessgym.cachedops.rst create mode 100644 docs/source/apidocs/robustnessgym.core.rst create mode 100644 docs/source/apidocs/robustnessgym.rst create mode 100644 docs/source/apidocs/robustnessgym.slicebuilders.attacks.rst create mode 100644 docs/source/apidocs/robustnessgym.slicebuilders.rst create mode 100644 docs/source/apidocs/robustnessgym.slicebuilders.subpopulations.rst create mode 100644 docs/source/apidocs/robustnessgym.slicebuilders.transformations.rst create mode 100644 docs/source/apidocs/robustnessgym.tasks.rst create mode 100644 docs/source/conf.py create mode 100644 docs/source/getting-started/install.md create mode 100644 docs/source/getting-started/main.md create mode 100644 docs/source/getting-started/nutshell.md create mode 100644 docs/source/getting-started/quickstart.rst create mode 100644 docs/source/index.rst create mode 100644 poetry.lock create mode 100644 pyproject.toml create mode 100644 robustnessgym/__init__.py create mode 100644 robustnessgym/cachedops/__init__.py create mode 100644 robustnessgym/cachedops/allen/__init__.py create mode 100644 robustnessgym/cachedops/allen/allen_predictor.py create mode 100644 robustnessgym/cachedops/allen/constituency_parser.py create mode 100644 robustnessgym/cachedops/allen/dependency_parser.py create mode 100644 robustnessgym/cachedops/allen/semantic_role_labeler.py create mode 100644 robustnessgym/cachedops/bootleg.py create mode 100644 robustnessgym/cachedops/similarity.py create mode 100644 robustnessgym/cachedops/spacy.py create mode 100644 robustnessgym/cachedops/stanza.py create mode 100644 robustnessgym/cachedops/strip_text.py create mode 100644 robustnessgym/cachedops/textblob.py create mode 100644 robustnessgym/core/__init__.py create mode 100644 robustnessgym/core/cachedops.py 
create mode 100644 robustnessgym/core/constants.py create mode 100644 robustnessgym/core/dataset.py create mode 100644 robustnessgym/core/dataset_to_task.py create mode 100644 robustnessgym/core/decorators.py create mode 100644 robustnessgym/core/identifier.py create mode 100644 robustnessgym/core/metrics.py create mode 100644 robustnessgym/core/model.py create mode 100644 robustnessgym/core/operation.py create mode 100644 robustnessgym/core/report.py create mode 100644 robustnessgym/core/slice.py create mode 100644 robustnessgym/core/storage.py create mode 100644 robustnessgym/core/testbench.py create mode 100644 robustnessgym/core/tools.py create mode 100644 robustnessgym/core/version.py create mode 100644 robustnessgym/slicebuilders/__init__.py create mode 100644 robustnessgym/slicebuilders/attack.py create mode 100644 robustnessgym/slicebuilders/attacks/__init__.py create mode 100644 robustnessgym/slicebuilders/attacks/morpheus.py create mode 100644 robustnessgym/slicebuilders/attacks/textattack.py create mode 100644 robustnessgym/slicebuilders/curator.py create mode 100644 robustnessgym/slicebuilders/slicebuilder.py create mode 100644 robustnessgym/slicebuilders/subpopulation.py create mode 100644 robustnessgym/slicebuilders/subpopulations/__init__.py create mode 100644 robustnessgym/slicebuilders/subpopulations/constituency_overlap.py create mode 100644 robustnessgym/slicebuilders/subpopulations/entity_frequency.py create mode 100644 robustnessgym/slicebuilders/subpopulations/hans.py create mode 100644 robustnessgym/slicebuilders/subpopulations/length.py create mode 100644 robustnessgym/slicebuilders/subpopulations/lexical_overlap.py create mode 100644 robustnessgym/slicebuilders/subpopulations/phrase.py create mode 100644 robustnessgym/slicebuilders/subpopulations/position.py create mode 100644 robustnessgym/slicebuilders/subpopulations/score.py create mode 100644 robustnessgym/slicebuilders/subpopulations/similarity.py create mode 100644 robustnessgym/slicebuilders/subpopulations/wordlists.py create mode 100644 robustnessgym/slicebuilders/transformation.py create mode 100644 robustnessgym/slicebuilders/transformations/__init__.py create mode 100644 robustnessgym/slicebuilders/transformations/_eda.py create mode 100644 robustnessgym/slicebuilders/transformations/eda.py create mode 100644 robustnessgym/slicebuilders/transformations/fairseq.py create mode 100644 robustnessgym/slicebuilders/transformations/gpt3.py create mode 100644 robustnessgym/slicebuilders/transformations/nlpaug.py create mode 100644 robustnessgym/slicebuilders/transformations/similarity.py create mode 100644 robustnessgym/tasks/__init__.py create mode 100644 robustnessgym/tasks/schema.py create mode 100644 robustnessgym/tasks/task.py create mode 100644 tests/__init__.py create mode 100644 tests/cached_ops/__init__.py create mode 100644 tests/cached_ops/test_spacy.py create mode 100644 tests/cached_ops/test_stanza.py create mode 100644 tests/core/__init__.py create mode 100644 tests/core/test_cachedops.py create mode 100644 tests/core/test_dataset.py create mode 100644 tests/core/test_decorators.py create mode 100644 tests/core/test_identifier.py create mode 100644 tests/core/test_report.py create mode 100644 tests/core/test_slice.py create mode 100644 tests/core/test_testbench.py create mode 100644 tests/core/test_tools.py create mode 100644 tests/slicebuilders/__init__.py create mode 100644 tests/slicebuilders/subpopulations/__init__.py create mode 100644 
tests/slicebuilders/subpopulations/test_constituency_overlap.py create mode 100644 tests/slicebuilders/subpopulations/test_length.py create mode 100644 tests/slicebuilders/transformations/__init__.py create mode 100644 tests/slicebuilders/transformations/test_eda.py create mode 100644 tests/slicebuilders/transformations/test_fairseq.py create mode 100644 tests/slicebuilders/transformations/test_nlpaug.py create mode 100644 tests/testbeds.py diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 00000000..f6412e66 --- /dev/null +++ b/.coveragerc @@ -0,0 +1,13 @@ +[run] +branch = True +source = robustnessgym + +[report] +exclude_lines = + if self.debug: + pragma: no cover + raise NotImplementedError + if __name__ == .__main__.: +ignore_errors = True +omit = + tests/* \ No newline at end of file diff --git a/.flake8 b/.flake8 new file mode 100644 index 00000000..d23a2221 --- /dev/null +++ b/.flake8 @@ -0,0 +1,11 @@ +# This is our code-style check. We currently allow the following exceptions: +# - E731: do not assign a lambda expression, use a def +# - W503: line break before binary operator +# - E741: do not use variables named 'l', 'O', or 'I' +# - E203: whitespace before ':' +[flake8] +count = True +max-line-length = 88 +statistics = True +ignore = E731,W503,E741,E203 +exclude = setup.py \ No newline at end of file diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 00000000..51995188 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,124 @@ +name: CI + +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +jobs: + + Linting: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ['3.8'] + + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - uses: actions/cache@v2 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip + + - name: Install Dependencies + run: | + python -m pip install --upgrade pip + make dev + + - name: Lint with isort, black, docformatter, flake8 + run: | + make autoformat + make lint + + Documentation: + needs: Linting + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ['3.8'] + + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - uses: actions/cache@v2 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip + + - name: Install Dependencies + run: | + python -m pip install --upgrade pip + make dev + + - name: Generate Docs + run: | + make docs + + Build: + needs: Documentation + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest] #, macos-latest] + python-version: ['3.8'] + + steps: + - uses: actions/checkout@v2 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - name: Install Poetry + uses: snok/install-poetry@v1.1.1 + with: + version: 1.1.4 + virtualenvs-create: true + virtualenvs-in-project: true + + - name: Set up Poetry cache for Python dependencies + id: cached-poetry-dependencies + uses: actions/cache@v2 + with: + path: .venv + key: poetry-${{ runner.os }}-${{ hashFiles('**/poetry.lock') }} + + - name: Install Dependencies + if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' + run: | + 
poetry install --no-interaction + + - name: Test with pytest + run: | + source .venv/bin/activate + make test-cov + + - name: Upload to codecov.io + uses: codecov/codecov-action@v1 + with: + file: ./coverage.xml + flags: unittests + name: codecov-umbrella + fail_ci_if_error: true + + - name: Publish distribution 📦 to PyPI + if: startsWith(github.event.ref, 'refs/tags') + env: + PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} + run: | + poetry config pypi-token.pypi $PYPI_TOKEN + poetry publish --build diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..2118d244 --- /dev/null +++ b/.gitignore @@ -0,0 +1,147 @@ +# Jupyter notebooks +notebooks/ + +# Note files +notes.md + +# Mac +.DS_STORE + +# VSCode +.vscode + +# Apache Arrow +*.arrow + +# Pycharm +.idea + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ diff --git a/.isort.cfg b/.isort.cfg new file mode 100644 index 00000000..8584edd7 --- /dev/null +++ b/.isort.cfg @@ -0,0 +1,7 @@ +[settings] +multi_line_output = 3 +include_trailing_comma = True +force_grid_wrap = 0 +use_parentheses = True +ensure_newline_before_comments = True +line_length = 88 \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..45bbe91b --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,14 @@ +repos: +- repo: https://github.com/timothycrosley/isort + rev: 5.7.0 + hooks: + - id: isort +- repo: https://github.com/psf/black + rev: 20.8b1 + hooks: + - id: black + language_version: python3 +- repo: https://gitlab.com/pycqa/flake8 + rev: 3.8.4 + hooks: + - id: flake8 \ No newline at end of file diff --git a/.readthedocs.yml b/.readthedocs.yml new file mode 100644 index 00000000..78190f29 --- /dev/null +++ b/.readthedocs.yml @@ -0,0 +1,22 @@ +# .readthedocs.yml +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Build documentation in the docs/ directory with Sphinx +sphinx: + configuration: docs/source/conf.py + +# Optionally build your docs in additional formats such as PDF and ePub +formats: all + +# Optionally set the version of Python and requirements required to build your docs +python: + version: 3.8 + install: + - requirements: docs/requirements.txt + - method: pip + path: . + system_packages: true \ No newline at end of file diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 00000000..27004ea6 --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2020 Stanford Hazy Research, Salesforce Research + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
\ No newline at end of file
diff --git a/Makefile b/Makefile
new file mode 100644
index 00000000..c779ffe4
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,29 @@
+autoformat:
+	black robustnessgym/ tests/
+	isort --atomic robustnessgym/ tests/
+	docformatter --in-place --recursive robustnessgym tests
+
+lint:
+	isort -c robustnessgym/ tests/
+	black robustnessgym/ tests/ --check
+	flake8 robustnessgym/ tests/
+
+test:
+	pytest
+
+test-cov:
+	pytest --cov=./ --cov-report=xml
+
+docs:
+	sphinx-build -b html docs/source/ docs/build/html/
+
+docs-check:
+	sphinx-build -b html docs/source/ docs/build/html/ -W
+
+livedocs:
+	sphinx-autobuild -b html docs/source/ docs/build/html/
+
+dev:
+	pip install black isort flake8 docformatter pytest-cov sphinx-rtd-theme nbsphinx recommonmark pre-commit
+
+all: autoformat lint docs test
diff --git a/README.md b/README.md
new file mode 100644
index 00000000..c580e400
--- /dev/null
+++ b/README.md
@@ -0,0 +1,134 @@
+Robustness Gym
+================================
+![GitHub Workflow Status](https://img.shields.io/github/workflow/status/robustness-gym/robustness-gym/CI)
+![GitHub](https://img.shields.io/github/license/robustness-gym/robustness-gym)
+[![codecov](https://codecov.io/gh/robustness-gym/robustness-gym/branch/master/graph/badge.svg?token=MOLQYUSYQU)](https://codecov.io/gh/robustness-gym/robustness-gym)
+[![Documentation Status](https://readthedocs.org/projects/robustnessgym/badge/?version=latest)](https://robustnessgym.readthedocs.io/en/latest/?badge=latest)
+[![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/pre-commit/pre-commit)
+[![website](https://img.shields.io/badge/website-live-brightgreen)](https://robustnessgym.com)
+
+Robustness Gym is an evaluation toolkit for natural language processing in Python.
+
+##
+
+### Installation
+```
+pip install robustnessgym
+```
+
+### Robustness Gym in 5 minutes
+
+#### Datasets that extend Huggingface `datasets`
+```python
+# robustnessgym.Dataset wraps datasets.Dataset
+from robustnessgym import Dataset

+# Use Dataset.load_dataset(..) exactly like datasets.load_dataset(..)
+dataset = Dataset.load_dataset('boolq')
+dataset = Dataset.load_dataset('boolq', split='train[:10]')
+```
+
+#### Cache information
+```python
+# Get a dataset
+from robustnessgym import Dataset
+dataset = Dataset.load_dataset('boolq')
+
+# Run the Spacy pipeline
+from robustnessgym import Spacy
+spacy = Spacy()
+# .. on the 'question' column of the dataset
+dataset = spacy(batch_or_dataset=dataset,
+                columns=['question'])
+
+
+# Run the Stanza pipeline
+from robustnessgym import Stanza
+stanza = Stanza()
+# .. on both the question and passage columns of a batch
+dataset = stanza(batch_or_dataset=dataset[:32],
+                 columns=['question', 'passage'])
+
+# .. use any of the other built-in operations in Robustness Gym!
+
+
+# Or, create your own CachedOperation
+from robustnessgym import CachedOperation, Identifier
+from robustnessgym.core.decorators import singlecolumn
+
+# Write a silly function that operates on a single column of a batch
+@singlecolumn
+def silly_fn(batch, columns):
+    """
+    Capitalize text in the specified column of the batch.
+    """
+    column_name = columns[0]
+    assert all(isinstance(text, str) for text in batch[column_name]), "Must apply to a text column."
+    return [text.capitalize() for text in batch[column_name]]
+
+# Wrap the silly function in a CachedOperation
+silly_op = CachedOperation(apply_fn=silly_fn,
+                           identifier=Identifier(_name='SillyOp'))
+
+# Apply it to a dataset
+dataset = silly_op(batch_or_dataset=dataset,
+                   columns=['question'])
+```
+
+
+#### Retrieve cached information
+```python
+from robustnessgym import Spacy, Stanza, CachedOperation
+
+# Take a batch of data
+batch = dataset[:32]
+
+# Retrieve the (cached) results of the Spacy CachedOperation
+spacy_information = Spacy.retrieve(batch, columns=['question'])
+
+# Retrieve the tokens returned by the Spacy CachedOperation
+tokens = Spacy.retrieve(batch, columns=['question'], proc_fns=Spacy.tokens)
+
+# Retrieve the entities found by the Stanza CachedOperation
+entities = Stanza.retrieve(batch, columns=['passage'], proc_fns=Stanza.entities)
+
+# Retrieve the capitalized output of the silly_op
+capitalizations = CachedOperation.retrieve(batch,
+                                           columns=['question'],
+                                           identifier=silly_op.identifier)
+
+# Retrieve it directly using the silly_op
+capitalizations = silly_op.retrieve(batch, columns=['question'])
+
+# Retrieve the capitalized output and lower-case it during retrieval
+capitalizations = silly_op.retrieve(
+    batch,
+    columns=['question'],
+    proc_fns=lambda decoded_batch: [x.lower() for x in decoded_batch]
+)
+```
+
+#### Create subpopulations
+```python
+from robustnessgym import Spacy, ScoreSubpopulation
+from robustnessgym.core.decorators import singlecolumn
+
+@singlecolumn
+def length(batch, columns):
+    """
+    Length using cached Spacy tokenization.
+    """
+    column_name = columns[0]
+    # Take advantage of previously cached Spacy information
+    tokens = Spacy.retrieve(batch, columns, proc_fns=Spacy.tokens)[column_name]
+    return [len(tokens_) for tokens_ in tokens]
+
+# Create a subpopulation that buckets examples based on length
+length_subpopulation = ScoreSubpopulation(intervals=[(0, 10), (10, 20)],
+                                          score_fn=length)
+
+dataset, slices, membership = length_subpopulation(dataset, columns=['question'])
+# dataset is updated with slice information
+# slices is a list of 2 Slice objects
+# membership is a matrix of shape (n x 2)
+```
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 00000000..d0c3cbf1
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,20 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS ?=
+SPHINXBUILD ?= sphinx-build
+SOURCEDIR = source
+BUILDDIR = build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/docs/make.bat b/docs/make.bat
new file mode 100644
index 00000000..6247f7e2
--- /dev/null
+++ b/docs/make.bat
@@ -0,0 +1,35 @@
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+	set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=source
+set BUILDDIR=build
+
+if "%1" == "" goto help
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+	echo.
+	echo.The 'sphinx-build' command was not found.
Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 00000000..87a81a7e --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,5 @@ +sphinx-rtd-theme +sphinx_autodoc_typehints +nbsphinx +recommonmark +toml \ No newline at end of file diff --git a/docs/source/apidocs/modules.rst b/docs/source/apidocs/modules.rst new file mode 100644 index 00000000..22a89931 --- /dev/null +++ b/docs/source/apidocs/modules.rst @@ -0,0 +1,7 @@ +robustnessgym +============= + +.. toctree:: + :maxdepth: 4 + + robustnessgym diff --git a/docs/source/apidocs/robustnessgym.cachedops.allen.rst b/docs/source/apidocs/robustnessgym.cachedops.allen.rst new file mode 100644 index 00000000..789ec04e --- /dev/null +++ b/docs/source/apidocs/robustnessgym.cachedops.allen.rst @@ -0,0 +1,45 @@ +robustnessgym.cachedops.allen package +===================================== + +Submodules +---------- + +robustnessgym.cachedops.allen.allen\_predictor module +----------------------------------------------------- + +.. automodule:: robustnessgym.cachedops.allen.allen_predictor + :members: + :undoc-members: + :show-inheritance: + +robustnessgym.cachedops.allen.constituency\_parser module +--------------------------------------------------------- + +.. automodule:: robustnessgym.cachedops.allen.constituency_parser + :members: + :undoc-members: + :show-inheritance: + +robustnessgym.cachedops.allen.dependency\_parser module +------------------------------------------------------- + +.. automodule:: robustnessgym.cachedops.allen.dependency_parser + :members: + :undoc-members: + :show-inheritance: + +robustnessgym.cachedops.allen.semantic\_role\_labeler module +------------------------------------------------------------ + +.. automodule:: robustnessgym.cachedops.allen.semantic_role_labeler + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: robustnessgym.cachedops.allen + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/apidocs/robustnessgym.cachedops.rst b/docs/source/apidocs/robustnessgym.cachedops.rst new file mode 100644 index 00000000..2caea856 --- /dev/null +++ b/docs/source/apidocs/robustnessgym.cachedops.rst @@ -0,0 +1,77 @@ +robustnessgym.cachedops package +=============================== + +Subpackages +----------- + +.. toctree:: + :maxdepth: 4 + + robustnessgym.cachedops.allen + +Submodules +---------- + +robustnessgym.cachedops.bootleg module +-------------------------------------- + +.. automodule:: robustnessgym.cachedops.bootleg + :members: + :undoc-members: + :show-inheritance: + +robustnessgym.cachedops.flair module +------------------------------------ + +.. automodule:: robustnessgym.cachedops.flair + :members: + :undoc-members: + :show-inheritance: + +robustnessgym.cachedops.similarity module +----------------------------------------- + +.. automodule:: robustnessgym.cachedops.similarity + :members: + :undoc-members: + :show-inheritance: + +robustnessgym.cachedops.spacy module +------------------------------------ + +.. 
automodule:: robustnessgym.cachedops.spacy + :members: + :undoc-members: + :show-inheritance: + +robustnessgym.cachedops.stanza module +------------------------------------- + +.. automodule:: robustnessgym.cachedops.stanza + :members: + :undoc-members: + :show-inheritance: + +robustnessgym.cachedops.strip\_text module +------------------------------------------ + +.. automodule:: robustnessgym.cachedops.strip_text + :members: + :undoc-members: + :show-inheritance: + +robustnessgym.cachedops.textblob module +--------------------------------------- + +.. automodule:: robustnessgym.cachedops.textblob + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: robustnessgym.cachedops + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/apidocs/robustnessgym.core.rst b/docs/source/apidocs/robustnessgym.core.rst new file mode 100644 index 00000000..00d74a7f --- /dev/null +++ b/docs/source/apidocs/robustnessgym.core.rst @@ -0,0 +1,125 @@ +robustnessgym.core package +========================== + +Submodules +---------- + +robustnessgym.core.cachedops module +----------------------------------- + +.. automodule:: robustnessgym.core.cachedops + :members: + :undoc-members: + :show-inheritance: + +robustnessgym.core.constants module +----------------------------------- + +.. automodule:: robustnessgym.core.constants + :members: + :undoc-members: + :show-inheritance: + +robustnessgym.core.dataset module +--------------------------------- + +.. automodule:: robustnessgym.core.dataset + :members: + :undoc-members: + :show-inheritance: + +robustnessgym.core.dataset\_to\_task module +------------------------------------------- + +.. automodule:: robustnessgym.core.dataset_to_task + :members: + :undoc-members: + :show-inheritance: + +robustnessgym.core.decorators module +------------------------------------ + +.. automodule:: robustnessgym.core.decorators + :members: + :undoc-members: + :show-inheritance: + +robustnessgym.core.identifier module +------------------------------------ + +.. automodule:: robustnessgym.core.identifier + :members: + :undoc-members: + :show-inheritance: + +robustnessgym.core.model module +------------------------------- + +.. automodule:: robustnessgym.core.model + :members: + :undoc-members: + :show-inheritance: + +robustnessgym.core.operation module +----------------------------------- + +.. automodule:: robustnessgym.core.operation + :members: + :undoc-members: + :show-inheritance: + +robustnessgym.core.report module +-------------------------------- + +.. automodule:: robustnessgym.core.report + :members: + :undoc-members: + :show-inheritance: + +robustnessgym.core.slice module +------------------------------- + +.. automodule:: robustnessgym.core.slice + :members: + :undoc-members: + :show-inheritance: + +robustnessgym.core.storage module +--------------------------------- + +.. automodule:: robustnessgym.core.storage + :members: + :undoc-members: + :show-inheritance: + +robustnessgym.core.testbench module +----------------------------------- + +.. automodule:: robustnessgym.core.testbench + :members: + :undoc-members: + :show-inheritance: + +robustnessgym.core.tools module +------------------------------- + +.. automodule:: robustnessgym.core.tools + :members: + :undoc-members: + :show-inheritance: + +robustnessgym.core.version module +--------------------------------- + +.. automodule:: robustnessgym.core.version + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. 
automodule:: robustnessgym.core + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/apidocs/robustnessgym.rst b/docs/source/apidocs/robustnessgym.rst new file mode 100644 index 00000000..faab5b08 --- /dev/null +++ b/docs/source/apidocs/robustnessgym.rst @@ -0,0 +1,21 @@ +robustnessgym package +===================== + +Subpackages +----------- + +.. toctree:: + :maxdepth: 4 + + robustnessgym.cachedops + robustnessgym.core + robustnessgym.slicebuilders + robustnessgym.tasks + +Module contents +--------------- + +.. automodule:: robustnessgym + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/apidocs/robustnessgym.slicebuilders.attacks.rst b/docs/source/apidocs/robustnessgym.slicebuilders.attacks.rst new file mode 100644 index 00000000..387874a9 --- /dev/null +++ b/docs/source/apidocs/robustnessgym.slicebuilders.attacks.rst @@ -0,0 +1,29 @@ +robustnessgym.slicebuilders.attacks package +=========================================== + +Submodules +---------- + +robustnessgym.slicebuilders.attacks.morpheus module +--------------------------------------------------- + +.. automodule:: robustnessgym.slicebuilders.attacks.morpheus + :members: + :undoc-members: + :show-inheritance: + +robustnessgym.slicebuilders.attacks.textattack module +----------------------------------------------------- + +.. automodule:: robustnessgym.slicebuilders.attacks.textattack + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: robustnessgym.slicebuilders.attacks + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/apidocs/robustnessgym.slicebuilders.rst b/docs/source/apidocs/robustnessgym.slicebuilders.rst new file mode 100644 index 00000000..e9f89578 --- /dev/null +++ b/docs/source/apidocs/robustnessgym.slicebuilders.rst @@ -0,0 +1,63 @@ +robustnessgym.slicebuilders package +=================================== + +Subpackages +----------- + +.. toctree:: + :maxdepth: 4 + + robustnessgym.slicebuilders.attacks + robustnessgym.slicebuilders.subpopulations + robustnessgym.slicebuilders.transformations + +Submodules +---------- + +robustnessgym.slicebuilders.attack module +----------------------------------------- + +.. automodule:: robustnessgym.slicebuilders.attack + :members: + :undoc-members: + :show-inheritance: + +robustnessgym.slicebuilders.curator module +------------------------------------------ + +.. automodule:: robustnessgym.slicebuilders.curator + :members: + :undoc-members: + :show-inheritance: + +robustnessgym.slicebuilders.slicebuilder module +----------------------------------------------- + +.. automodule:: robustnessgym.slicebuilders.slicebuilder + :members: + :undoc-members: + :show-inheritance: + +robustnessgym.slicebuilders.subpopulation module +------------------------------------------------ + +.. automodule:: robustnessgym.slicebuilders.subpopulation + :members: + :undoc-members: + :show-inheritance: + +robustnessgym.slicebuilders.transformation module +------------------------------------------------- + +.. automodule:: robustnessgym.slicebuilders.transformation + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. 
automodule:: robustnessgym.slicebuilders + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/apidocs/robustnessgym.slicebuilders.subpopulations.rst b/docs/source/apidocs/robustnessgym.slicebuilders.subpopulations.rst new file mode 100644 index 00000000..30914806 --- /dev/null +++ b/docs/source/apidocs/robustnessgym.slicebuilders.subpopulations.rst @@ -0,0 +1,93 @@ +robustnessgym.slicebuilders.subpopulations package +================================================== + +Submodules +---------- + +robustnessgym.slicebuilders.subpopulations.constituency\_overlap module +----------------------------------------------------------------------- + +.. automodule:: robustnessgym.slicebuilders.subpopulations.constituency_overlap + :members: + :undoc-members: + :show-inheritance: + +robustnessgym.slicebuilders.subpopulations.entity\_frequency module +------------------------------------------------------------------- + +.. automodule:: robustnessgym.slicebuilders.subpopulations.entity_frequency + :members: + :undoc-members: + :show-inheritance: + +robustnessgym.slicebuilders.subpopulations.hans module +------------------------------------------------------ + +.. automodule:: robustnessgym.slicebuilders.subpopulations.hans + :members: + :undoc-members: + :show-inheritance: + +robustnessgym.slicebuilders.subpopulations.length module +-------------------------------------------------------- + +.. automodule:: robustnessgym.slicebuilders.subpopulations.length + :members: + :undoc-members: + :show-inheritance: + +robustnessgym.slicebuilders.subpopulations.lexical\_overlap module +------------------------------------------------------------------ + +.. automodule:: robustnessgym.slicebuilders.subpopulations.lexical_overlap + :members: + :undoc-members: + :show-inheritance: + +robustnessgym.slicebuilders.subpopulations.phrase module +-------------------------------------------------------- + +.. automodule:: robustnessgym.slicebuilders.subpopulations.phrase + :members: + :undoc-members: + :show-inheritance: + +robustnessgym.slicebuilders.subpopulations.position module +---------------------------------------------------------- + +.. automodule:: robustnessgym.slicebuilders.subpopulations.position + :members: + :undoc-members: + :show-inheritance: + +robustnessgym.slicebuilders.subpopulations.score module +------------------------------------------------------- + +.. automodule:: robustnessgym.slicebuilders.subpopulations.score + :members: + :undoc-members: + :show-inheritance: + +robustnessgym.slicebuilders.subpopulations.similarity module +------------------------------------------------------------ + +.. automodule:: robustnessgym.slicebuilders.subpopulations.similarity + :members: + :undoc-members: + :show-inheritance: + +robustnessgym.slicebuilders.subpopulations.wordlists module +----------------------------------------------------------- + +.. automodule:: robustnessgym.slicebuilders.subpopulations.wordlists + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. 
automodule:: robustnessgym.slicebuilders.subpopulations + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/apidocs/robustnessgym.slicebuilders.transformations.rst b/docs/source/apidocs/robustnessgym.slicebuilders.transformations.rst new file mode 100644 index 00000000..302ceb6a --- /dev/null +++ b/docs/source/apidocs/robustnessgym.slicebuilders.transformations.rst @@ -0,0 +1,53 @@ +robustnessgym.slicebuilders.transformations package +=================================================== + +Submodules +---------- + +robustnessgym.slicebuilders.transformations.eda module +------------------------------------------------------ + +.. automodule:: robustnessgym.slicebuilders.transformations.eda + :members: + :undoc-members: + :show-inheritance: + +robustnessgym.slicebuilders.transformations.fairseq module +---------------------------------------------------------- + +.. automodule:: robustnessgym.slicebuilders.transformations.fairseq + :members: + :undoc-members: + :show-inheritance: + +robustnessgym.slicebuilders.transformations.gpt3 module +------------------------------------------------------- + +.. automodule:: robustnessgym.slicebuilders.transformations.gpt3 + :members: + :undoc-members: + :show-inheritance: + +robustnessgym.slicebuilders.transformations.nlpaug module +--------------------------------------------------------- + +.. automodule:: robustnessgym.slicebuilders.transformations.nlpaug + :members: + :undoc-members: + :show-inheritance: + +robustnessgym.slicebuilders.transformations.similarity module +------------------------------------------------------------- + +.. automodule:: robustnessgym.slicebuilders.transformations.similarity + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: robustnessgym.slicebuilders.transformations + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/apidocs/robustnessgym.tasks.rst b/docs/source/apidocs/robustnessgym.tasks.rst new file mode 100644 index 00000000..6dbc6052 --- /dev/null +++ b/docs/source/apidocs/robustnessgym.tasks.rst @@ -0,0 +1,29 @@ +robustnessgym.tasks package +=========================== + +Submodules +---------- + +robustnessgym.tasks.schema module +--------------------------------- + +.. automodule:: robustnessgym.tasks.schema + :members: + :undoc-members: + :show-inheritance: + +robustnessgym.tasks.task module +------------------------------- + +.. automodule:: robustnessgym.tasks.task + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: robustnessgym.tasks + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 00000000..65ce7337 --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,78 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. 
+# +import os +import sys +from pathlib import Path + +import toml + +# https://github.com/python-poetry/poetry/issues/144#issuecomment-559793020 +metadata = toml.load(Path(__file__).parent.parent / "pyproject.toml")["tool"]["poetry"] + +sys.path.insert(0, os.path.abspath("")) +sys.path.insert(0, os.path.abspath("..")) +sys.path.insert(0, os.path.abspath("../..")) +sys.setrecursionlimit(1500) + +# -- Project information ----------------------------------------------------- + +project = "Robustness Gym" +copyright = "2020 Robustness Gym" +author = "Robustness Gym" + +# The full version, including alpha/beta/rc tags +# release = "0.0.0dev" +version = release = metadata["version"] + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.coverage", + "sphinx.ext.napoleon", + "sphinx.ext.viewcode", + "sphinx_rtd_theme", + "nbsphinx", + "recommonmark", +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ["_templates"] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = [] + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +# html_theme = 'alabaster' +html_theme = "sphinx_rtd_theme" +# html_theme = 'pytorch_sphinx_theme' +# html_theme_path = ["../../../pytorch_sphinx_theme"] +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ["_static"] + +# Don't show module names in front of class names. +add_module_names = False + +# Sort members by group +autodoc_member_order = "groupwise" diff --git a/docs/source/getting-started/install.md b/docs/source/getting-started/install.md new file mode 100644 index 00000000..0a4df268 --- /dev/null +++ b/docs/source/getting-started/install.md @@ -0,0 +1,58 @@ +Installation +============================ + +This page describes how to get Robustness Gym installed and ready to use. Head to the + [tutorials]() to start using Robustness Gym after installation. + +Installing the Robustness Gym package +------------------ +The only things you need to install to get setup. +### Install with pip + +``` shell +pip install robustnessgym +``` + + +Optional Installation +-------------------------- +The steps below aren't necessary unless you need these features. + +#### Progress bars in Jupyter +Enable the following Jupyter extensions to display progress bars properly. +```shell +jupyter nbextension enable --py widgetsnbextension +jupyter labextension install @jupyter-widgets/jupyterlab-manager +``` + + +#### TextBlob setup +To use TextBlob, download and install the TextBlob corpora. +``` +python -m textblob.download_corpora +``` + + + +#### Installing Spacy GPU +To install Spacy with GPU support, use the installation steps given below. 
+``` shell
+pip install cupy
+pip install spacy[cuda]
+python -m spacy download en_core_web_sm
+```
+
+#### Installing neuralcoref
+The standard version of `neuralcoref` does not use GPUs for prediction; a pending
+[pull request](https://github.com/huggingface/neuralcoref/pull/149) adds this
+functionality.
+Follow the steps below to use it.
+```
+git clone https://github.com/dirkgr/neuralcoref.git
+cd neuralcoref
+git checkout GpuFix  # pinned at commit 754d470d484f56c5715ef35c220c217f28079eef
+pip install -r requirements.txt
+pip install -e .
+```
+
+
diff --git a/docs/source/getting-started/main.md b/docs/source/getting-started/main.md
new file mode 100644
index 00000000..e69de29b
diff --git a/docs/source/getting-started/nutshell.md b/docs/source/getting-started/nutshell.md
new file mode 100644
index 00000000..cd2be074
--- /dev/null
+++ b/docs/source/getting-started/nutshell.md
@@ -0,0 +1,203 @@
+Robustness Gym in a Nutshell
+================================
+
+What is Robustness Gym? Should you use it? Read this page to find some quick answers
+ to common questions.
+
+
+The Big Picture
+--------------------
+
+Robustness Gym was built out of our own frustration at being unable to systematically
+ evaluate and test our machine learning models.
+
+ Traditionally, evaluation has consisted of a few simple steps:
+ 1. Load some data
+ 2. Generate predictions using a model
+ 3. Compute aggregate metrics
+
+ This is no longer sufficient: models are increasingly being deployed in real-world use
+ cases, and aggregate performance is too coarse to make meaningful model assessments.
+ Modern evaluation is about understanding whether models are _robust_ to all the
+ scenarios they might encounter, and where the tradeoffs lie.
+
+ This is reflected in Robustness Gym, which distills these modern goals
+ into a new workflow:
+ 1. Load some data
+ 2. Compute and cache side-information on data
+ 3. Build slices of data
+ 4. Evaluate across the slices
+ 5. Report and share findings
+ 6. Iterate
+
+We'll go into what these steps mean and how to use them in Robustness Gym next.
+
+The Robustness Gym Workflow
+----------------------------
+
+### 1. Load some data
+Loading data in Robustness Gym is easy. We extend the Huggingface
+[datasets](https://github.com/huggingface/datasets) library,
+so all datasets there are immediately available for use through the Robustness Gym
+ `Dataset` class.
+
+```python
+import robustnessgym as rg
+
+# Load the boolq data
+dataset = rg.Dataset.load_dataset('boolq')
+
+# Load the first 10 training examples
+dataset = rg.Dataset.load_dataset('boolq', split='train[:10]')
+
+# Load from a jsonl file
+dataset = rg.Dataset.from_json("file.jsonl")
+```
+
+### 2. Compute and cache side-information
+
+One of the most common operations in evaluation is interpreting and analyzing
+ examples in a dataset.
+This can mean tagging data, adding additional information about examples from a
+ knowledge base, or making predictions about the example.
+
+ It's often useful to have this information available conveniently stored alongside
+ the example, ready to use for analysis.
+
+ This is the idea of the `CachedOperation` class in Robustness Gym. Think of it as a
+ `.map()` over your dataset, except it provides convenience functions to retrieve
+ any information you cache.
+
+Robustness Gym ships with a few cached operations that you can use out-of-the-box.
+
+```python
+from robustnessgym import Spacy, Stanza, TextBlob
+
+# Create the Spacy CachedOperation
+spacy_op = Spacy()
+
+# Apply it on the "text" column of a dataset
+dataset = spacy_op(batch_or_dataset=dataset, columns=["text"])
+
+# Easily retrieve whatever information you need, wherever you need it
+
+# Retrieve the tokens extracted by Spacy for the first 2 examples in the dataset
+tokens = Spacy.retrieve(batch=dataset[:2], columns=["text"], proc_fns=Spacy.tokens)
+
+# Retrieve everything Spacy cached for the first 2 examples, and process it yourself
+spacy_info = Spacy.retrieve(batch=dataset[:2], columns=["text"])
+
+# ...do stuff with spacy_info
+```
+
+### 3. Build slices
+Robustness Gym supports a general set of
+abstractions to create slices of data. Slices are just
+datasets that are constructed by applying an instance of the `SliceBuilder` class
+in Robustness Gym.
+
+Robustness Gym currently supports slices of four kinds:
+1. __Evaluation Sets__: slices constructed from a pre-existing dataset
+2. __Subpopulations__: slices constructed by filtering a larger dataset
+3. __Transformations__: slices constructed by transforming a dataset
+4. __Attacks__: slices constructed by attacking a dataset adversarially
+
+#### 3.1 Evaluation Sets
+```python
+from robustnessgym import Dataset, Slice
+
+# Evaluation Sets: direct construction of a slice
+boolq_slice = Slice(Dataset.load_dataset('boolq'))
+```
+
+#### 3.2 Subpopulations
+```python
+from robustnessgym import LengthSubpopulation
+
+# A simple subpopulation that splits the dataset into 3 slices
+# The intervals act as buckets: e.g. the first slice contains examples whose text has
+# length between 0 and 4
+length_sp = LengthSubpopulation(intervals=[(0, 4), (8, 12), ("80%", "100%")])
+
+# Apply it
+dataset, slices, membership = length_sp(batch_or_dataset=dataset, columns=['text'])
+
+# dataset is an updated dataset where every example is tagged with its slice
+# slices is a list of Slice objects: think of this as a list of 3 datasets
+# membership is a matrix of shape (n x 3) with 0/1 entries, indicating for each of the
+# n examples whether it belongs to each of the 3 slices
+```
+
+#### 3.3 Transformations
+```python
+from robustnessgym import EasyDataAugmentation
+
+# Easy Data Augmentation (https://github.com/jasonwei20/eda_nlp)
+eda = EasyDataAugmentation(num_transformed=2)
+
+# Apply it
+dataset, eda_slices, eda_membership = eda(batch_or_dataset=dataset, columns=['text'])
+
+# eda_slices is just 2 transformed versions of the original dataset
+```
+
+#### 3.4 Attacks
+```python
+from robustnessgym import TextAttack
+from textattack.models.wrappers import HuggingFaceModelWrapper
+
+# TextAttack
+textattack = TextAttack.from_recipe(recipe='BAEGarg2019',
+                                    model=HuggingFaceModelWrapper(...))
+```
+
+### 4. Evaluate slices
+At this point, you can just use your own code (e.g. in numpy) to calculate metrics,
+since the slices are just datasets.
+
+```python
+import numpy as np
+
+def accuracy(true: np.ndarray, pred: np.ndarray):
+    """
+    Your function for computing accuracy.
+    """
+    return np.mean(true == pred)
+
+# Some model in your code
+model = MyModel()
+
+# Evaluation on the length slices
+metrics = {}
+for sl in slices:
+    metrics[sl.identifier] = accuracy(true=sl["label"], pred=model.predict(sl["text"]))
+```
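+
+Since `metrics` is just a Python dictionary keyed by slice identifier, you can
+summarize it however you like. For example, here is a minimal sketch (using `pandas`,
+which is not required by Robustness Gym) that turns the results of the loop above into
+a sortable table:
+
+```python
+import pandas as pd
+
+# Turn the per-slice metrics computed above into a small summary table
+summary = pd.DataFrame(
+    [{"slice": str(identifier), "accuracy": value}
+     for identifier, value in metrics.items()]
+)
+print(summary.sort_values("accuracy"))
+```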
+
+Robustness Gym includes a `TestBench` abstraction to make this process easier.
+
+```python
+from robustnessgym import TestBench, Identifier, BinarySentiment
+
+# Construct a testbench
+testbench = TestBench(
+    # Your identifier for the testbench
+    identifier=Identifier(_name="MyTestBench"),
+    # The task this testbench should be used to evaluate
+    task=BinarySentiment(),
+)
+
+# Add slices
+testbench.add_slices(slices)
+
+# Evaluate: Robustness Gym knows what metrics to use from the task
+metrics = testbench.evaluate(model)
+```
+
+You can also get a Robustness Report using the TestBench.
+
+```python
+# Create the report
+report = testbench.create_report(model)
+
+# Generate the figures
+_, figure = report.figures()
+figure.write_image("my_figure.pdf", engine="kaleido")
+```
\ No newline at end of file
diff --git a/docs/source/getting-started/quickstart.rst b/docs/source/getting-started/quickstart.rst
new file mode 100644
index 00000000..8649a4c7
--- /dev/null
+++ b/docs/source/getting-started/quickstart.rst
@@ -0,0 +1,257 @@
+Quickstart
+========================
+
+This page gives a quick overview of how to start using Robustness Gym.
+
+The central operation in Robustness Gym is the construction of *slices* of data:
+a slice is just a dataset that is used to test specific model properties.
+
+Robustness Gym comes with a set of general abstractions to build slices with ease.
+We'll use a simple example to show you how these work.
+
+Robustness Gym also has a lot of built-in functionality that you can use out-of-the-box
+(thanks to some other great open-source projects) for creating slices. You can
+read more about these in [](), and check out []() if you'd like to
+contribute some of your own slice building code to Robustness Gym.
+
+Let's dive in quickly!
+
+Building Slices
+---------------------
+
+Robustness Gym contains a ``SliceBuilder`` class for writing code to build slices.
+This class defines a common interface that all ``SliceBuilders`` must follow:
+
+#. Any ``SliceBuilder`` object can be called using ``slicebuilder(batch_or_dataset,
+   columns)``.
+
+#. This call always returns a ``(dataset, slices, membership)`` tuple.
+
+To see how this works, let's walk through a simple example. We're going to
+
+#. Create a dummy dataset containing just 4 text examples.
+
+#. Use a ``ScoreSubpopulation`` (a kind of ``SliceBuilder``) to build 2 slices.
+
+Let's start by creating the dataset.
+
+.. code-block:: python
+
+    from robustnessgym import Dataset, Identifier
+
+    dataset = Dataset.from_batch({
+        'text': ['a person is walking',
+                 'a person is running',
+                 'a person is sitting',
+                 'a person is walking on a street eating a bagel']
+    }, identifier=Identifier(_name='MyDataset'))
+
+
+Here, we used the ``.from_batch(..)`` method to create a dataset called ``MyDataset``.
+This dataset has a single column called ``text`` with 4 examples or rows.
+
+The ``Identifier`` class is used to store identifying information for ``Dataset``
+objects, ``SliceBuilder`` objects and more.
+
+.. tip::
+    Most objects in Robustness Gym have a ``.identifier`` property that can be used to
+    inspect the object.
+
+Next, let's create the ``ScoreSubpopulation`` to build slices.
+
+.. code-block:: python
+
+    def length(batch, columns):
+        """
+        A simple function to compute the length of all examples in a batch.
+
+        batch: a dict of lists
+        columns: a list of str
+
+        return: a list of lengths
+        """
+        assert len(columns) == 1, "Pass in a single column."
+
+        # The name of the column to grab text from
+        column_name = columns[0]
+        text_batch = batch[column_name]
+
+        # Tokenize the text using .split() and calculate the number of tokens
+        return [len(text.split()) for text in text_batch]
+
+
+We pause here to point out three things:
+
+#. The ``func(batch, columns)`` signature is a common pattern in Robustness Gym for
+   adding custom functionality.
+
+   The ``batch`` here refers to a batch of data; for example,
+
+   .. code-block:: python
+
+       {'text': ['a person is walking', 'a person is running'], 'index': [0, 1]}
+
+   is a batch of size 2 from the dataset (``dataset[:2]``).
+
+   The ``columns`` parameter specifies the relevant columns of the batch.
+   This has some advantages: e.g. suppose ``otherdataset`` has a text column named
+   ``sentence`` instead.
+   We can reuse ``length`` for both datasets,
+
+   .. code-block:: python
+
+       length(batch=dataset[:2], columns=['text'])
+       length(batch=otherdataset[:2], columns=['sentence'])
+
+#. ``length`` returns a list of scores (lengths in this case). This is an
+   important ingredient of the ``ScoreSubpopulation``, which constructs (as the
+   name suggests) slices by bucketing examples based on their score.
+
+#. We tokenized text inside the ``length`` function. This is bad:
+
+   #. Tokenization is a basic step in text processing, and we should only do it once.
+   #. If it were some other, more expensive operation, we would definitely want to run
+      it only once.
+
+Let's keep going and wrap ``length`` in a ``ScoreSubpopulation``.
+
+
+.. code-block:: python
+
+    from robustnessgym import ScoreSubpopulation
+
+    # Create the score subpopulation for length
+    length_sp = ScoreSubpopulation(intervals=[(0, 5), (5, 10)], score_fn=length)
+
+
+The ``ScoreSubpopulation`` requires
+
+#. a list of ``intervals``; each interval is a tuple containing the range of lengths
+   that is considered part of that slice.
+#. a ``score_fn``, used to assign scores to a batch of examples.
+
+Let's run this on the dataset.
+
+.. code-block:: python
+
+    # Run the length subpopulation on the dataset
+    dataset, slices, membership = length_sp(batch_or_dataset=dataset, columns=['text'])
+
+
+This call just executes the ``length`` function on the dataset, and buckets the
+examples based on which intervals they fall in. As we briefly mentioned earlier, this
+returns the ``(dataset, slices, membership)`` tuple:
+
+#. ``dataset`` now tags each example with slice information, i.e. which slices
+   the example belongs to.
+#. ``slices`` is a list of ``Slice`` objects (2 here, since we specified 2
+   intervals). Each ``Slice`` object is a dataset containing just the examples that
+   were part of the slice.
+#. ``membership`` is a ``np.array`` matrix of shape ``(n, m)``, where ``n`` is the
+   number of examples in the original dataset, and ``m`` is the number of slices
+   built. Entry ``(i, j)`` is 1 if example ``i`` is in slice ``j``.
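+
+For example, here is a quick, purely illustrative way to inspect what came back
+(recall that each ``Slice`` is itself a dataset):
+
+.. code-block:: python
+
+    # Each slice has an identifier and contains only the examples in its bucket
+    for sl in slices:
+        print(sl.identifier, len(sl))
+
+    # Column sums of the membership matrix count the examples captured by each slice
+    print(membership.sum(axis=0))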
+
+And that's (almost) it! Most code you write in Robustness Gym will follow a
+similar workflow. Before we end, we take a short segue to talk about the other major
+abstraction in Robustness Gym: the ``CachedOperation`` class.
+
+Caching Information
+---------------------
+
+As we noted earlier, we tokenized text inside the ``length`` function, when we should
+ideally run this step separately and reuse it across multiple ``SliceBuilder`` objects.
+
+When creating Robustness Gym, we noticed this pattern frequently: cache
+some information (``CachedOperation``), and use that information to build some slices
+(``SliceBuilder``).
+
+Let's look at the same example as before, and use a ``CachedOperation`` for
+tokenization this time.
+
+
+.. code-block:: python
+
+    from robustnessgym import CachedOperation, Identifier
+
+    def tokenize(batch, columns):
+        """
+        A simple function to tokenize a batch of examples.
+
+        batch: a dict of lists
+        columns: a list of str
+
+        return: a list of tokenized text
+        """
+        assert len(columns) == 1, "Pass in a single column."
+
+        # The name of the column to grab text from
+        column_name = columns[0]
+        text_batch = batch[column_name]
+
+        # Tokenize the text using .split()
+        return [text.split() for text in text_batch]
+
+    # Create the CachedOperation
+    cachedop = CachedOperation(apply_fn=tokenize,
+                               identifier=Identifier(_name="Tokenizer"))
+
+
+We've written ``tokenize`` with the familiar ``func(batch, columns)`` function
+signature. This function is then wrapped into a ``CachedOperation`` for use.
+
+.. tip::
+    A ``CachedOperation`` can be created with *any* ``func(batch, columns)``. The only
+    constraint is that it must return a list, with size equal to that of the batch.
+
+
+Let's create our ``ScoreSubpopulation`` for length again.
+
+.. code-block:: python
+
+    from robustnessgym.decorators import singlecolumn
+
+    def length(batch, columns):
+        """
+        A simple function to compute the length of all examples in a batch.
+
+        batch: a dict of lists
+        columns: a list of str
+
+        return: a list of lengths
+        """
+        assert len(columns) == 1, "Pass in a single column."
+
+        # The name of the column to grab text from
+        column_name = columns[0]
+
+        # Retrieve the tokens cached by the tokenizer operation and compute
+        # lengths from them, instead of re-tokenizing the text here
+        return CachedOperation.retrieve(
+            batch=batch,
+            columns=[column_name],
+            proc_fns=lambda decoded_batch: [len(tokens) for tokens in decoded_batch],
+        )
+
+
+Note that ``cachedop`` must first be applied to the dataset, so that the tokens are
+actually cached and available to retrieve.
+
+Robustness Gym ships with ``CachedOperations`` that use standard text processing
+pipelines to tokenize and tag text.
+
+
+There's a ton more to Robustness Gym (and more coming).
+Here are some pointers on where to head next, depending on your specific goals:
+
+#. If you want a more detailed tutorial and walkthrough, head to the [Tutorial 1]()
+   Jupyter notebook.
+#. If you'd like to see what ``SliceBuilders`` are available in Robustness Gym
+   today, check out []().
+#. If you're interested in a walkthrough of the ``SliceBuilder`` class in more
+   detail, head to [](). Head to []() for a deep dive into the ``CachedOperation``
+   class. This is recommended for expert users.
+#. If you'd like to learn more about the motivation behind Robustness Gym, check out
+   []().
+#. If you're interested in becoming a contributor, read []().
+
+
diff --git a/docs/source/index.rst b/docs/source/index.rst
new file mode 100644
index 00000000..135043e9
--- /dev/null
+++ b/docs/source/index.rst
@@ -0,0 +1,46 @@
+.. Robustness Gym documentation master file, created by
+   sphinx-quickstart on Fri Jan 1 16:41:09 2021.
+   You can adapt this file completely to your liking, but it should at least
+   contain the root `toctree` directive.
+
+Welcome to Robustness Gym
+==========================================
+Robustness Gym is a toolkit for evaluating natural language processing models.
+
+..
+    Read more about [Robustness Gym]().
+
+Robustness Gym is *under active development*, so expect rough edges.
+Feedback and contributions are welcome and appreciated.
+You can submit bugs and feature suggestions on GitHub Issues_
+and submit contributions using a pull request.
+ +You can get started by going to the installation_ page. + +.. _Issues: https://github.com/robustness-gym/robustness-gym/issues/ +.. _installation: getting-started/install.md + +.. toctree:: + :maxdepth: 2 + :caption: Getting Started + + getting-started/install.md + getting-started/nutshell.md + getting-started/quickstart.md + + +.. toctree:: + :maxdepth: 2 + :caption: API Docs + + apidocs/robustnessgym + + +.. + Indices and tables + ================== + + * :ref:`genindex` + * :ref:`modindex` + * :ref:`search` + diff --git a/poetry.lock b/poetry.lock new file mode 100644 index 00000000..a99f8511 --- /dev/null +++ b/poetry.lock @@ -0,0 +1,4791 @@ +[[package]] +name = "absl-py" +version = "0.11.0" +description = "Abseil Python Common Libraries, see https://github.com/abseil/abseil-py." +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +six = "*" + +[[package]] +name = "alabaster" +version = "0.7.12" +description = "A configurable sidebar-enabled Sphinx theme" +category = "dev" +optional = false +python-versions = "*" + +[[package]] +name = "allennlp" +version = "1.3.0" +description = "An open-source NLP research library, built on PyTorch." +category = "main" +optional = false +python-versions = ">=3.6.1" + +[package.dependencies] +boto3 = ">=1.14,<2.0" +filelock = ">=3.0,<3.1" +h5py = "*" +jsonnet = {version = ">=0.10.0", markers = "sys_platform != \"win32\""} +jsonpickle = "*" +nltk = "*" +numpy = "*" +overrides = "3.1.0" +pytest = "*" +requests = ">=2.18" +scikit-learn = "*" +scipy = "*" +sentencepiece = "*" +spacy = ">=2.1.0,<2.4" +tensorboardX = ">=1.2" +torch = ">=1.6.0,<1.8.0" +tqdm = ">=4.19" +transformers = ">=4.0,<4.1" + +[[package]] +name = "allennlp-models" +version = "1.3.0" +description = "Officially supported models for the AllenNLP framework" +category = "main" +optional = false +python-versions = ">=3.6.1" + +[package.dependencies] +allennlp = ">=1.3.0,<1.4" +conllu = "4.2.1" +ftfy = "*" +nltk = "*" +py-rouge = "1.1" +torch = ">=1.7.0,<1.8.0" +word2number = ">=1.1" + +[[package]] +name = "antlr4-python3-runtime" +version = "4.8" +description = "ANTLR 4.8 runtime for Python 3.7" +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "anyio" +version = "2.0.2" +description = "High level compatibility layer for multiple asynchronous event loop implementations" +category = "main" +optional = false +python-versions = ">=3.6.2" + +[package.dependencies] +idna = ">=2.8" +sniffio = ">=1.1" + +[package.extras] +curio = ["curio (>=1.4)"] +doc = ["sphinx-rtd-theme", "sphinx-autodoc-typehints (>=1.2.0)"] +test = ["coverage (>=4.5)", "hypothesis (>=4.0)", "pytest (>=4.3)", "trustme", "uvloop"] +trio = ["trio (>=0.16)"] + +[[package]] +name = "appdirs" +version = "1.4.4" +description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." +category = "dev" +optional = false +python-versions = "*" + +[[package]] +name = "appnope" +version = "0.1.2" +description = "Disable App Nap on macOS >= 10.9" +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "argon2-cffi" +version = "20.1.0" +description = "The secure Argon2 password hashing algorithm." 
+category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +cffi = ">=1.0.0" +six = "*" + +[package.extras] +dev = ["coverage[toml] (>=5.0.2)", "hypothesis", "pytest", "sphinx", "wheel", "pre-commit"] +docs = ["sphinx"] +tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "pytest"] + +[[package]] +name = "astunparse" +version = "1.6.3" +description = "An AST unparser for Python" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +six = ">=1.6.1,<2.0" + +[[package]] +name = "async-generator" +version = "1.10" +description = "Async generators and context managers for Python 3.5+" +category = "main" +optional = false +python-versions = ">=3.5" + +[[package]] +name = "atomicwrites" +version = "1.4.0" +description = "Atomic file writes." +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" + +[[package]] +name = "attrs" +version = "20.3.0" +description = "Classes Without Boilerplate" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" + +[package.extras] +dev = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "zope.interface", "furo", "sphinx", "pre-commit"] +docs = ["furo", "sphinx", "zope.interface"] +tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "zope.interface"] +tests_no_zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six"] + +[[package]] +name = "babel" +version = "2.9.0" +description = "Internationalization utilities" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" + +[package.dependencies] +pytz = ">=2015.7" + +[[package]] +name = "backcall" +version = "0.2.0" +description = "Specifications for callback functions passed in to an API" +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "bert-score" +version = "0.3.7" +description = "PyTorch implementation of BERT score" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +matplotlib = "*" +numpy = "*" +pandas = ">=1.0.1" +requests = "*" +torch = ">=1.0.0" +tqdm = ">=4.31.1" +transformers = ">=3.0.0" + +[[package]] +name = "black" +version = "20.8b1" +description = "The uncompromising code formatter." +category = "dev" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +appdirs = "*" +click = ">=7.1.2" +mypy-extensions = ">=0.4.3" +pathspec = ">=0.6,<1" +regex = ">=2020.1.8" +toml = ">=0.10.1" +typed-ast = ">=1.4.0" +typing-extensions = ">=3.7.4" + +[package.extras] +colorama = ["colorama (>=0.4.3)"] +d = ["aiohttp (>=3.3.2)", "aiohttp-cors"] + +[[package]] +name = "bleach" +version = "3.2.1" +description = "An easy safelist-based HTML-sanitizing tool." +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" + +[package.dependencies] +packaging = "*" +six = ">=1.9.0" +webencodings = "*" + +[[package]] +name = "blis" +version = "0.7.4" +description = "The Blis BLAS-like linear algebra library, as a self-contained C-extension." 
+category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +numpy = ">=1.15.0" + +[[package]] +name = "boto3" +version = "1.16.47" +description = "The AWS SDK for Python" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +botocore = ">=1.19.47,<1.20.0" +jmespath = ">=0.7.1,<1.0.0" +s3transfer = ">=0.3.0,<0.4.0" + +[[package]] +name = "botocore" +version = "1.19.47" +description = "Low-level, data-driven core of boto 3." +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +jmespath = ">=0.7.1,<1.0.0" +python-dateutil = ">=2.1,<3.0.0" +urllib3 = {version = ">=1.25.4,<1.27", markers = "python_version != \"3.4\""} + +[[package]] +name = "bpemb" +version = "0.3.2" +description = "Byte-pair embeddings in 275 languages" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +gensim = "*" +numpy = "*" +requests = "*" +sentencepiece = "*" +tqdm = "*" + +[[package]] +name = "cachetools" +version = "4.2.0" +description = "Extensible memoizing collections and decorators" +category = "main" +optional = false +python-versions = "~=3.5" + +[[package]] +name = "catalogue" +version = "1.0.0" +description = "Super lightweight function registries for your library" +category = "main" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" + +[[package]] +name = "certifi" +version = "2020.12.5" +description = "Python package for providing Mozilla's CA Bundle." +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "cffi" +version = "1.14.4" +description = "Foreign Function Interface for Python calling C code." +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +pycparser = "*" + +[[package]] +name = "cfgv" +version = "3.2.0" +description = "Validate configuration and produce human readable error messages." +category = "dev" +optional = false +python-versions = ">=3.6.1" + +[[package]] +name = "chardet" +version = "4.0.0" +description = "Universal encoding detector for Python 2 and 3" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" + +[[package]] +name = "click" +version = "7.1.2" +description = "Composable command line interface toolkit" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" + +[[package]] +name = "cloudpickle" +version = "1.6.0" +description = "Extended pickling support for Python objects" +category = "main" +optional = false +python-versions = ">=3.5" + +[[package]] +name = "colorama" +version = "0.4.4" +description = "Cross-platform colored terminal text." 
+category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" + +[[package]] +name = "commonmark" +version = "0.9.1" +description = "Python parser for the CommonMark Markdown spec" +category = "dev" +optional = false +python-versions = "*" + +[package.extras] +test = ["flake8 (==3.7.8)", "hypothesis (==3.55.3)"] + +[[package]] +name = "conllu" +version = "4.2.1" +description = "CoNLL-U Parser parses a CoNLL-U formatted string into a nested python dictionary" +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "coverage" +version = "5.3.1" +description = "Code coverage measurement for Python" +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4" + +[package.extras] +toml = ["toml"] + +[[package]] +name = "cycler" +version = "0.10.0" +description = "Composable style cycles" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +six = "*" + +[[package]] +name = "cymem" +version = "2.0.5" +description = "Manage calls to calloc/free through Cython" +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "cython" +version = "0.29.21" +description = "The Cython compiler for writing C extensions for the Python language." +category = "main" +optional = false +python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" + +[[package]] +name = "cytoolz" +version = "0.11.0" +description = "Cython implementation of Toolz: High performance functional utilities" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +toolz = ">=0.8.0" + +[package.extras] +cython = ["cython"] + +[[package]] +name = "datasets" +version = "1.1.3" +description = "HuggingFace/Datasets is an open library of NLP datasets." +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +dill = "*" +multiprocess = "*" +numpy = ">=1.17" +pandas = "*" +pyarrow = ">=0.17.1" +requests = ">=2.19.0" +tqdm = ">=4.27,<4.50.0" +xxhash = "*" + +[package.extras] +apache-beam = ["apache-beam"] +benchmarks = ["numpy (==1.18.5)", "tensorflow (==2.3.0)", "torch (==1.6.0)", "transformers (==3.0.2)"] +dev = ["apache-beam", "absl-py", "bs4", "elasticsearch", "faiss-cpu", "langdetect", "mwparserfromhell", "nltk", "pytest", "pytest-xdist", "tensorflow", "torch", "tldextract", "transformers", "zstandard", "black", "isort", "flake8 (==3.7.9)"] +docs = ["recommonmark", "sphinx (==3.1.2)", "sphinx-markdown-tables", "sphinx-rtd-theme (==0.4.3)", "sphinx-copybutton"] +quality = ["black", "isort", "flake8 (==3.7.9)"] +tensorflow = ["tensorflow (>=2.2.0)"] +tensorflow_gpu = ["tensorflow-gpu (>=2.2.0)"] +tests = ["apache-beam", "absl-py", "bs4", "elasticsearch", "faiss-cpu", "langdetect", "mwparserfromhell", "nltk", "pytest", "pytest-xdist", "tensorflow", "torch", "tldextract", "transformers", "zstandard"] +torch = ["torch"] + +[[package]] +name = "decorator" +version = "4.4.2" +description = "Decorators for Humans" +category = "main" +optional = false +python-versions = ">=2.6, !=3.0.*, !=3.1.*" + +[[package]] +name = "defusedxml" +version = "0.6.0" +description = "XML bomb protection for Python stdlib modules" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" + +[[package]] +name = "deprecated" +version = "1.2.10" +description = "Python @deprecated decorator to deprecate old python classes, functions or methods." 
+category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" + +[package.dependencies] +wrapt = ">=1.10,<2" + +[package.extras] +dev = ["tox", "bumpversion (<1)", "sphinx (<2)", "PyTest (<5)", "PyTest-Cov (<2.6)", "pytest", "pytest-cov"] + +[[package]] +name = "dill" +version = "0.3.3" +description = "serialize all of python" +category = "main" +optional = false +python-versions = ">=2.6, !=3.0.*" + +[package.extras] +graph = ["objgraph (>=1.7.2)"] + +[[package]] +name = "distlib" +version = "0.3.1" +description = "Distribution utilities" +category = "dev" +optional = false +python-versions = "*" + +[[package]] +name = "docformatter" +version = "1.4" +description = "Formats docstrings to follow PEP 257." +category = "dev" +optional = false +python-versions = "*" + +[package.dependencies] +untokenize = "*" + +[[package]] +name = "docopt" +version = "0.6.2" +description = "Pythonic argument parser, that will make you smile" +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "docutils" +version = "0.16" +description = "Docutils -- Python Documentation Utilities" +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" + +[[package]] +name = "editdistance" +version = "0.5.3" +description = "Fast implementation of the edit distance(Levenshtein distance)" +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "entrypoints" +version = "0.3" +description = "Discover and load entry points from installed packages." +category = "main" +optional = false +python-versions = ">=2.7" + +[[package]] +name = "fastbpe" +version = "0.1.0" +description = "C++ implementation of Neural Machine Translation of Rare Words with Subword Units, with Python API." +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "filelock" +version = "3.0.12" +description = "A platform independent file lock." 
+category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "flair" +version = "0.6.1.post1" +description = "A very simple framework for state-of-the-art NLP" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +bpemb = ">=0.3.2" +deprecated = ">=1.2.4" +ftfy = "*" +gdown = "*" +gensim = ">=3.4.0" +hyperopt = ">=0.1.1" +janome = "*" +konoha = ">=4.0.0,<5.0.0" +langdetect = "*" +lxml = "*" +matplotlib = ">=2.2.3" +mpld3 = "0.3" +python-dateutil = ">=2.6.1" +regex = "*" +scikit-learn = ">=0.21.3" +segtok = ">=1.5.7" +sentencepiece = "!=0.1.92" +sqlitedict = ">=1.6.0" +tabulate = "*" +torch = ">=1.1.0" +tqdm = ">=4.26.0" +transformers = ">=3.0.0" + +[[package]] +name = "flake8" +version = "3.8.4" +description = "the modular source code checker: pep8 pyflakes and co" +category = "dev" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" + +[package.dependencies] +mccabe = ">=0.6.0,<0.7.0" +pycodestyle = ">=2.6.0a1,<2.7.0" +pyflakes = ">=2.2.0,<2.3.0" + +[[package]] +name = "fsspec" +version = "0.8.5" +description = "File-system specification" +category = "main" +optional = false +python-versions = ">3.6" + +[package.extras] +abfs = ["adlfs"] +adl = ["adlfs"] +dask = ["dask", "distributed"] +dropbox = ["dropboxdrivefs", "requests", "dropbox"] +gcs = ["gcsfs"] +git = ["pygit2"] +github = ["requests"] +gs = ["gcsfs"] +hdfs = ["pyarrow"] +http = ["requests", "aiohttp"] +s3 = ["s3fs"] +sftp = ["paramiko"] +smb = ["smbprotocol"] +ssh = ["paramiko"] + +[[package]] +name = "ftfy" +version = "5.8" +description = "Fixes some problems with Unicode text after the fact" +category = "main" +optional = false +python-versions = ">=3.5" + +[package.dependencies] +wcwidth = "*" + +[[package]] +name = "future" +version = "0.18.2" +description = "Clean single-source support for Python 3 and 2" +category = "main" +optional = false +python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" + +[[package]] +name = "fuzzywuzzy" +version = "0.18.0" +description = "Fuzzy string matching in python" +category = "main" +optional = false +python-versions = "*" + +[package.extras] +speedup = ["python-levenshtein (>=0.12)"] + +[[package]] +name = "gast" +version = "0.3.3" +description = "Python AST that abstracts the underlying Python version" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" + +[[package]] +name = "gdown" +version = "3.12.2" +description = "Google Drive direct download of big files." 
+category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +filelock = "*" +requests = {version = "*", extras = ["socks"]} +six = "*" +tqdm = "*" + +[[package]] +name = "gensim" +version = "3.8.2" +description = "Python framework for fast Vector Space Modelling" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +numpy = ">=1.11.3" +scipy = ">=1.0.0" +six = ">=1.5.0" +smart-open = ">=1.8.1" + +[package.extras] +distributed = ["Pyro4 (>=4.27)"] +docs = ["pytest", "pytest-rerunfailures", "mock", "cython", "testfixtures", "Morfessor (==2.0.2a4)", "python-Levenshtein (>=0.10.2)", "visdom (>=0.1.8,!=0.1.8.7)", "scikit-learn", "Pyro4 (>=4.27)", "sphinx", "sphinxcontrib-napoleon", "plotly", "pattern (<=2.6)", "sphinxcontrib.programoutput"] +test = ["pytest", "pytest-rerunfailures", "mock", "cython", "testfixtures", "Morfessor (==2.0.2a4)", "python-Levenshtein (>=0.10.2)", "visdom (>=0.1.8,!=0.1.8.7)", "scikit-learn"] +test-win = ["pytest", "pytest-rerunfailures", "mock", "cython", "testfixtures", "Morfessor (==2.0.2a4)", "python-Levenshtein (>=0.10.2)", "visdom (>=0.1.8,!=0.1.8.7)", "scikit-learn"] + +[[package]] +name = "google-auth" +version = "1.24.0" +description = "Google Authentication Library" +category = "main" +optional = false +python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*" + +[package.dependencies] +cachetools = ">=2.0.0,<5.0" +pyasn1-modules = ">=0.2.1" +rsa = {version = ">=3.1.4,<5", markers = "python_version >= \"3.6\""} +six = ">=1.9.0" + +[package.extras] +aiohttp = ["aiohttp (>=3.6.2,<4.0.0dev)"] + +[[package]] +name = "google-auth-oauthlib" +version = "0.4.2" +description = "Google Authentication Library" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +google-auth = "*" +requests-oauthlib = ">=0.7.0" + +[package.extras] +tool = ["click"] + +[[package]] +name = "google-pasta" +version = "0.2.0" +description = "pasta is an AST-based Python refactoring library" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +six = "*" + +[[package]] +name = "grpcio" +version = "1.34.0" +description = "HTTP/2-based RPC framework" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +six = ">=1.5.2" + +[package.extras] +protobuf = ["grpcio-tools (>=1.34.0)"] + +[[package]] +name = "h5py" +version = "2.10.0" +description = "Read and write HDF5 files from Python" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +numpy = ">=1.7" +six = "*" + +[[package]] +name = "hydra-core" +version = "1.0.4" +description = "A framework for elegantly configuring complex applications" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +antlr4-python3-runtime = "4.8" +importlib-resources = {version = "*", markers = "python_version < \"3.9\""} +omegaconf = ">=2.0.5" + +[[package]] +name = "hyperopt" +version = "0.2.5" +description = "Distributed Asynchronous Hyperparameter Optimization" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +cloudpickle = "*" +future = "*" +networkx = ">=2.2" +numpy = "*" +scipy = "*" +six = "*" +tqdm = "*" + +[package.extras] +atpe = ["lightgbm", "scikit-learn"] +mongotrials = ["pymongo"] +sparktrials = ["pyspark"] +dev = ["black", "pre-commit", "nose", "pytest"] + +[[package]] +name = "identify" +version = "1.5.11" +description = "File identification library for Python" +category = 
"dev" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" + +[package.extras] +license = ["editdistance"] + +[[package]] +name = "idna" +version = "2.10" +description = "Internationalized Domain Names in Applications (IDNA)" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" + +[[package]] +name = "imagesize" +version = "1.2.0" +description = "Getting image size from png/jpeg/jpeg2000/gif file" +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" + +[[package]] +name = "importlib-resources" +version = "4.1.1" +description = "Read resources from Python packages" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.extras] +docs = ["sphinx", "jaraco.packaging (>=3.2)", "rst.linker (>=1.9)"] +testing = ["pytest (>=3.5,!=3.7.3)", "pytest-checkdocs (>=1.2.3)", "pytest-flake8", "pytest-cov", "jaraco.test (>=3.2.0)", "pytest-black (>=0.3.7)", "pytest-mypy"] + +[[package]] +name = "iniconfig" +version = "1.1.1" +description = "iniconfig: brain-dead simple config-ini parsing" +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "ipykernel" +version = "5.4.2" +description = "IPython Kernel for Jupyter" +category = "main" +optional = false +python-versions = ">=3.5" + +[package.dependencies] +appnope = {version = "*", markers = "platform_system == \"Darwin\""} +ipython = ">=5.0.0" +jupyter-client = "*" +tornado = ">=4.2" +traitlets = ">=4.1.0" + +[package.extras] +test = ["pytest (!=5.3.4)", "pytest-cov", "flaky", "nose"] + +[[package]] +name = "ipython" +version = "7.19.0" +description = "IPython: Productive Interactive Computing" +category = "main" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +appnope = {version = "*", markers = "sys_platform == \"darwin\""} +backcall = "*" +colorama = {version = "*", markers = "sys_platform == \"win32\""} +decorator = "*" +jedi = ">=0.10" +pexpect = {version = ">4.3", markers = "sys_platform != \"win32\""} +pickleshare = "*" +prompt-toolkit = ">=2.0.0,<3.0.0 || >3.0.0,<3.0.1 || >3.0.1,<3.1.0" +pygments = "*" +traitlets = ">=4.2" + +[package.extras] +all = ["Sphinx (>=1.3)", "ipykernel", "ipyparallel", "ipywidgets", "nbconvert", "nbformat", "nose (>=0.10.1)", "notebook", "numpy (>=1.14)", "pygments", "qtconsole", "requests", "testpath"] +doc = ["Sphinx (>=1.3)"] +kernel = ["ipykernel"] +nbconvert = ["nbconvert"] +nbformat = ["nbformat"] +notebook = ["notebook", "ipywidgets"] +parallel = ["ipyparallel"] +qtconsole = ["qtconsole"] +test = ["nose (>=0.10.1)", "requests", "testpath", "pygments", "nbformat", "ipykernel", "numpy (>=1.14)"] + +[[package]] +name = "ipython-genutils" +version = "0.2.0" +description = "Vestigial utilities from IPython" +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "ipywidgets" +version = "7.6.2" +description = "IPython HTML widgets for Jupyter" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +ipykernel = ">=4.5.1" +ipython = {version = ">=4.0.0", markers = "python_version >= \"3.3\""} +jupyterlab-widgets = {version = ">=1.0.0", markers = "python_version >= \"3.5\""} +nbformat = ">=4.2.0" +traitlets = ">=4.3.1" +widgetsnbextension = ">=3.5.0,<3.6.0" + +[package.extras] +test = ["pytest (>=3.6.0)", "pytest-cov", "mock"] + +[[package]] +name = "isort" +version = "5.7.0" +description = "A Python utility / library to sort Python imports." 
+category = "dev" +optional = false +python-versions = ">=3.6,<4.0" + +[package.extras] +pipfile_deprecated_finder = ["pipreqs", "requirementslib"] +requirements_deprecated_finder = ["pipreqs", "pip-api"] +colors = ["colorama (>=0.4.3,<0.5.0)"] + +[[package]] +name = "janome" +version = "0.4.1" +description = "Japanese morphological analysis engine." +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "jedi" +version = "0.18.0" +description = "An autocompletion tool for Python that can be used for text editors." +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +parso = ">=0.8.0,<0.9.0" + +[package.extras] +qa = ["flake8 (==3.8.3)", "mypy (==0.782)"] +testing = ["Django (<3.1)", "colorama", "docopt", "pytest (<6.0.0)"] + +[[package]] +name = "jinja2" +version = "2.11.2" +description = "A very fast and expressive template engine." +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" + +[package.dependencies] +MarkupSafe = ">=0.23" + +[package.extras] +i18n = ["Babel (>=0.8)"] + +[[package]] +name = "jmespath" +version = "0.10.0" +description = "JSON Matching Expressions" +category = "main" +optional = false +python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" + +[[package]] +name = "joblib" +version = "1.0.0" +description = "Lightweight pipelining with Python functions" +category = "main" +optional = false +python-versions = ">=3.6" + +[[package]] +name = "json5" +version = "0.9.5" +description = "A Python implementation of the JSON5 data format." +category = "main" +optional = false +python-versions = "*" + +[package.extras] +dev = ["hypothesis"] + +[[package]] +name = "jsonlines" +version = "1.2.0" +description = "Library with helpers for the jsonlines file format" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +six = "*" + +[[package]] +name = "jsonnet" +version = "0.17.0" +description = "Python bindings for Jsonnet - The data templating language" +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "jsonpickle" +version = "1.4.2" +description = "Python library for serializing any arbitrary object graph into JSON" +category = "main" +optional = false +python-versions = ">=2.7" + +[package.extras] +docs = ["sphinx", "jaraco.packaging (>=3.2)", "rst.linker (>=1.9)"] +testing = ["coverage (<5)", "pytest (>=3.5,!=3.7.3)", "pytest-checkdocs (>=1.2.3)", "pytest-flake8", "pytest-black-multipy", "pytest-cov", "ecdsa", "feedparser", "numpy", "pandas", "pymongo", "sqlalchemy", "enum34", "jsonlib"] +"testing.libs" = ["demjson", "simplejson", "ujson", "yajl"] + +[[package]] +name = "jsonschema" +version = "3.2.0" +description = "An implementation of JSON Schema validation for Python" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +attrs = ">=17.4.0" +pyrsistent = ">=0.14.0" +six = ">=1.11.0" + +[package.extras] +format = ["idna", "jsonpointer (>1.13)", "rfc3987", "strict-rfc3339", "webcolors"] +format_nongpl = ["idna", "jsonpointer (>1.13)", "webcolors", "rfc3986-validator (>0.1.0)", "rfc3339-validator"] + +[[package]] +name = "jupyter-client" +version = "6.1.7" +description = "Jupyter protocol implementation and client libraries" +category = "main" +optional = false +python-versions = ">=3.5" + +[package.dependencies] +jupyter-core = ">=4.6.0" +python-dateutil = ">=2.1" +pyzmq = ">=13" +tornado = ">=4.1" +traitlets = "*" + +[package.extras] +test = ["ipykernel", 
"ipython", "mock", "pytest", "pytest-asyncio", "async-generator", "pytest-timeout"] + +[[package]] +name = "jupyter-core" +version = "4.7.0" +description = "Jupyter core package. A base package on which Jupyter projects rely." +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +pywin32 = {version = ">=1.0", markers = "sys_platform == \"win32\""} +traitlets = "*" + +[[package]] +name = "jupyter-server" +version = "1.1.3" +description = "The backend—i.e. core services, APIs, and REST endpoints—to Jupyter web applications." +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +anyio = ">=2.0.2" +ipython-genutils = "*" +jinja2 = "*" +jupyter-client = ">=6.1.1" +jupyter-core = ">=4.4.0" +nbconvert = "*" +nbformat = "*" +prometheus-client = "*" +pywin32 = {version = ">=1.0", markers = "sys_platform == \"win32\""} +pyzmq = ">=17" +Send2Trash = "*" +terminado = ">=0.8.3" +tornado = ">=6.1.0" +traitlets = ">=4.2.1" + +[package.extras] +test = ["coverage", "requests", "pytest", "pytest-cov", "pytest-tornasync", "pytest-console-scripts", "ipykernel"] + +[[package]] +name = "jupyterlab" +version = "3.0.0" +description = "The JupyterLab server extension." +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +ipython = "*" +jinja2 = ">=2.10" +jupyter-core = "*" +jupyter-server = ">=1.1,<2.0" +jupyterlab-server = ">=2.0,<3.0" +nbclassic = ">=0.2,<1.0" +packaging = "*" +tornado = ">=6.1.0" + +[package.extras] +docs = ["jsx-lexer", "recommonmark", "sphinx", "sphinx-rtd-theme", "sphinx-copybutton"] +test = ["pytest (>=6.0)", "pytest-cov", "pytest-console-scripts", "pytest-check-links", "jupyterlab-server[test] (>=2.0,<3.0)", "requests", "wheel", "virtualenv", "nose-exclude"] + +[[package]] +name = "jupyterlab-pygments" +version = "0.1.2" +description = "Pygments theme using JupyterLab CSS variables" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +pygments = ">=2.4.1,<3" + +[[package]] +name = "jupyterlab-server" +version = "2.0.0" +description = "JupyterLab Server" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +babel = "*" +jinja2 = ">=2.10" +json5 = "*" +jsonschema = ">=3.0.1" +jupyter-server = ">=1.1,<2.0" +packaging = "*" +requests = "*" + +[package.extras] +test = ["codecov", "ipykernel", "pytest (>=5.3.2)", "pytest-cov", "jupyter-server", "pytest-console-scripts", "strict-rfc3339", "wheel"] + +[[package]] +name = "jupyterlab-widgets" +version = "1.0.0" +description = "A JupyterLab extension." 
+category = "main" +optional = false +python-versions = ">=3.6" + +[[package]] +name = "kaleido" +version = "0.1.0" +description = "Static image export for web-based visualization libraries with zero dependencies" +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "keras-preprocessing" +version = "1.1.2" +description = "Easy data preprocessing and data augmentation for deep learning models" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +numpy = ">=1.9.1" +six = ">=1.9.0" + +[package.extras] +image = ["scipy (>=0.14)", "Pillow (>=5.2.0)"] +pep8 = ["flake8"] +tests = ["pandas", "pillow", "tensorflow", "keras", "pytest", "pytest-xdist", "pytest-cov"] + +[[package]] +name = "kiwisolver" +version = "1.3.1" +description = "A fast implementation of the Cassowary constraint solver" +category = "main" +optional = false +python-versions = ">=3.6" + +[[package]] +name = "konoha" +version = "4.0.0" +description = "A tiny sentence/word tokenizer for Japanese text written in Python" +category = "main" +optional = false +python-versions = ">=3.6,<4.0" + +[package.extras] +janome = ["janome (>=0.3.10,<0.4.0)"] +all = ["janome (>=0.3.10,<0.4.0)", "natto-py (>=0.9.0,<0.10.0)", "kytea (>=0.1.4,<0.2.0)", "sentencepiece (>=0.1.85,<0.2.0)", "sudachipy (>=0.4.2,<0.5.0)", "boto3 (>=1.11.0,<2.0.0)"] +mecab = ["natto-py (>=0.9.0,<0.10.0)"] +kytea = ["kytea (>=0.1.4,<0.2.0)"] +sentencepiece = ["sentencepiece (>=0.1.85,<0.2.0)"] +sudachi = ["sudachipy (>=0.4.2,<0.5.0)"] +remote = ["boto3 (>=1.11.0,<2.0.0)"] + +[[package]] +name = "langdetect" +version = "1.0.8" +description = "Language detection library ported from Google's language-detection." +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +six = "*" + +[[package]] +name = "language-tool-python" +version = "2.4.7" +description = "Checks grammar using LanguageTool." +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +requests = "*" +tqdm = "*" + +[[package]] +name = "lemminflect" +version = "0.2.1" +description = "A python module for English lemmatization and inflection." +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +numpy = "*" + +[[package]] +name = "lru-dict" +version = "1.1.6" +description = "An Dict like LRU container." +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "lxml" +version = "4.6.2" +description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, != 3.4.*" + +[package.extras] +cssselect = ["cssselect (>=0.7)"] +html5 = ["html5lib"] +htmlsoup = ["beautifulsoup4"] +source = ["Cython (>=0.29.7)"] + +[[package]] +name = "markdown" +version = "3.3.3" +description = "Python implementation of Markdown." +category = "main" +optional = false +python-versions = ">=3.6" + +[package.extras] +testing = ["coverage", "pyyaml"] + +[[package]] +name = "markupsafe" +version = "1.1.1" +description = "Safely add untrusted strings to HTML/XML markup." 
+category = "main" +optional = false +python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*" + +[[package]] +name = "matplotlib" +version = "3.3.3" +description = "Python plotting package" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +cycler = ">=0.10" +kiwisolver = ">=1.0.1" +numpy = ">=1.15" +pillow = ">=6.2.0" +pyparsing = ">=2.0.3,<2.0.4 || >2.0.4,<2.1.2 || >2.1.2,<2.1.6 || >2.1.6" +python-dateutil = ">=2.1" + +[[package]] +name = "mccabe" +version = "0.6.1" +description = "McCabe checker, plugin for flake8" +category = "dev" +optional = false +python-versions = "*" + +[[package]] +name = "mistune" +version = "0.8.4" +description = "The fastest markdown parser in pure Python" +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "more-itertools" +version = "8.6.0" +description = "More routines for operating on iterables, beyond itertools" +category = "main" +optional = false +python-versions = ">=3.5" + +[[package]] +name = "mpld3" +version = "0.3" +description = "D3 Viewer for Matplotlib" +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "multiprocess" +version = "0.70.11.1" +description = "better multiprocessing and multithreading in python" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +dill = ">=0.3.3" + +[[package]] +name = "murmurhash" +version = "1.0.5" +description = "Cython bindings for MurmurHash" +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "mypy-extensions" +version = "0.4.3" +description = "Experimental type system extensions for programs checked with the mypy typechecker." +category = "dev" +optional = false +python-versions = "*" + +[[package]] +name = "nbclassic" +version = "0.2.5" +description = "Jupyter Notebook as a Jupyter Server Extension." +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +jupyter-server = ">=1.1,<2.0" +notebook = "<7" + +[package.extras] +test = ["pytest", "pytest-tornasync", "pytest-console-scripts"] + +[[package]] +name = "nbclient" +version = "0.5.1" +description = "A client library for executing notebooks. Formerly nbconvert's ExecutePreprocessor." 
+category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +async-generator = "*" +jupyter-client = ">=6.1.5" +nbformat = ">=5.0" +nest-asyncio = "*" +traitlets = ">=4.2" + +[package.extras] +dev = ["codecov", "coverage", "ipython", "ipykernel", "ipywidgets", "pytest (>=4.1)", "pytest-cov (>=2.6.1)", "check-manifest", "flake8", "mypy", "tox", "bumpversion", "xmltodict", "pip (>=18.1)", "wheel (>=0.31.0)", "setuptools (>=38.6.0)", "twine (>=1.11.0)", "black"] +sphinx = ["Sphinx (>=1.7)", "sphinx-book-theme", "mock", "moto", "myst-parser"] +test = ["codecov", "coverage", "ipython", "ipykernel", "ipywidgets", "pytest (>=4.1)", "pytest-cov (>=2.6.1)", "check-manifest", "flake8", "mypy", "tox", "bumpversion", "xmltodict", "pip (>=18.1)", "wheel (>=0.31.0)", "setuptools (>=38.6.0)", "twine (>=1.11.0)", "black"] + +[[package]] +name = "nbconvert" +version = "6.0.7" +description = "Converting Jupyter Notebooks" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +bleach = "*" +defusedxml = "*" +entrypoints = ">=0.2.2" +jinja2 = ">=2.4" +jupyter-core = "*" +jupyterlab-pygments = "*" +mistune = ">=0.8.1,<2" +nbclient = ">=0.5.0,<0.6.0" +nbformat = ">=4.4" +pandocfilters = ">=1.4.1" +pygments = ">=2.4.1" +testpath = "*" +traitlets = ">=4.2" + +[package.extras] +all = ["pytest", "pytest-cov", "pytest-dependency", "ipykernel", "ipywidgets (>=7)", "pyppeteer (==0.2.2)", "tornado (>=4.0)", "sphinx (>=1.5.1)", "sphinx-rtd-theme", "nbsphinx (>=0.2.12)", "ipython"] +docs = ["sphinx (>=1.5.1)", "sphinx-rtd-theme", "nbsphinx (>=0.2.12)", "ipython"] +serve = ["tornado (>=4.0)"] +test = ["pytest", "pytest-cov", "pytest-dependency", "ipykernel", "ipywidgets (>=7)", "pyppeteer (==0.2.2)"] +webpdf = ["pyppeteer (==0.2.2)"] + +[[package]] +name = "nbformat" +version = "5.0.8" +description = "The Jupyter Notebook format" +category = "main" +optional = false +python-versions = ">=3.5" + +[package.dependencies] +ipython-genutils = "*" +jsonschema = ">=2.4,<2.5.0 || >2.5.0" +jupyter-core = "*" +traitlets = ">=4.1" + +[package.extras] +fast = ["fastjsonschema"] +test = ["fastjsonschema", "testpath", "pytest", "pytest-cov"] + +[[package]] +name = "nbsphinx" +version = "0.8.0" +description = "Jupyter Notebook Tools for Sphinx" +category = "dev" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +docutils = "*" +jinja2 = "*" +nbconvert = "!=5.4" +nbformat = "*" +sphinx = ">=1.8" +traitlets = "*" + +[[package]] +name = "nest-asyncio" +version = "1.4.3" +description = "Patch asyncio to allow nested event loops" +category = "main" +optional = false +python-versions = ">=3.5" + +[[package]] +name = "networkx" +version = "2.5" +description = "Python package for creating and manipulating graphs and networks" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +decorator = ">=4.3.0" + +[package.extras] +all = ["numpy", "scipy", "pandas", "matplotlib", "pygraphviz", "pydot", "pyyaml", "lxml", "pytest"] +gdal = ["gdal"] +lxml = ["lxml"] +matplotlib = ["matplotlib"] +numpy = ["numpy"] +pandas = ["pandas"] +pydot = ["pydot"] +pygraphviz = ["pygraphviz"] +pytest = ["pytest"] +pyyaml = ["pyyaml"] +scipy = ["scipy"] + +[[package]] +name = "nlpaug" +version = "1.1.1" +description = "Natural language processing augmentation library for deep neural networks" +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "nltk" +version = "3.5" +description = "Natural Language Toolkit" +category 
= "main" +optional = false +python-versions = "*" + +[package.dependencies] +click = "*" +joblib = "*" +regex = "*" +tqdm = "*" + +[package.extras] +all = ["requests", "numpy", "python-crfsuite", "scikit-learn", "twython", "pyparsing", "scipy", "matplotlib", "gensim"] +corenlp = ["requests"] +machine_learning = ["gensim", "numpy", "python-crfsuite", "scikit-learn", "scipy"] +plot = ["matplotlib"] +tgrep = ["pyparsing"] +twitter = ["twython"] + +[[package]] +name = "nodeenv" +version = "1.5.0" +description = "Node.js virtual environment builder" +category = "dev" +optional = false +python-versions = "*" + +[[package]] +name = "notebook" +version = "6.1.6" +description = "A web-based notebook environment for interactive computing" +category = "main" +optional = false +python-versions = ">=3.5" + +[package.dependencies] +argon2-cffi = "*" +ipykernel = "*" +ipython-genutils = "*" +jinja2 = "*" +jupyter-client = ">=5.3.4" +jupyter-core = ">=4.6.1" +nbconvert = "*" +nbformat = "*" +prometheus-client = "*" +pyzmq = ">=17" +Send2Trash = "*" +terminado = ">=0.8.3" +tornado = ">=5.0" +traitlets = ">=4.2.1" + +[package.extras] +docs = ["sphinx", "nbsphinx", "sphinxcontrib-github-alt", "sphinx-rtd-theme"] +json-logging = ["json-logging"] +test = ["pytest", "coverage", "requests", "nbval", "selenium", "pytest-cov", "requests-unixsocket"] + +[[package]] +name = "num2words" +version = "0.5.10" +description = "Modules to convert numbers to words. Easily extensible." +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +docopt = ">=0.6.2" + +[[package]] +name = "numpy" +version = "1.18.5" +description = "NumPy is the fundamental package for array computing with Python." +category = "main" +optional = false +python-versions = ">=3.5" + +[[package]] +name = "oauthlib" +version = "3.1.0" +description = "A generic, spec-compliant, thorough implementation of the OAuth request-signing logic" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" + +[package.extras] +rsa = ["cryptography"] +signals = ["blinker"] +signedtoken = ["cryptography", "pyjwt (>=1.0.0)"] + +[[package]] +name = "omegaconf" +version = "2.0.5" +description = "A flexible configuration library" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +PyYAML = ">=5.1" +typing-extensions = "*" + +[[package]] +name = "opt-einsum" +version = "3.3.0" +description = "Optimizing numpys einsum function" +category = "main" +optional = false +python-versions = ">=3.5" + +[package.dependencies] +numpy = ">=1.7" + +[package.extras] +docs = ["sphinx (==1.2.3)", "sphinxcontrib-napoleon", "sphinx-rtd-theme", "numpydoc"] +tests = ["pytest", "pytest-cov", "pytest-pep8"] + +[[package]] +name = "overrides" +version = "3.1.0" +description = "A decorator to automatically detect mismatch when overriding a method." 
+category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "packaging" +version = "20.8" +description = "Core utilities for Python packages" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" + +[package.dependencies] +pyparsing = ">=2.0.2" + +[[package]] +name = "pandas" +version = "1.2.0" +description = "Powerful data structures for data analysis, time series, and statistics" +category = "main" +optional = false +python-versions = ">=3.7.1" + +[package.dependencies] +numpy = ">=1.16.5" +python-dateutil = ">=2.7.3" +pytz = ">=2017.3" + +[package.extras] +test = ["pytest (>=5.0.1)", "pytest-xdist", "hypothesis (>=3.58)"] + +[[package]] +name = "pandocfilters" +version = "1.4.3" +description = "Utilities for writing pandoc filters in python" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" + +[[package]] +name = "parso" +version = "0.8.1" +description = "A Python Parser" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.extras] +qa = ["flake8 (==3.8.3)", "mypy (==0.782)"] +testing = ["docopt", "pytest (<6.0.0)"] + +[[package]] +name = "pathspec" +version = "0.8.1" +description = "Utility library for gitignore style pattern matching of file paths." +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" + +[[package]] +name = "pexpect" +version = "4.8.0" +description = "Pexpect allows easy control of interactive console applications." +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +ptyprocess = ">=0.5" + +[[package]] +name = "pickleshare" +version = "0.7.5" +description = "Tiny 'shelve'-like database with concurrency support" +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "pillow" +version = "8.0.1" +description = "Python Imaging Library (Fork)" +category = "main" +optional = false +python-versions = ">=3.6" + +[[package]] +name = "plac" +version = "1.1.3" +description = "The smartest command line arguments parser in the world" +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "plotly" +version = "4.14.1" +description = "An open-source, interactive data visualization library for Python" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +retrying = ">=1.3.3" +six = "*" + +[[package]] +name = "pluggy" +version = "0.13.1" +description = "plugin and hook calling mechanisms for python" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" + +[package.extras] +dev = ["pre-commit", "tox"] + +[[package]] +name = "pre-commit" +version = "2.9.3" +description = "A framework for managing and maintaining multi-language pre-commit hooks." +category = "dev" +optional = false +python-versions = ">=3.6.1" + +[package.dependencies] +cfgv = ">=2.0.0" +identify = ">=1.0.0" +nodeenv = ">=0.11.1" +pyyaml = ">=5.1" +toml = "*" +virtualenv = ">=20.0.8" + +[[package]] +name = "preshed" +version = "3.0.5" +description = "Cython hash table that trusts the keys are pre-hashed" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +cymem = ">=2.0.2,<2.1.0" +murmurhash = ">=0.28.0,<1.1.0" + +[[package]] +name = "progressbar" +version = "2.5" +description = "Text progress bar library for Python." 
+category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "prometheus-client" +version = "0.9.0" +description = "Python client for the Prometheus monitoring system." +category = "main" +optional = false +python-versions = "*" + +[package.extras] +twisted = ["twisted"] + +[[package]] +name = "prompt-toolkit" +version = "3.0.8" +description = "Library for building powerful interactive command lines in Python" +category = "main" +optional = false +python-versions = ">=3.6.1" + +[package.dependencies] +wcwidth = "*" + +[[package]] +name = "protobuf" +version = "3.14.0" +description = "Protocol Buffers" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +six = ">=1.9" + +[[package]] +name = "ptyprocess" +version = "0.7.0" +description = "Run a subprocess in a pseudo terminal" +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "py" +version = "1.10.0" +description = "library with cross-python path, ini-parsing, io, code, log facilities" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" + +[[package]] +name = "py-rouge" +version = "1.1" +description = "Full Python implementation of the ROUGE metric, producing same results as in the official perl implementation." +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "pyahocorasick" +version = "1.4.0" +description = "pyahocorasick is a fast and memory efficient library for exact or approximate multi-pattern string search. With the ahocorasick.Automaton class, you can find multiple key strings occurrences at once in some input text. You can use it as a plain dict-like Trie or convert a Trie to an automaton for efficient Aho-Corasick search. Implemented in C and tested on Python 2.7 and 3.4+. Works on Linux, Mac and Windows. BSD-3-clause license." +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "pyarrow" +version = "2.0.0" +description = "Python library for Apache Arrow" +category = "main" +optional = false +python-versions = ">=3.5" + +[package.dependencies] +numpy = ">=1.14" + +[[package]] +name = "pyasn1" +version = "0.4.8" +description = "ASN.1 types and codecs" +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "pyasn1-modules" +version = "0.2.8" +description = "A collection of ASN.1-based protocols modules." +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +pyasn1 = ">=0.4.6,<0.5.0" + +[[package]] +name = "pycodestyle" +version = "2.6.0" +description = "Python style guide checker" +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" + +[[package]] +name = "pycparser" +version = "2.20" +description = "C parser in Python" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" + +[[package]] +name = "pyflakes" +version = "2.2.0" +description = "passive checker of Python programs" +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" + +[[package]] +name = "pygments" +version = "2.7.3" +description = "Pygments is a syntax highlighting package written in Python." 
+category = "main" +optional = false +python-versions = ">=3.5" + +[[package]] +name = "pyparsing" +version = "2.4.7" +description = "Python parsing module" +category = "main" +optional = false +python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" + +[[package]] +name = "pyrsistent" +version = "0.17.3" +description = "Persistent/Functional/Immutable data structures" +category = "main" +optional = false +python-versions = ">=3.5" + +[[package]] +name = "pysocks" +version = "1.7.1" +description = "A Python SOCKS client module. See https://github.com/Anorov/PySocks for more information." +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" + +[[package]] +name = "pytest" +version = "6.2.1" +description = "pytest: simple powerful testing with Python" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +atomicwrites = {version = ">=1.0", markers = "sys_platform == \"win32\""} +attrs = ">=19.2.0" +colorama = {version = "*", markers = "sys_platform == \"win32\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=0.12,<1.0.0a1" +py = ">=1.8.2" +toml = "*" + +[package.extras] +testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "requests", "xmlschema"] + +[[package]] +name = "pytest-cov" +version = "2.10.1" +description = "Pytest plugin for measuring coverage." +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" + +[package.dependencies] +coverage = ">=4.4" +pytest = ">=4.6" + +[package.extras] +testing = ["fields", "hunter", "process-tests (==2.0.2)", "six", "pytest-xdist", "virtualenv"] + +[[package]] +name = "python-dateutil" +version = "2.8.1" +description = "Extensions to the standard Python datetime module" +category = "main" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" + +[package.dependencies] +six = ">=1.5" + +[[package]] +name = "python-levenshtein" +version = "0.12.0" +description = "Python extension for computing string edit distances and similarities." +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "pytorch-lightning" +version = "1.1.2" +description = "PyTorch Lightning is the lightweight PyTorch wrapper for ML researchers. Scale your models. Write less boilerplate." 
+category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +fsspec = ">=0.8.0" +future = ">=0.17.1" +numpy = ">=1.16.6" +PyYAML = ">=5.1" +tensorboard = ">=2.2.0" +torch = ">=1.3" +tqdm = ">=4.41.0" + +[package.extras] +all = ["matplotlib (>3.1)", "horovod (>=0.20.2)", "omegaconf (>=2.0.1)", "torchtext (>=0.3.1,<0.7)", "onnx (>=1.7.0)", "onnxruntime (>=1.3.0)", "hydra-core (>=1.0)", "neptune-client (>=0.4.109)", "comet-ml (>=3.1.12)", "mlflow (>=1.0.0)", "test-tube (>=0.7.5)", "wandb (>=0.8.21)", "coverage (>=5.0)", "codecov (>=2.1)", "pytest (>=5.0)", "flake8 (>=3.6)", "flake8-black", "check-manifest", "twine (==1.13.0)", "scikit-learn (>=0.22.2)", "scikit-image (>=0.17.2)", "black (>=20.8b1)", "isort (>=5.6.4)", "mypy (>=0.720)", "pre-commit (>=1.0)", "cloudpickle (>=1.3)", "nltk (>=3.3)", "pandas", "torchvision (>=0.4.1)", "gym (>=0.17.0)"] +cpu = ["matplotlib (>3.1)", "omegaconf (>=2.0.1)", "torchtext (>=0.3.1,<0.7)", "onnx (>=1.7.0)", "onnxruntime (>=1.3.0)", "hydra-core (>=1.0)", "neptune-client (>=0.4.109)", "comet-ml (>=3.1.12)", "mlflow (>=1.0.0)", "test-tube (>=0.7.5)", "wandb (>=0.8.21)", "coverage (>=5.0)", "codecov (>=2.1)", "pytest (>=5.0)", "flake8 (>=3.6)", "flake8-black", "check-manifest", "twine (==1.13.0)", "scikit-learn (>=0.22.2)", "scikit-image (>=0.17.2)", "black (>=20.8b1)", "isort (>=5.6.4)", "mypy (>=0.720)", "pre-commit (>=1.0)", "cloudpickle (>=1.3)", "nltk (>=3.3)", "pandas", "torchvision (>=0.4.1)", "gym (>=0.17.0)"] +cpu-extra = ["matplotlib (>3.1)", "omegaconf (>=2.0.1)", "torchtext (>=0.3.1,<0.7)", "onnx (>=1.7.0)", "onnxruntime (>=1.3.0)", "hydra-core (>=1.0)"] +dev = ["matplotlib (>3.1)", "horovod (>=0.20.2)", "omegaconf (>=2.0.1)", "torchtext (>=0.3.1,<0.7)", "onnx (>=1.7.0)", "onnxruntime (>=1.3.0)", "hydra-core (>=1.0)", "neptune-client (>=0.4.109)", "comet-ml (>=3.1.12)", "mlflow (>=1.0.0)", "test-tube (>=0.7.5)", "wandb (>=0.8.21)", "coverage (>=5.0)", "codecov (>=2.1)", "pytest (>=5.0)", "flake8 (>=3.6)", "flake8-black", "check-manifest", "twine (==1.13.0)", "scikit-learn (>=0.22.2)", "scikit-image (>=0.17.2)", "black (>=20.8b1)", "isort (>=5.6.4)", "mypy (>=0.720)", "pre-commit (>=1.0)", "cloudpickle (>=1.3)", "nltk (>=3.3)", "pandas"] +examples = ["torchvision (>=0.4.1)", "gym (>=0.17.0)"] +extra = ["matplotlib (>3.1)", "horovod (>=0.20.2)", "omegaconf (>=2.0.1)", "torchtext (>=0.3.1,<0.7)", "onnx (>=1.7.0)", "onnxruntime (>=1.3.0)", "hydra-core (>=1.0)"] +loggers = ["neptune-client (>=0.4.109)", "comet-ml (>=3.1.12)", "mlflow (>=1.0.0)", "test-tube (>=0.7.5)", "wandb (>=0.8.21)"] +test = ["coverage (>=5.0)", "codecov (>=2.1)", "pytest (>=5.0)", "flake8 (>=3.6)", "flake8-black", "check-manifest", "twine (==1.13.0)", "scikit-learn (>=0.22.2)", "scikit-image (>=0.17.2)", "black (>=20.8b1)", "isort (>=5.6.4)", "mypy (>=0.720)", "pre-commit (>=1.0)", "cloudpickle (>=1.3)", "nltk (>=3.3)", "pandas"] + +[[package]] +name = "pytz" +version = "2020.5" +description = "World timezone definitions, modern and historical" +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "pywin32" +version = "300" +description = "Python for Window Extensions" +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "pywinpty" +version = "0.5.7" +description = "Python bindings for the winpty library" +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "pyyaml" +version = "5.3.1" +description = "YAML parser and emitter for Python" +category = "main" +optional 
= false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" + +[[package]] +name = "pyzmq" +version = "20.0.0" +description = "Python bindings for 0MQ" +category = "main" +optional = false +python-versions = ">=3.5" + +[package.dependencies] +cffi = {version = "*", markers = "implementation_name === \"pypy\""} +py = {version = "*", markers = "implementation_name === \"pypy\""} + +[[package]] +name = "recommonmark" +version = "0.7.1" +description = "A docutils-compatibility bridge to CommonMark, enabling you to write CommonMark inside of Docutils & Sphinx projects." +category = "dev" +optional = false +python-versions = "*" + +[package.dependencies] +commonmark = ">=0.8.1" +docutils = ">=0.11" +sphinx = ">=1.3.1" + +[[package]] +name = "regex" +version = "2020.11.13" +description = "Alternative regular expression module, to replace re." +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "requests" +version = "2.25.1" +description = "Python HTTP for Humans." +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" + +[package.dependencies] +certifi = ">=2017.4.17" +chardet = ">=3.0.2,<5" +idna = ">=2.5,<3" +PySocks = {version = ">=1.5.6,<1.5.7 || >1.5.7", optional = true, markers = "extra == \"socks\""} +urllib3 = ">=1.21.1,<1.27" + +[package.extras] +security = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)"] +socks = ["PySocks (>=1.5.6,!=1.5.7)", "win-inet-pton"] + +[[package]] +name = "requests-oauthlib" +version = "1.3.0" +description = "OAuthlib authentication support for Requests." +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" + +[package.dependencies] +oauthlib = ">=3.0.0" +requests = ">=2.0.0" + +[package.extras] +rsa = ["oauthlib[signedtoken] (>=3.0.0)"] + +[[package]] +name = "retrying" +version = "1.3.3" +description = "Retrying" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +six = ">=1.7.0" + +[[package]] +name = "rouge-score" +version = "0.0.4" +description = "Pure python implementation of ROUGE-1.5.5." 
+category = "main" +optional = false +python-versions = ">=2.7" + +[package.dependencies] +absl-py = "*" +nltk = "*" +numpy = "*" +six = ">=1.14.0" + +[[package]] +name = "rsa" +version = "4.6" +description = "Pure-Python RSA implementation" +category = "main" +optional = false +python-versions = ">=3.5, <4" + +[package.dependencies] +pyasn1 = ">=0.1.3" + +[[package]] +name = "s3transfer" +version = "0.3.3" +description = "An Amazon S3 Transfer Manager" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +botocore = ">=1.12.36,<2.0a.0" + +[[package]] +name = "sacremoses" +version = "0.0.43" +description = "SacreMoses" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +click = "*" +joblib = "*" +regex = "*" +six = "*" +tqdm = "*" + +[[package]] +name = "scikit-learn" +version = "0.24.0" +description = "A set of python modules for machine learning and data mining" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +joblib = ">=0.11" +numpy = ">=1.13.3" +scipy = ">=0.19.1" +threadpoolctl = ">=2.0.0" + +[package.extras] +benchmark = ["matplotlib (>=2.1.1)", "pandas (>=0.25.0)", "memory-profiler (>=0.57.0)"] +docs = ["matplotlib (>=2.1.1)", "scikit-image (>=0.13)", "pandas (>=0.25.0)", "seaborn (>=0.9.0)", "memory-profiler (>=0.57.0)", "sphinx (>=3.2.0)", "sphinx-gallery (>=0.7.0)", "numpydoc (>=1.0.0)", "Pillow (>=7.1.2)", "sphinx-prompt (>=1.3.0)"] +examples = ["matplotlib (>=2.1.1)", "scikit-image (>=0.13)", "pandas (>=0.25.0)", "seaborn (>=0.9.0)"] +tests = ["matplotlib (>=2.1.1)", "scikit-image (>=0.13)", "pandas (>=0.25.0)", "pytest (>=5.0.1)", "pytest-cov (>=2.9.0)", "flake8 (>=3.8.2)", "mypy (>=0.770)", "pyamg (>=4.0.0)"] + +[[package]] +name = "scipy" +version = "1.4.1" +description = "SciPy: Scientific Library for Python" +category = "main" +optional = false +python-versions = ">=3.5" + +[package.dependencies] +numpy = ">=1.13.3" + +[[package]] +name = "segtok" +version = "1.5.10" +description = "sentence segmentation and word tokenization tools" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +regex = "*" + +[[package]] +name = "semver" +version = "2.13.0" +description = "Python helper for Semantic Versioning (http://semver.org/)" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" + +[[package]] +name = "send2trash" +version = "1.5.0" +description = "Send file to trash natively under Mac OS X, Windows and Linux." 
+category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "sentencepiece" +version = "0.1.94" +description = "SentencePiece python wrapper" +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "six" +version = "1.15.0" +description = "Python 2 and 3 compatibility utilities" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" + +[[package]] +name = "smart-open" +version = "4.1.0" +description = "Utils for streaming large files (S3, HDFS, GCS, Azure Blob Storage, gzip, bz2...)" +category = "main" +optional = false +python-versions = ">=3.6.*" + +[package.extras] +all = ["boto3", "google-cloud-storage", "azure-storage-blob", "azure-common", "azure-core", "requests"] +azure = ["azure-storage-blob", "azure-common", "azure-core"] +gcp = ["google-cloud-storage"] +http = ["requests"] +s3 = ["boto3"] +test = ["boto3", "google-cloud-storage", "azure-storage-blob", "azure-common", "azure-core", "requests", "mock", "moto[server] (==1.3.14)", "pathlib2", "responses", "boto3", "paramiko", "parameterizedtestcase", "pytest", "pytest-rerunfailures"] +webhdfs = ["requests"] + +[[package]] +name = "sniffio" +version = "1.2.0" +description = "Sniff out which async library your code is running under" +category = "main" +optional = false +python-versions = ">=3.5" + +[[package]] +name = "snowballstemmer" +version = "2.0.0" +description = "This package provides 26 stemmers for 25 languages generated from Snowball algorithms." +category = "dev" +optional = false +python-versions = "*" + +[[package]] +name = "spacy" +version = "2.3.5" +description = "Industrial-strength Natural Language Processing (NLP) in Python" +category = "main" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" + +[package.dependencies] +blis = ">=0.4.0,<0.8.0" +catalogue = ">=0.0.7,<1.1.0" +cymem = ">=2.0.2,<2.1.0" +murmurhash = ">=0.28.0,<1.1.0" +numpy = ">=1.15.0" +plac = ">=0.9.6,<1.2.0" +preshed = ">=3.0.2,<3.1.0" +requests = ">=2.13.0,<3.0.0" +srsly = ">=1.0.2,<1.1.0" +thinc = ">=7.4.1,<7.5.0" +tqdm = ">=4.38.0,<5.0.0" +wasabi = ">=0.4.0,<1.1.0" + +[package.extras] +cuda = ["cupy (>=5.0.0b4)"] +cuda100 = ["cupy-cuda100 (>=5.0.0b4)"] +cuda101 = ["cupy-cuda101 (>=5.0.0b4)"] +cuda102 = ["cupy-cuda102 (>=5.0.0b4)"] +cuda110 = ["cupy-cuda110 (>=5.0.0b4)"] +cuda111 = ["cupy-cuda111 (>=5.0.0b4)"] +cuda80 = ["cupy-cuda80 (>=5.0.0b4)"] +cuda90 = ["cupy-cuda90 (>=5.0.0b4)"] +cuda91 = ["cupy-cuda91 (>=5.0.0b4)"] +cuda92 = ["cupy-cuda92 (>=5.0.0b4)"] +ja = ["sudachipy (>=0.4.5)", "sudachidict-core (>=20200330)"] +ko = ["natto-py (==0.9.0)"] +lookups = ["spacy-lookups-data (>=0.3.2,<0.4.0)"] +th = ["pythainlp (>=2.0)"] + +[[package]] +name = "sphinx" +version = "3.4.2" +description = "Python documentation generator" +category = "dev" +optional = false +python-versions = ">=3.5" + +[package.dependencies] +alabaster = ">=0.7,<0.8" +babel = ">=1.3" +colorama = {version = ">=0.3.5", markers = "sys_platform == \"win32\""} +docutils = ">=0.12" +imagesize = "*" +Jinja2 = ">=2.3" +packaging = "*" +Pygments = ">=2.0" +requests = ">=2.5.0" +snowballstemmer = ">=1.1" +sphinxcontrib-applehelp = "*" +sphinxcontrib-devhelp = "*" +sphinxcontrib-htmlhelp = "*" +sphinxcontrib-jsmath = "*" +sphinxcontrib-qthelp = "*" +sphinxcontrib-serializinghtml = "*" + +[package.extras] +docs = ["sphinxcontrib-websupport"] +lint = ["flake8 (>=3.5.0)", "isort", "mypy (>=0.790)", "docutils-stubs"] +test = ["pytest", "pytest-cov", "html5lib", "cython", 
"typed-ast"] + +[[package]] +name = "sphinx-rtd-theme" +version = "0.5.1" +description = "Read the Docs theme for Sphinx" +category = "dev" +optional = false +python-versions = "*" + +[package.dependencies] +sphinx = "*" + +[package.extras] +dev = ["transifex-client", "sphinxcontrib-httpdomain", "bump2version"] + +[[package]] +name = "sphinxcontrib-applehelp" +version = "1.0.2" +description = "sphinxcontrib-applehelp is a sphinx extension which outputs Apple help books" +category = "dev" +optional = false +python-versions = ">=3.5" + +[package.extras] +lint = ["flake8", "mypy", "docutils-stubs"] +test = ["pytest"] + +[[package]] +name = "sphinxcontrib-devhelp" +version = "1.0.2" +description = "sphinxcontrib-devhelp is a sphinx extension which outputs Devhelp document." +category = "dev" +optional = false +python-versions = ">=3.5" + +[package.extras] +lint = ["flake8", "mypy", "docutils-stubs"] +test = ["pytest"] + +[[package]] +name = "sphinxcontrib-htmlhelp" +version = "1.0.3" +description = "sphinxcontrib-htmlhelp is a sphinx extension which renders HTML help files" +category = "dev" +optional = false +python-versions = ">=3.5" + +[package.extras] +lint = ["flake8", "mypy", "docutils-stubs"] +test = ["pytest", "html5lib"] + +[[package]] +name = "sphinxcontrib-jsmath" +version = "1.0.1" +description = "A sphinx extension which renders display math in HTML via JavaScript" +category = "dev" +optional = false +python-versions = ">=3.5" + +[package.extras] +test = ["pytest", "flake8", "mypy"] + +[[package]] +name = "sphinxcontrib-qthelp" +version = "1.0.3" +description = "sphinxcontrib-qthelp is a sphinx extension which outputs QtHelp document." +category = "dev" +optional = false +python-versions = ">=3.5" + +[package.extras] +lint = ["flake8", "mypy", "docutils-stubs"] +test = ["pytest"] + +[[package]] +name = "sphinxcontrib-serializinghtml" +version = "1.1.4" +description = "sphinxcontrib-serializinghtml is a sphinx extension which outputs \"serialized\" HTML files (json and pickle)." +category = "dev" +optional = false +python-versions = ">=3.5" + +[package.extras] +lint = ["flake8", "mypy", "docutils-stubs"] +test = ["pytest"] + +[[package]] +name = "sqlitedict" +version = "1.7.0" +description = "Persistent dict in Python, backed up by sqlite3 and pickle, multithread-safe." 
+category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "srsly" +version = "1.0.5" +description = "Modern high-performance serialization utilities for Python" +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "stanza" +version = "1.1.1" +description = "A Python NLP Library for Many Human Languages, by the Stanford NLP Group" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +numpy = "*" +protobuf = "*" +requests = "*" +torch = ">=1.3.0" +tqdm = "*" + +[package.extras] +dev = ["check-manifest"] +test = ["coverage"] + +[[package]] +name = "tabulate" +version = "0.8.7" +description = "Pretty-print tabular data" +category = "main" +optional = false +python-versions = "*" + +[package.extras] +widechars = ["wcwidth"] + +[[package]] +name = "tensorboard" +version = "2.4.0" +description = "TensorBoard lets you watch Tensors Flow" +category = "main" +optional = false +python-versions = ">= 2.7, != 3.0.*, != 3.1.*" + +[package.dependencies] +absl-py = ">=0.4" +google-auth = ">=1.6.3,<2" +google-auth-oauthlib = ">=0.4.1,<0.5" +grpcio = ">=1.24.3" +markdown = ">=2.6.8" +numpy = ">=1.12.0" +protobuf = ">=3.6.0" +requests = ">=2.21.0,<3" +six = ">=1.10.0" +tensorboard-plugin-wit = ">=1.6.0" +werkzeug = ">=0.11.15" + +[[package]] +name = "tensorboard-plugin-wit" +version = "1.7.0" +description = "What-If Tool TensorBoard plugin." +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "tensorboardx" +version = "2.1" +description = "TensorBoardX lets you watch Tensors Flow without Tensorflow" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +numpy = "*" +protobuf = ">=3.8.0" +six = "*" + +[[package]] +name = "tensorflow" +version = "2.3.1" +description = "TensorFlow is an open source machine learning framework for everyone." +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +absl-py = ">=0.7.0" +astunparse = "1.6.3" +gast = "0.3.3" +google-pasta = ">=0.1.8" +grpcio = ">=1.8.6" +h5py = ">=2.10.0,<2.11.0" +keras-preprocessing = ">=1.1.1,<1.2" +numpy = ">=1.16.0,<1.19.0" +opt-einsum = ">=2.3.2" +protobuf = ">=3.9.2" +six = ">=1.12.0" +tensorboard = ">=2.3.0,<3" +tensorflow-estimator = ">=2.3.0,<2.4.0" +termcolor = ">=1.1.0" +wrapt = ">=1.11.1" + +[[package]] +name = "tensorflow-estimator" +version = "2.3.0" +description = "TensorFlow Estimator." +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "termcolor" +version = "1.1.0" +description = "ANSII Color formatting for output in terminal." +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "terminado" +version = "0.9.1" +description = "Tornado websocket backend for the Xterm.js Javascript terminal emulator library." +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +ptyprocess = {version = "*", markers = "os_name != \"nt\""} +pywinpty = {version = ">=0.5", markers = "os_name == \"nt\""} +tornado = ">=4" + +[[package]] +name = "terminaltables" +version = "3.1.0" +description = "Generate simple tables in terminals from a nested list of strings." 
+category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "testpath" +version = "0.4.4" +description = "Test utilities for code working with files and commands" +category = "main" +optional = false +python-versions = "*" + +[package.extras] +test = ["pathlib2"] + +[[package]] +name = "textattack" +version = "0.2.15" +description = "A library for generating text adversarial examples" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +bert-score = ">=0.3.5" +datasets = "*" +editdistance = "*" +filelock = "*" +flair = "0.6.1.post1" +language-tool-python = "*" +lemminflect = "*" +lru-dict = "*" +more-itertools = "*" +nltk = "*" +num2words = "*" +numpy = "<1.19.0" +pandas = ">=1.0.1" +PySocks = ">=1.5.6,<1.5.7 || >1.5.7" +scipy = "1.4.1" +terminaltables = "*" +torch = "*" +tqdm = ">=4.27,<4.50.0" +transformers = ">=3.3.0" +word2number = "*" + +[package.extras] +dev = ["recommonmark", "nbsphinx", "sphinx-autobuild", "sphinx-rtd-theme", "black (==20.8b1)", "docformatter", "isort (==5.6.4)", "flake8", "pytest", "pytest-xdist", "tensorflow (>=2)", "tensorflow-hub", "tensorflow-text (>=2)", "tensorboardx", "sentence-transformers (>0.2.6)", "stanza", "visdom", "wandb", "gensim (==3.8.3)"] +docs = ["recommonmark", "nbsphinx", "sphinx-autobuild", "sphinx-rtd-theme"] +optional = ["sentence-transformers (>0.2.6)", "stanza", "visdom", "wandb", "gensim (==3.8.3)"] +tensorflow = ["tensorflow (>=2)", "tensorflow-hub", "tensorflow-text (>=2)", "tensorboardx"] +test = ["black (==20.8b1)", "docformatter", "isort (==5.6.4)", "flake8", "pytest", "pytest-xdist"] + +[[package]] +name = "textblob" +version = "0.15.3" +description = "Simple, Pythonic text processing. Sentiment analysis, part-of-speech tagging, noun phrase parsing, and more." 
+category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +nltk = ">=3.1" + +[[package]] +name = "thinc" +version = "7.4.5" +description = "Practical Machine Learning for NLP" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +blis = ">=0.4.0,<0.8.0" +catalogue = ">=0.0.7,<1.1.0" +cymem = ">=2.0.2,<2.1.0" +murmurhash = ">=0.28.0,<1.1.0" +numpy = ">=1.15.0" +plac = ">=0.9.6,<1.2.0" +preshed = ">=1.0.1,<3.1.0" +srsly = ">=0.0.6,<1.1.0" +tqdm = ">=4.10.0,<5.0.0" +wasabi = ">=0.0.9,<1.1.0" + +[package.extras] +cuda = ["cupy (>=5.0.0b4)"] +cuda100 = ["cupy-cuda100 (>=5.0.0b4)"] +cuda101 = ["cupy-cuda101 (>=5.0.0b4)"] +cuda102 = ["cupy-cuda102 (>=5.0.0b4)"] +cuda110 = ["cupy-cuda110 (>=5.0.0b4)"] +cuda111 = ["cupy-cuda111 (>=5.0.0b4)"] +cuda80 = ["cupy-cuda80 (>=5.0.0b4)"] +cuda90 = ["cupy-cuda90 (>=5.0.0b4)"] +cuda91 = ["cupy-cuda91 (>=5.0.0b4)"] +cuda92 = ["cupy-cuda92 (>=5.0.0b4)"] + +[[package]] +name = "threadpoolctl" +version = "2.1.0" +description = "threadpoolctl" +category = "main" +optional = false +python-versions = ">=3.5" + +[[package]] +name = "tokenizers" +version = "0.9.4" +description = "Fast and Customizable Tokenizers" +category = "main" +optional = false +python-versions = "*" + +[package.extras] +testing = ["pytest"] + +[[package]] +name = "toml" +version = "0.10.2" +description = "Python Library for Tom's Obvious, Minimal Language" +category = "main" +optional = false +python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" + +[[package]] +name = "toolz" +version = "0.11.1" +description = "List processing tools and functional utilities" +category = "main" +optional = false +python-versions = ">=3.5" + +[[package]] +name = "torch" +version = "1.7.1" +description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" +category = "main" +optional = false +python-versions = ">=3.6.2" + +[package.dependencies] +numpy = "*" +typing-extensions = "*" + +[[package]] +name = "tornado" +version = "6.1" +description = "Tornado is a Python web framework and asynchronous networking library, originally developed at FriendFeed." 
+category = "main" +optional = false +python-versions = ">= 3.5" + +[[package]] +name = "tqdm" +version = "4.49.0" +description = "Fast, Extensible Progress Meter" +category = "main" +optional = false +python-versions = ">=2.6, !=3.0.*, !=3.1.*" + +[package.extras] +dev = ["py-make (>=0.1.0)", "twine", "argopt", "pydoc-markdown"] + +[[package]] +name = "traitlets" +version = "5.0.5" +description = "Traitlets Python configuration system" +category = "main" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +ipython-genutils = "*" + +[package.extras] +test = ["pytest"] + +[[package]] +name = "transformers" +version = "4.0.1" +description = "State-of-the-art Natural Language Processing for TensorFlow 2.0 and PyTorch" +category = "main" +optional = false +python-versions = ">=3.6.0" + +[package.dependencies] +filelock = "*" +numpy = "*" +packaging = "*" +regex = "!=2019.12.17" +requests = "*" +sacremoses = "*" +tokenizers = "0.9.4" +tqdm = ">=4.27" + +[package.extras] +all = ["tensorflow (>=2.0)", "onnxconverter-common", "keras2onnx", "torch (>=1.0)", "jaxlib (==0.1.55)", "jax (>=0.2.0)", "flax (==0.2.2)", "sentencepiece (==0.1.91)", "protobuf", "tokenizers (==0.9.4)"] +dev = ["tensorflow (>=2.0)", "onnxconverter-common", "keras2onnx", "torch (>=1.0)", "jaxlib (==0.1.55)", "jax (>=0.2.0)", "flax (==0.2.2)", "sentencepiece (==0.1.91)", "protobuf", "tokenizers (==0.9.4)", "pytest", "pytest-xdist", "timeout-decorator", "parameterized", "psutil", "faiss-cpu", "datasets", "cookiecutter (==1.7.2)", "black (>=20.8b1)", "isort (>=5.5.4)", "flake8 (>=3.8.3)", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "unidic-lite (>=1.0.7)", "unidic (>=1.0.2)", "recommonmark", "sphinx (==3.2.1)", "sphinx-markdown-tables", "sphinx-rtd-theme (==0.4.3)", "sphinx-copybutton", "scikit-learn"] +docs = ["recommonmark", "sphinx (==3.2.1)", "sphinx-markdown-tables", "sphinx-rtd-theme (==0.4.3)", "sphinx-copybutton"] +flax = ["jaxlib (==0.1.55)", "jax (>=0.2.0)", "flax (==0.2.2)"] +ja = ["fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "unidic-lite (>=1.0.7)", "unidic (>=1.0.2)"] +modelcreation = ["cookiecutter (==1.7.2)"] +onnxruntime = ["onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)"] +quality = ["black (>=20.8b1)", "isort (>=5.5.4)", "flake8 (>=3.8.3)"] +retrieval = ["faiss-cpu", "datasets"] +sentencepiece = ["sentencepiece (==0.1.91)", "protobuf"] +serving = ["pydantic", "uvicorn", "fastapi", "starlette"] +sklearn = ["scikit-learn"] +testing = ["pytest", "pytest-xdist", "timeout-decorator", "parameterized", "psutil", "faiss-cpu", "datasets", "cookiecutter (==1.7.2)"] +tf = ["tensorflow (>=2.0)", "onnxconverter-common", "keras2onnx"] +tf-cpu = ["tensorflow-cpu (>=2.0)", "onnxconverter-common", "keras2onnx"] +tokenizers = ["tokenizers (==0.9.4)"] +torch = ["torch (>=1.0)"] + +[[package]] +name = "typed-ast" +version = "1.4.2" +description = "a fork of Python 2 and 3 ast modules with type comment support" +category = "dev" +optional = false +python-versions = "*" + +[[package]] +name = "typing-extensions" +version = "3.7.4.3" +description = "Backported and Experimental Type Hints for Python 3.5+" +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "untokenize" +version = "0.1.1" +description = "Transforms tokens into original source code (while preserving whitespace)." +category = "dev" +optional = false +python-versions = "*" + +[[package]] +name = "urllib3" +version = "1.26.2" +description = "HTTP library with thread-safe connection pooling, file post, and more." 
+category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4" + +[package.extras] +brotli = ["brotlipy (>=0.6.0)"] +secure = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "certifi", "ipaddress"] +socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] + +[[package]] +name = "virtualenv" +version = "20.2.2" +description = "Virtual Python Environment builder" +category = "dev" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" + +[package.dependencies] +appdirs = ">=1.4.3,<2" +distlib = ">=0.3.1,<1" +filelock = ">=3.0.0,<4" +six = ">=1.9.0,<2" + +[package.extras] +docs = ["proselint (>=0.10.2)", "sphinx (>=3)", "sphinx-argparse (>=0.2.5)", "sphinx-rtd-theme (>=0.4.3)", "towncrier (>=19.9.0rc1)"] +testing = ["coverage (>=4)", "coverage-enable-subprocess (>=1)", "flaky (>=3)", "pytest (>=4)", "pytest-env (>=0.6.2)", "pytest-freezegun (>=0.4.1)", "pytest-mock (>=2)", "pytest-randomly (>=1)", "pytest-timeout (>=1)", "pytest-xdist (>=1.31.0)", "packaging (>=20.0)", "xonsh (>=0.9.16)"] + +[[package]] +name = "wasabi" +version = "0.8.0" +description = "A lightweight console printing and formatting toolkit" +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "wcwidth" +version = "0.2.5" +description = "Measures the displayed width of unicode strings in a terminal" +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "webencodings" +version = "0.5.1" +description = "Character encoding aliases for legacy web content" +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "werkzeug" +version = "1.0.1" +description = "The comprehensive WSGI web application library." +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" + +[package.extras] +dev = ["pytest", "pytest-timeout", "coverage", "tox", "sphinx", "pallets-sphinx-themes", "sphinx-issues"] +watchdog = ["watchdog"] + +[[package]] +name = "widgetsnbextension" +version = "3.5.1" +description = "IPython HTML widgets for Jupyter" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +notebook = ">=4.4.1" + +[[package]] +name = "word2number" +version = "1.1" +description = "Convert number words eg. three hundred and forty two to numbers (342)." +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "wrapt" +version = "1.12.1" +description = "Module for decorators, wrappers and monkey patching." 
+category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "xxhash" +version = "2.0.0" +description = "Python binding for xxHash" +category = "main" +optional = false +python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" + +[metadata] +lock-version = "1.1" +python-versions = "^3.8" +content-hash = "aed6f272e080444b9a328036617fce09105ddc0dbbf32a2d23541b3bdc54c231" + +[metadata.files] +absl-py = [ + {file = "absl-py-0.11.0.tar.gz", hash = "sha256:673cccb88d810e5627d0c1c818158485d106f65a583880e2f730c997399bcfa7"}, + {file = "absl_py-0.11.0-py3-none-any.whl", hash = "sha256:b3d9eb5119ff6e0a0125f6dabf2f9fae02f8acae7be70576002fac27235611c5"}, +] +alabaster = [ + {file = "alabaster-0.7.12-py2.py3-none-any.whl", hash = "sha256:446438bdcca0e05bd45ea2de1668c1d9b032e1a9154c2c259092d77031ddd359"}, + {file = "alabaster-0.7.12.tar.gz", hash = "sha256:a661d72d58e6ea8a57f7a86e37d86716863ee5e92788398526d58b26a4e4dc02"}, +] +allennlp = [ + {file = "allennlp-1.3.0-py3-none-any.whl", hash = "sha256:d24f43b07ac68fcf0b76ecc0a0239144a132a383a1f45a7aa2fa91159eb0d5a2"}, + {file = "allennlp-1.3.0.tar.gz", hash = "sha256:72c35c6996d3981335c10686bc2ff25e0f5b6b60617ab1dd26d5bcf74877137e"}, +] +allennlp-models = [ + {file = "allennlp_models-1.3.0-py3-none-any.whl", hash = "sha256:7e46daeec9228d519ec6fa46e170da8b43493590216046fe15406ea9dcddd87d"}, + {file = "allennlp_models-1.3.0.tar.gz", hash = "sha256:a3eb79cccea97da3ef43ceff0fde958e08a93f05e5bb1c6f36ce65b9a516aed1"}, +] +antlr4-python3-runtime = [ + {file = "antlr4-python3-runtime-4.8.tar.gz", hash = "sha256:15793f5d0512a372b4e7d2284058ad32ce7dd27126b105fb0b2245130445db33"}, +] +anyio = [ + {file = "anyio-2.0.2-py3-none-any.whl", hash = "sha256:01cce0087b8fd8b6b7e629dc11505dcde02f916ce903332892cb2ae9817b597d"}, + {file = "anyio-2.0.2.tar.gz", hash = "sha256:35075abd32cf20fd7e0be2fee3614e80b92d5392eba257c8d2f33de3df7ca237"}, +] +appdirs = [ + {file = "appdirs-1.4.4-py2.py3-none-any.whl", hash = "sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128"}, + {file = "appdirs-1.4.4.tar.gz", hash = "sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41"}, +] +appnope = [ + {file = "appnope-0.1.2-py2.py3-none-any.whl", hash = "sha256:93aa393e9d6c54c5cd570ccadd8edad61ea0c4b9ea7a01409020c9aa019eb442"}, + {file = "appnope-0.1.2.tar.gz", hash = "sha256:dd83cd4b5b460958838f6eb3000c660b1f9caf2a5b1de4264e941512f603258a"}, +] +argon2-cffi = [ + {file = "argon2-cffi-20.1.0.tar.gz", hash = "sha256:d8029b2d3e4b4cea770e9e5a0104dd8fa185c1724a0f01528ae4826a6d25f97d"}, + {file = "argon2_cffi-20.1.0-cp27-cp27m-macosx_10_6_intel.whl", hash = "sha256:6ea92c980586931a816d61e4faf6c192b4abce89aa767ff6581e6ddc985ed003"}, + {file = "argon2_cffi-20.1.0-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:05a8ac07c7026542377e38389638a8a1e9b78f1cd8439cd7493b39f08dd75fbf"}, + {file = "argon2_cffi-20.1.0-cp27-cp27m-win32.whl", hash = "sha256:0bf066bc049332489bb2d75f69216416329d9dc65deee127152caeb16e5ce7d5"}, + {file = "argon2_cffi-20.1.0-cp27-cp27m-win_amd64.whl", hash = "sha256:57358570592c46c420300ec94f2ff3b32cbccd10d38bdc12dc6979c4a8484fbc"}, + {file = "argon2_cffi-20.1.0-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:7d455c802727710e9dfa69b74ccaab04568386ca17b0ad36350b622cd34606fe"}, + {file = "argon2_cffi-20.1.0-cp35-abi3-manylinux1_x86_64.whl", hash = "sha256:b160416adc0f012fb1f12588a5e6954889510f82f698e23ed4f4fa57f12a0647"}, + {file = "argon2_cffi-20.1.0-cp35-cp35m-win32.whl", hash = 
"sha256:9bee3212ba4f560af397b6d7146848c32a800652301843df06b9e8f68f0f7361"}, + {file = "argon2_cffi-20.1.0-cp35-cp35m-win_amd64.whl", hash = "sha256:392c3c2ef91d12da510cfb6f9bae52512a4552573a9e27600bdb800e05905d2b"}, + {file = "argon2_cffi-20.1.0-cp36-cp36m-win32.whl", hash = "sha256:ba7209b608945b889457f949cc04c8e762bed4fe3fec88ae9a6b7765ae82e496"}, + {file = "argon2_cffi-20.1.0-cp36-cp36m-win_amd64.whl", hash = "sha256:da7f0445b71db6d3a72462e04f36544b0de871289b0bc8a7cc87c0f5ec7079fa"}, + {file = "argon2_cffi-20.1.0-cp37-abi3-macosx_10_6_intel.whl", hash = "sha256:cc0e028b209a5483b6846053d5fd7165f460a1f14774d79e632e75e7ae64b82b"}, + {file = "argon2_cffi-20.1.0-cp37-cp37m-win32.whl", hash = "sha256:18dee20e25e4be86680b178b35ccfc5d495ebd5792cd00781548d50880fee5c5"}, + {file = "argon2_cffi-20.1.0-cp37-cp37m-win_amd64.whl", hash = "sha256:6678bb047373f52bcff02db8afab0d2a77d83bde61cfecea7c5c62e2335cb203"}, + {file = "argon2_cffi-20.1.0-cp38-cp38-win32.whl", hash = "sha256:77e909cc756ef81d6abb60524d259d959bab384832f0c651ed7dcb6e5ccdbb78"}, + {file = "argon2_cffi-20.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:9dfd5197852530294ecb5795c97a823839258dfd5eb9420233c7cfedec2058f2"}, + {file = "argon2_cffi-20.1.0-cp39-cp39-win32.whl", hash = "sha256:e2db6e85c057c16d0bd3b4d2b04f270a7467c147381e8fd73cbbe5bc719832be"}, + {file = "argon2_cffi-20.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:8a84934bd818e14a17943de8099d41160da4a336bcc699bb4c394bbb9b94bd32"}, +] +astunparse = [ + {file = "astunparse-1.6.3-py2.py3-none-any.whl", hash = "sha256:c2652417f2c8b5bb325c885ae329bdf3f86424075c4fd1a128674bc6fba4b8e8"}, + {file = "astunparse-1.6.3.tar.gz", hash = "sha256:5ad93a8456f0d084c3456d059fd9a92cce667963232cbf763eac3bc5b7940872"}, +] +async-generator = [ + {file = "async_generator-1.10-py3-none-any.whl", hash = "sha256:01c7bf666359b4967d2cda0000cc2e4af16a0ae098cbffcb8472fb9e8ad6585b"}, + {file = "async_generator-1.10.tar.gz", hash = "sha256:6ebb3d106c12920aaae42ccb6f787ef5eefdcdd166ea3d628fa8476abe712144"}, +] +atomicwrites = [ + {file = "atomicwrites-1.4.0-py2.py3-none-any.whl", hash = "sha256:6d1784dea7c0c8d4a5172b6c620f40b6e4cbfdf96d783691f2e1302a7b88e197"}, + {file = "atomicwrites-1.4.0.tar.gz", hash = "sha256:ae70396ad1a434f9c7046fd2dd196fc04b12f9e91ffb859164193be8b6168a7a"}, +] +attrs = [ + {file = "attrs-20.3.0-py2.py3-none-any.whl", hash = "sha256:31b2eced602aa8423c2aea9c76a724617ed67cf9513173fd3a4f03e3a929c7e6"}, + {file = "attrs-20.3.0.tar.gz", hash = "sha256:832aa3cde19744e49938b91fea06d69ecb9e649c93ba974535d08ad92164f700"}, +] +babel = [ + {file = "Babel-2.9.0-py2.py3-none-any.whl", hash = "sha256:9d35c22fcc79893c3ecc85ac4a56cde1ecf3f19c540bba0922308a6c06ca6fa5"}, + {file = "Babel-2.9.0.tar.gz", hash = "sha256:da031ab54472314f210b0adcff1588ee5d1d1d0ba4dbd07b94dba82bde791e05"}, +] +backcall = [ + {file = "backcall-0.2.0-py2.py3-none-any.whl", hash = "sha256:fbbce6a29f263178a1f7915c1940bde0ec2b2a967566fe1c65c1dfb7422bd255"}, + {file = "backcall-0.2.0.tar.gz", hash = "sha256:5cbdbf27be5e7cfadb448baf0aa95508f91f2bbc6c6437cd9cd06e2a4c215e1e"}, +] +bert-score = [ + {file = "bert_score-0.3.7-py3-none-any.whl", hash = "sha256:7cb962d01280fdcc597d588604df4bd0472cd0d694cfbb48aeae60348f163765"}, + {file = "bert_score-0.3.7.tar.gz", hash = "sha256:6ae0dafb2e2d92dc313f9c6bf6bcd7d4d9af6bf7bc53086e40b9a87f2140cc67"}, +] +black = [ + {file = "black-20.8b1.tar.gz", hash = "sha256:1c02557aa099101b9d21496f8a914e9ed2222ef70336404eeeac8edba836fbea"}, +] +bleach = [ + {file = "bleach-3.2.1-py2.py3-none-any.whl", hash = 
"sha256:9f8ccbeb6183c6e6cddea37592dfb0167485c1e3b13b3363bc325aa8bda3adbd"}, + {file = "bleach-3.2.1.tar.gz", hash = "sha256:52b5919b81842b1854196eaae5ca29679a2f2e378905c346d3ca8227c2c66080"}, +] +blis = [ + {file = "blis-0.7.4-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:5b403deb2ad5515e1edb3c0867bccb5b974b461f24283d9219a3a761fd6dacc6"}, + {file = "blis-0.7.4-cp36-cp36m-manylinux2014_x86_64.whl", hash = "sha256:9f9b829480c12fc834549306821e5c51cb28b216ca5f88c5b2cfedbeb9daf67d"}, + {file = "blis-0.7.4-cp36-cp36m-win_amd64.whl", hash = "sha256:c2d8064217c326dd9a0dcbae294ffe8557263e2a00d76101ffa222b9c9d9c62d"}, + {file = "blis-0.7.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:d717b5dea407aac89a646908e7d9849105abab9c88a539c120518c200f899f4e"}, + {file = "blis-0.7.4-cp37-cp37m-manylinux2014_x86_64.whl", hash = "sha256:5ecddc4c6daf80558154b091db0a9839bb15dbe65d2906a543a73b93fbce4f73"}, + {file = "blis-0.7.4-cp37-cp37m-win_amd64.whl", hash = "sha256:6814991b3e3193db4f9b2417174c6f24b9c0197409d864fa7628583bd2df1f0f"}, + {file = "blis-0.7.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:4222bbc7b9c47bc3cf6f36f2241862c1512ca7ebac3828267a2e05ef6c47fc54"}, + {file = "blis-0.7.4-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:445e4838b809e99677f5c0982fb9af320f0d91328fb28c8097e5f1173c4df9d6"}, + {file = "blis-0.7.4-cp38-cp38-win_amd64.whl", hash = "sha256:94890b2296f1449baa56aede46627ea7fc8de11c788f9c261ee38c2eb4a2cc7d"}, + {file = "blis-0.7.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:168fd7bd763ebe529aa25a066d3a6b89f4c3f492f6297f881df6942741b95787"}, + {file = "blis-0.7.4-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:5c1a2023f7d8431daa8d87d32f539bb23e1a009500c37f9eba0ac7b3f20f73eb"}, + {file = "blis-0.7.4-cp39-cp39-win_amd64.whl", hash = "sha256:78a8e0ee72a42c3b2f5b9114500a781119995f76fa6c21d4b02c6fb9c21df2cc"}, + {file = "blis-0.7.4.tar.gz", hash = "sha256:7daa615a97d4f28db0f332b710bfe1900b15d0c25841c6d727965e4fd91e09cf"}, +] +boto3 = [ + {file = "boto3-1.16.47-py2.py3-none-any.whl", hash = "sha256:50c2475cc6c38f7ff24c3e0ca8f7eaf787ce740499198043e05e6f13ac2e919f"}, + {file = "boto3-1.16.47.tar.gz", hash = "sha256:05796ba6c65f79214ea61becae5126d5c924eed8a11874bc5536d611deabbe47"}, +] +botocore = [ + {file = "botocore-1.19.47-py2.py3-none-any.whl", hash = "sha256:4989ff6ca4104f641d966f6bb2f3c4207f1a7a8d879b2e21224c7713dd9dc9b8"}, + {file = "botocore-1.19.47.tar.gz", hash = "sha256:15584a86d6cb1f94ea785e8d3c98faeff8ddd0105356e1c106118d9ac12fa891"}, +] +bpemb = [ + {file = "bpemb-0.3.2-py3-none-any.whl", hash = "sha256:2a84d0ef963221d01a41b2a4c5d26ef2978992d6558e7289bbe58f7fd03a6446"}, + {file = "bpemb-0.3.2.tar.gz", hash = "sha256:7ef2564f656ec48c4621f555c4431c4ec71900f3c45c8d2203f9c78446e8391c"}, +] +cachetools = [ + {file = "cachetools-4.2.0-py3-none-any.whl", hash = "sha256:c6b07a6ded8c78bf36730b3dc452dfff7d95f2a12a2fed856b1a0cb13ca78c61"}, + {file = "cachetools-4.2.0.tar.gz", hash = "sha256:3796e1de094f0eaca982441c92ce96c68c89cced4cd97721ab297ea4b16db90e"}, +] +catalogue = [ + {file = "catalogue-1.0.0-py2.py3-none-any.whl", hash = "sha256:584d78e7f4c3c6e2fd498eb56dfc8ef1f4ff738480237de2ccd26cbe2cf47172"}, + {file = "catalogue-1.0.0.tar.gz", hash = "sha256:d74d1d856c6b36a37bf14aa6dbbc27d0582667b7ab979a6108e61a575e8723f5"}, +] +certifi = [ + {file = "certifi-2020.12.5-py2.py3-none-any.whl", hash = "sha256:719a74fb9e33b9bd44cc7f3a8d94bc35e4049deebe19ba7d8e108280cfd59830"}, + {file = "certifi-2020.12.5.tar.gz", hash = 
"sha256:1a4995114262bffbc2413b159f2a1a480c969de6e6eb13ee966d470af86af59c"}, +] +cffi = [ + {file = "cffi-1.14.4-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:ebb253464a5d0482b191274f1c8bf00e33f7e0b9c66405fbffc61ed2c839c775"}, + {file = "cffi-1.14.4-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:2c24d61263f511551f740d1a065eb0212db1dbbbbd241db758f5244281590c06"}, + {file = "cffi-1.14.4-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:9f7a31251289b2ab6d4012f6e83e58bc3b96bd151f5b5262467f4bb6b34a7c26"}, + {file = "cffi-1.14.4-cp27-cp27m-win32.whl", hash = "sha256:5cf4be6c304ad0b6602f5c4e90e2f59b47653ac1ed9c662ed379fe48a8f26b0c"}, + {file = "cffi-1.14.4-cp27-cp27m-win_amd64.whl", hash = "sha256:f60567825f791c6f8a592f3c6e3bd93dd2934e3f9dac189308426bd76b00ef3b"}, + {file = "cffi-1.14.4-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:c6332685306b6417a91b1ff9fae889b3ba65c2292d64bd9245c093b1b284809d"}, + {file = "cffi-1.14.4-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:d9efd8b7a3ef378dd61a1e77367f1924375befc2eba06168b6ebfa903a5e59ca"}, + {file = "cffi-1.14.4-cp35-cp35m-macosx_10_9_x86_64.whl", hash = "sha256:51a8b381b16ddd370178a65360ebe15fbc1c71cf6f584613a7ea08bfad946698"}, + {file = "cffi-1.14.4-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:1d2c4994f515e5b485fd6d3a73d05526aa0fcf248eb135996b088d25dfa1865b"}, + {file = "cffi-1.14.4-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:af5c59122a011049aad5dd87424b8e65a80e4a6477419c0c1015f73fb5ea0293"}, + {file = "cffi-1.14.4-cp35-cp35m-win32.whl", hash = "sha256:594234691ac0e9b770aee9fcdb8fa02c22e43e5c619456efd0d6c2bf276f3eb2"}, + {file = "cffi-1.14.4-cp35-cp35m-win_amd64.whl", hash = "sha256:64081b3f8f6f3c3de6191ec89d7dc6c86a8a43911f7ecb422c60e90c70be41c7"}, + {file = "cffi-1.14.4-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:f803eaa94c2fcda012c047e62bc7a51b0bdabda1cad7a92a522694ea2d76e49f"}, + {file = "cffi-1.14.4-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:105abaf8a6075dc96c1fe5ae7aae073f4696f2905fde6aeada4c9d2926752362"}, + {file = "cffi-1.14.4-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:0638c3ae1a0edfb77c6765d487fee624d2b1ee1bdfeffc1f0b58c64d149e7eec"}, + {file = "cffi-1.14.4-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:7c6b1dece89874d9541fc974917b631406233ea0440d0bdfbb8e03bf39a49b3b"}, + {file = "cffi-1.14.4-cp36-cp36m-win32.whl", hash = "sha256:155136b51fd733fa94e1c2ea5211dcd4c8879869008fc811648f16541bf99668"}, + {file = "cffi-1.14.4-cp36-cp36m-win_amd64.whl", hash = "sha256:6bc25fc545a6b3d57b5f8618e59fc13d3a3a68431e8ca5fd4c13241cd70d0009"}, + {file = "cffi-1.14.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:a7711edca4dcef1a75257b50a2fbfe92a65187c47dab5a0f1b9b332c5919a3fb"}, + {file = "cffi-1.14.4-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:00e28066507bfc3fe865a31f325c8391a1ac2916219340f87dfad602c3e48e5d"}, + {file = "cffi-1.14.4-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:798caa2a2384b1cbe8a2a139d80734c9db54f9cc155c99d7cc92441a23871c03"}, + {file = "cffi-1.14.4-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:a5ed8c05548b54b998b9498753fb9cadbfd92ee88e884641377d8a8b291bcc01"}, + {file = "cffi-1.14.4-cp37-cp37m-win32.whl", hash = "sha256:00a1ba5e2e95684448de9b89888ccd02c98d512064b4cb987d48f4b40aa0421e"}, + {file = "cffi-1.14.4-cp37-cp37m-win_amd64.whl", hash = "sha256:9cc46bc107224ff5b6d04369e7c595acb700c3613ad7bcf2e2012f62ece80c35"}, + {file = "cffi-1.14.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:df5169c4396adc04f9b0a05f13c074df878b6052430e03f50e68adf3a57aa28d"}, + {file = 
"cffi-1.14.4-cp38-cp38-manylinux1_i686.whl", hash = "sha256:9ffb888f19d54a4d4dfd4b3f29bc2c16aa4972f1c2ab9c4ab09b8ab8685b9c2b"}, + {file = "cffi-1.14.4-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:8d6603078baf4e11edc4168a514c5ce5b3ba6e3e9c374298cb88437957960a53"}, + {file = "cffi-1.14.4-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:d5ff0621c88ce83a28a10d2ce719b2ee85635e85c515f12bac99a95306da4b2e"}, + {file = "cffi-1.14.4-cp38-cp38-win32.whl", hash = "sha256:b4e248d1087abf9f4c10f3c398896c87ce82a9856494a7155823eb45a892395d"}, + {file = "cffi-1.14.4-cp38-cp38-win_amd64.whl", hash = "sha256:ec80dc47f54e6e9a78181ce05feb71a0353854cc26999db963695f950b5fb375"}, + {file = "cffi-1.14.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:840793c68105fe031f34d6a086eaea153a0cd5c491cde82a74b420edd0a2b909"}, + {file = "cffi-1.14.4-cp39-cp39-manylinux1_i686.whl", hash = "sha256:b18e0a9ef57d2b41f5c68beefa32317d286c3d6ac0484efd10d6e07491bb95dd"}, + {file = "cffi-1.14.4-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:045d792900a75e8b1e1b0ab6787dd733a8190ffcf80e8c8ceb2fb10a29ff238a"}, + {file = "cffi-1.14.4-cp39-cp39-win32.whl", hash = "sha256:ba4e9e0ae13fc41c6b23299545e5ef73055213e466bd107953e4a013a5ddd7e3"}, + {file = "cffi-1.14.4-cp39-cp39-win_amd64.whl", hash = "sha256:f032b34669220030f905152045dfa27741ce1a6db3324a5bc0b96b6c7420c87b"}, + {file = "cffi-1.14.4.tar.gz", hash = "sha256:1a465cbe98a7fd391d47dce4b8f7e5b921e6cd805ef421d04f5f66ba8f06086c"}, +] +cfgv = [ + {file = "cfgv-3.2.0-py2.py3-none-any.whl", hash = "sha256:32e43d604bbe7896fe7c248a9c2276447dbef840feb28fe20494f62af110211d"}, + {file = "cfgv-3.2.0.tar.gz", hash = "sha256:cf22deb93d4bcf92f345a5c3cd39d3d41d6340adc60c78bbbd6588c384fda6a1"}, +] +chardet = [ + {file = "chardet-4.0.0-py2.py3-none-any.whl", hash = "sha256:f864054d66fd9118f2e67044ac8981a54775ec5b67aed0441892edb553d21da5"}, + {file = "chardet-4.0.0.tar.gz", hash = "sha256:0d6f53a15db4120f2b08c94f11e7d93d2c911ee118b6b30a04ec3ee8310179fa"}, +] +click = [ + {file = "click-7.1.2-py2.py3-none-any.whl", hash = "sha256:dacca89f4bfadd5de3d7489b7c8a566eee0d3676333fbb50030263894c38c0dc"}, + {file = "click-7.1.2.tar.gz", hash = "sha256:d2b5255c7c6349bc1bd1e59e08cd12acbbd63ce649f2588755783aa94dfb6b1a"}, +] +cloudpickle = [ + {file = "cloudpickle-1.6.0-py3-none-any.whl", hash = "sha256:3a32d0eb0bc6f4d0c57fbc4f3e3780f7a81e6fee0fa935072884d58ae8e1cc7c"}, + {file = "cloudpickle-1.6.0.tar.gz", hash = "sha256:9bc994f9e9447593bd0a45371f0e7ac7333710fcf64a4eb9834bf149f4ef2f32"}, +] +colorama = [ + {file = "colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"}, + {file = "colorama-0.4.4.tar.gz", hash = "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b"}, +] +commonmark = [ + {file = "commonmark-0.9.1-py2.py3-none-any.whl", hash = "sha256:da2f38c92590f83de410ba1a3cbceafbc74fee9def35f9251ba9a971d6d66fd9"}, + {file = "commonmark-0.9.1.tar.gz", hash = "sha256:452f9dc859be7f06631ddcb328b6919c67984aca654e5fefb3914d54691aed60"}, +] +conllu = [ + {file = "conllu-4.2.1-py2.py3-none-any.whl", hash = "sha256:9dd850d2993191cc3c477665f0eabe78494b57563447f52c9ad6ee3f3f3b3523"}, + {file = "conllu-4.2.1.tar.gz", hash = "sha256:5ae05d3e5410068df4ba8fcd5194b67215fbf72ac018a5c6a73cbb5ccb8d12f7"}, +] +coverage = [ + {file = "coverage-5.3.1-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:fabeeb121735d47d8eab8671b6b031ce08514c86b7ad8f7d5490a7b6dcd6267d"}, + {file = "coverage-5.3.1-cp27-cp27m-manylinux1_i686.whl", hash = 
"sha256:7e4d159021c2029b958b2363abec4a11db0ce8cd43abb0d9ce44284cb97217e7"}, + {file = "coverage-5.3.1-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:378ac77af41350a8c6b8801a66021b52da8a05fd77e578b7380e876c0ce4f528"}, + {file = "coverage-5.3.1-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:e448f56cfeae7b1b3b5bcd99bb377cde7c4eb1970a525c770720a352bc4c8044"}, + {file = "coverage-5.3.1-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:cc44e3545d908ecf3e5773266c487ad1877be718d9dc65fc7eb6e7d14960985b"}, + {file = "coverage-5.3.1-cp27-cp27m-win32.whl", hash = "sha256:08b3ba72bd981531fd557f67beee376d6700fba183b167857038997ba30dd297"}, + {file = "coverage-5.3.1-cp27-cp27m-win_amd64.whl", hash = "sha256:8dacc4073c359f40fcf73aede8428c35f84639baad7e1b46fce5ab7a8a7be4bb"}, + {file = "coverage-5.3.1-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:ee2f1d1c223c3d2c24e3afbb2dd38be3f03b1a8d6a83ee3d9eb8c36a52bee899"}, + {file = "coverage-5.3.1-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:9a9d4ff06804920388aab69c5ea8a77525cf165356db70131616acd269e19b36"}, + {file = "coverage-5.3.1-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:782a5c7df9f91979a7a21792e09b34a658058896628217ae6362088b123c8500"}, + {file = "coverage-5.3.1-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:fda29412a66099af6d6de0baa6bd7c52674de177ec2ad2630ca264142d69c6c7"}, + {file = "coverage-5.3.1-cp35-cp35m-macosx_10_9_x86_64.whl", hash = "sha256:f2c6888eada180814b8583c3e793f3f343a692fc802546eed45f40a001b1169f"}, + {file = "coverage-5.3.1-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:8f33d1156241c43755137288dea619105477961cfa7e47f48dbf96bc2c30720b"}, + {file = "coverage-5.3.1-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:b239711e774c8eb910e9b1ac719f02f5ae4bf35fa0420f438cdc3a7e4e7dd6ec"}, + {file = "coverage-5.3.1-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:f54de00baf200b4539a5a092a759f000b5f45fd226d6d25a76b0dff71177a714"}, + {file = "coverage-5.3.1-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:be0416074d7f253865bb67630cf7210cbc14eb05f4099cc0f82430135aaa7a3b"}, + {file = "coverage-5.3.1-cp35-cp35m-win32.whl", hash = "sha256:c46643970dff9f5c976c6512fd35768c4a3819f01f61169d8cdac3f9290903b7"}, + {file = "coverage-5.3.1-cp35-cp35m-win_amd64.whl", hash = "sha256:9a4f66259bdd6964d8cf26142733c81fb562252db74ea367d9beb4f815478e72"}, + {file = "coverage-5.3.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:c6e5174f8ca585755988bc278c8bb5d02d9dc2e971591ef4a1baabdf2d99589b"}, + {file = "coverage-5.3.1-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:3911c2ef96e5ddc748a3c8b4702c61986628bb719b8378bf1e4a6184bbd48fe4"}, + {file = "coverage-5.3.1-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:c5ec71fd4a43b6d84ddb88c1df94572479d9a26ef3f150cef3dacefecf888105"}, + {file = "coverage-5.3.1-cp36-cp36m-manylinux2010_i686.whl", hash = "sha256:f51dbba78d68a44e99d484ca8c8f604f17e957c1ca09c3ebc2c7e3bbd9ba0448"}, + {file = "coverage-5.3.1-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:a2070c5affdb3a5e751f24208c5c4f3d5f008fa04d28731416e023c93b275277"}, + {file = "coverage-5.3.1-cp36-cp36m-win32.whl", hash = "sha256:535dc1e6e68fad5355f9984d5637c33badbdc987b0c0d303ee95a6c979c9516f"}, + {file = "coverage-5.3.1-cp36-cp36m-win_amd64.whl", hash = "sha256:a4857f7e2bc6921dbd487c5c88b84f5633de3e7d416c4dc0bb70256775551a6c"}, + {file = "coverage-5.3.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:fac3c432851038b3e6afe086f777732bcf7f6ebbfd90951fa04ee53db6d0bcdd"}, + {file = "coverage-5.3.1-cp37-cp37m-manylinux1_i686.whl", hash = 
"sha256:cd556c79ad665faeae28020a0ab3bda6cd47d94bec48e36970719b0b86e4dcf4"}, + {file = "coverage-5.3.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:a66ca3bdf21c653e47f726ca57f46ba7fc1f260ad99ba783acc3e58e3ebdb9ff"}, + {file = "coverage-5.3.1-cp37-cp37m-manylinux2010_i686.whl", hash = "sha256:ab110c48bc3d97b4d19af41865e14531f300b482da21783fdaacd159251890e8"}, + {file = "coverage-5.3.1-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:e52d3d95df81c8f6b2a1685aabffadf2d2d9ad97203a40f8d61e51b70f191e4e"}, + {file = "coverage-5.3.1-cp37-cp37m-win32.whl", hash = "sha256:fa10fee7e32213f5c7b0d6428ea92e3a3fdd6d725590238a3f92c0de1c78b9d2"}, + {file = "coverage-5.3.1-cp37-cp37m-win_amd64.whl", hash = "sha256:ce6f3a147b4b1a8b09aae48517ae91139b1b010c5f36423fa2b866a8b23df879"}, + {file = "coverage-5.3.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:93a280c9eb736a0dcca19296f3c30c720cb41a71b1f9e617f341f0a8e791a69b"}, + {file = "coverage-5.3.1-cp38-cp38-manylinux1_i686.whl", hash = "sha256:3102bb2c206700a7d28181dbe04d66b30780cde1d1c02c5f3c165cf3d2489497"}, + {file = "coverage-5.3.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:8ffd4b204d7de77b5dd558cdff986a8274796a1e57813ed005b33fd97e29f059"}, + {file = "coverage-5.3.1-cp38-cp38-manylinux2010_i686.whl", hash = "sha256:a607ae05b6c96057ba86c811d9c43423f35e03874ffb03fbdcd45e0637e8b631"}, + {file = "coverage-5.3.1-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:3a3c3f8863255f3c31db3889f8055989527173ef6192a283eb6f4db3c579d830"}, + {file = "coverage-5.3.1-cp38-cp38-win32.whl", hash = "sha256:ff1330e8bc996570221b450e2d539134baa9465f5cb98aff0e0f73f34172e0ae"}, + {file = "coverage-5.3.1-cp38-cp38-win_amd64.whl", hash = "sha256:3498b27d8236057def41de3585f317abae235dd3a11d33e01736ffedb2ef8606"}, + {file = "coverage-5.3.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ceb499d2b3d1d7b7ba23abe8bf26df5f06ba8c71127f188333dddcf356b4b63f"}, + {file = "coverage-5.3.1-cp39-cp39-manylinux1_i686.whl", hash = "sha256:3b14b1da110ea50c8bcbadc3b82c3933974dbeea1832e814aab93ca1163cd4c1"}, + {file = "coverage-5.3.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:76b2775dda7e78680d688daabcb485dc87cf5e3184a0b3e012e1d40e38527cc8"}, + {file = "coverage-5.3.1-cp39-cp39-manylinux2010_i686.whl", hash = "sha256:cef06fb382557f66d81d804230c11ab292d94b840b3cb7bf4450778377b592f4"}, + {file = "coverage-5.3.1-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:6f61319e33222591f885c598e3e24f6a4be3533c1d70c19e0dc59e83a71ce27d"}, + {file = "coverage-5.3.1-cp39-cp39-win32.whl", hash = "sha256:cc6f8246e74dd210d7e2b56c76ceaba1cc52b025cd75dbe96eb48791e0250e98"}, + {file = "coverage-5.3.1-cp39-cp39-win_amd64.whl", hash = "sha256:2757fa64e11ec12220968f65d086b7a29b6583d16e9a544c889b22ba98555ef1"}, + {file = "coverage-5.3.1-pp36-none-any.whl", hash = "sha256:723d22d324e7997a651478e9c5a3120a0ecbc9a7e94071f7e1954562a8806cf3"}, + {file = "coverage-5.3.1-pp37-none-any.whl", hash = "sha256:c89b558f8a9a5a6f2cfc923c304d49f0ce629c3bd85cb442ca258ec20366394c"}, + {file = "coverage-5.3.1.tar.gz", hash = "sha256:38f16b1317b8dd82df67ed5daa5f5e7c959e46579840d77a67a4ceb9cef0a50b"}, +] +cycler = [ + {file = "cycler-0.10.0-py2.py3-none-any.whl", hash = "sha256:1d8a5ae1ff6c5cf9b93e8811e581232ad8920aeec647c37316ceac982b08cb2d"}, + {file = "cycler-0.10.0.tar.gz", hash = "sha256:cd7b2d1018258d7247a71425e9f26463dfb444d411c39569972f4ce586b0c9d8"}, +] +cymem = [ + {file = "cymem-2.0.5-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:9d72d69f7a62a280199c3aa7bc550685c47d6d0689b2d299e6492253b86d2437"}, + {file = 
"cymem-2.0.5-cp36-cp36m-manylinux2014_x86_64.whl", hash = "sha256:8ea57e6923f40eb51012352161bb5707c14a5a5ce901ff72021e59df06221655"}, + {file = "cymem-2.0.5-cp36-cp36m-win_amd64.whl", hash = "sha256:4bd023c2477198b39b660c2a6b0242880649765ecee8461688a57fd4afd2bfc0"}, + {file = "cymem-2.0.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:1f0eb9b3d03623dcfc746cf8bff0663b0e347f4aea759965c8932087a0307ee9"}, + {file = "cymem-2.0.5-cp37-cp37m-manylinux2014_x86_64.whl", hash = "sha256:a440d63577fcdc9c528c9cc026b7b4f8648193bac462bc0596c9eac10f9fba62"}, + {file = "cymem-2.0.5-cp37-cp37m-win_amd64.whl", hash = "sha256:3d48902d7441645835fefc7832df49feb5362c7300d182475b63a01d25ae44ef"}, + {file = "cymem-2.0.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f2167c9959fcd639b95d51fa5efaa7c61eef8d686cb75a25412a914f428ce980"}, + {file = "cymem-2.0.5-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:734d82d0d03c2ceb929bc1744c04dbe0a105e68a4947c8406056a36f86c41830"}, + {file = "cymem-2.0.5-cp38-cp38-win_amd64.whl", hash = "sha256:01d3ea159f7a3f3192b1e800ed8207dac7586794d903a153198b9ea317f144bc"}, + {file = "cymem-2.0.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d307f7f6230d861a938837cae4b855226b6845a21c010242a15e9ce6853856cd"}, + {file = "cymem-2.0.5-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:ce1e81c1d031f56b67bac2136e73b4512cbc794706cd570178972d54ba6115d8"}, + {file = "cymem-2.0.5-cp39-cp39-win_amd64.whl", hash = "sha256:d19f68b90411e02ab33b1654118337f96f41c13a3cd00c4f44f7abed2bc712e7"}, + {file = "cymem-2.0.5.tar.gz", hash = "sha256:190e15d9cf2c3bde60ae37bddbae6568a36044dc4a326d84081a5fa08818eee0"}, +] +cython = [ + {file = "Cython-0.29.21-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:c541b2b49c6638f2b5beb9316726db84a8d1c132bf31b942dae1f9c7f6ad3b92"}, + {file = "Cython-0.29.21-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:b8d8497091c1dc8705d1575c71e908a93b1f127a174b2d472020f3d84263ac28"}, + {file = "Cython-0.29.21-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:695a6bcaf9e12b1e471dfce96bbecf22a1487adc2ac6106b15960a2b51b97f5d"}, + {file = "Cython-0.29.21-cp27-cp27m-win32.whl", hash = "sha256:171b9f70ceafcec5852089d0f9c1e75b0d554f46c882cd4e2e4acaba9bd7d148"}, + {file = "Cython-0.29.21-cp27-cp27m-win_amd64.whl", hash = "sha256:539e59949aab4955c143a468810123bf22d3e8556421e1ce2531ed4893914ca0"}, + {file = "Cython-0.29.21-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:e93acd1f603a0c1786e0841f066ae7cef014cf4750e3cd06fd03cfdf46361419"}, + {file = "Cython-0.29.21-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:2922e3031ba9ebbe7cb9200b585cc33b71d66023d78450dcb883f824f4969371"}, + {file = "Cython-0.29.21-cp34-cp34m-manylinux1_i686.whl", hash = "sha256:497841897942f734b0abc2dead2d4009795ee992267a70a23485fd0e937edc0b"}, + {file = "Cython-0.29.21-cp34-cp34m-manylinux1_x86_64.whl", hash = "sha256:0ac10bf476476a9f7ef61ec6e44c280ef434473124ad31d3132b720f7b0e8d2a"}, + {file = "Cython-0.29.21-cp34-cp34m-win32.whl", hash = "sha256:31c71a615f38401b0dc1f2a5a9a6c421ffd8908c4cd5bbedc4014c1b876488e8"}, + {file = "Cython-0.29.21-cp34-cp34m-win_amd64.whl", hash = "sha256:c4b78356074fcaac04ecb4de289f11d506e438859877670992ece11f9c90f37b"}, + {file = "Cython-0.29.21-cp35-cp35m-macosx_10_6_intel.whl", hash = "sha256:b2f9172e4d6358f33ecce6a4339b5960f9f83eab67ea244baa812737793826b7"}, + {file = "Cython-0.29.21-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:856c7fb31d247ce713d60116375e1f8153d0291ab5e92cca7d8833a524ba9991"}, + {file = "Cython-0.29.21-cp35-cp35m-manylinux1_x86_64.whl", hash = 
"sha256:715294cd2246b39a8edca464a8366eb635f17213e4a6b9e74e52d8b877a8cb63"}, + {file = "Cython-0.29.21-cp35-cp35m-win32.whl", hash = "sha256:23f3a00b843a19de8bb4468b087db5b413a903213f67188729782488d67040e0"}, + {file = "Cython-0.29.21-cp35-cp35m-win_amd64.whl", hash = "sha256:ccb77faeaad99e99c6c444d04862c6cf604204fe0a07d4c8f9cbf2c9012d7d5a"}, + {file = "Cython-0.29.21-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:e272ed97d20b026f4f25a012b25d7d7672a60e4f72b9ca385239d693cd91b2d5"}, + {file = "Cython-0.29.21-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:8c6e25e9cc4961bb2abb1777c6fa9d0fa2d9b014beb3276cebe69996ff162b78"}, + {file = "Cython-0.29.21-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:57ead89128dee9609119c93d3926c7a2add451453063147900408a50144598c6"}, + {file = "Cython-0.29.21-cp36-cp36m-win32.whl", hash = "sha256:0e25c209c75df8785480dcef85db3d36c165dbc0f4c503168e8763eb735704f2"}, + {file = "Cython-0.29.21-cp36-cp36m-win_amd64.whl", hash = "sha256:a0674f246ad5e1571ef29d4c5ec1d6ecabe9e6c424ad0d6fee46b914d5d24d69"}, + {file = "Cython-0.29.21-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:5da187bebe38030325e1c0b5b8a804d489410be2d384c0ef3ba39493c67eb51e"}, + {file = "Cython-0.29.21-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:9ce5e5209f8406ffc2b058b1293cce7a954911bb7991e623564d489197c9ba30"}, + {file = "Cython-0.29.21-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:5e545a48f919e40079b0efe7b0e081c74b96f9ef25b9c1ff4cdbd95764426b58"}, + {file = "Cython-0.29.21-cp37-cp37m-win32.whl", hash = "sha256:c8435959321cf8aec867bbad54b83b7fb8343204b530d85d9ea7a1f5329d5ac2"}, + {file = "Cython-0.29.21-cp37-cp37m-win_amd64.whl", hash = "sha256:540b3bee0711aac2e99bda4fa0a46dbcd8c74941666bfc1ef9236b1a64eeffd9"}, + {file = "Cython-0.29.21-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:93f5fed1c9445fb7afe20450cdaf94b0e0356d47cc75008105be89c6a2e417b1"}, + {file = "Cython-0.29.21-cp38-cp38-manylinux1_i686.whl", hash = "sha256:9207fdedc7e789a3dcaca628176b80c82fbed9ae0997210738cbb12536a56699"}, + {file = "Cython-0.29.21-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:603b9f1b8e93e8b494d3e89320c410679e21018e48b6cbc77280f5db71f17dc0"}, + {file = "Cython-0.29.21-cp38-cp38-win32.whl", hash = "sha256:473df5d5e400444a36ed81c6596f56a5b52a3481312d0a48d68b777790f730ae"}, + {file = "Cython-0.29.21-cp38-cp38-win_amd64.whl", hash = "sha256:b8a8a31b9e8860634adbca30fea1d0c7f08e208b3d7611f3e580e5f20992e5d7"}, + {file = "Cython-0.29.21-cp39-cp39-manylinux1_i686.whl", hash = "sha256:7ebaa8800c376bcdae596fb1372cb4232a5ef957619d35839520d2786f2debb9"}, + {file = "Cython-0.29.21-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:c111ac9abdf715762e4fb87395e59d61c0fbb6ce79eb2e24167700b6cfa8ba79"}, + {file = "Cython-0.29.21-py2.py3-none-any.whl", hash = "sha256:5c4276fdcbccdf1e3c1756c7aeb8395e9a36874fa4d30860e7694f43d325ae13"}, + {file = "Cython-0.29.21.tar.gz", hash = "sha256:e57acb89bd55943c8d8bf813763d20b9099cc7165c0f16b707631a7654be9cad"}, +] +cytoolz = [ + {file = "cytoolz-0.11.0.tar.gz", hash = "sha256:c64f3590c3eb40e1548f0d3c6b2ccde70493d0b8dc6cc7f9f3fec0bb3dcd4222"}, +] +datasets = [ + {file = "datasets-1.1.3-py3-none-any.whl", hash = "sha256:2c5bbd3abd563da4d770c26de4296ed8b6033837f46dc833f4bcdf893aa4ffbc"}, + {file = "datasets-1.1.3.tar.gz", hash = "sha256:40261f45806ebe003194bb6d14b3f59a6f1e7f9e347e78b662e1ab979ace7e9c"}, +] +decorator = [ + {file = "decorator-4.4.2-py2.py3-none-any.whl", hash = "sha256:41fa54c2a0cc4ba648be4fd43cff00aedf5b9465c9bf18d64325bc225f08f760"}, + {file = "decorator-4.4.2.tar.gz", 
hash = "sha256:e3a62f0520172440ca0dcc823749319382e377f37f140a0b99ef45fecb84bfe7"}, +] +defusedxml = [ + {file = "defusedxml-0.6.0-py2.py3-none-any.whl", hash = "sha256:6687150770438374ab581bb7a1b327a847dd9c5749e396102de3fad4e8a3ef93"}, + {file = "defusedxml-0.6.0.tar.gz", hash = "sha256:f684034d135af4c6cbb949b8a4d2ed61634515257a67299e5f940fbaa34377f5"}, +] +deprecated = [ + {file = "Deprecated-1.2.10-py2.py3-none-any.whl", hash = "sha256:a766c1dccb30c5f6eb2b203f87edd1d8588847709c78589e1521d769addc8218"}, + {file = "Deprecated-1.2.10.tar.gz", hash = "sha256:525ba66fb5f90b07169fdd48b6373c18f1ee12728ca277ca44567a367d9d7f74"}, +] +dill = [ + {file = "dill-0.3.3-py2.py3-none-any.whl", hash = "sha256:78370261be6ea49037ace8c17e0b7dd06d0393af6513cc23f9b222d9367ce389"}, + {file = "dill-0.3.3.zip", hash = "sha256:efb7f6cb65dba7087c1e111bb5390291ba3616741f96840bfc75792a1a9b5ded"}, +] +distlib = [ + {file = "distlib-0.3.1-py2.py3-none-any.whl", hash = "sha256:8c09de2c67b3e7deef7184574fc060ab8a793e7adbb183d942c389c8b13c52fb"}, + {file = "distlib-0.3.1.zip", hash = "sha256:edf6116872c863e1aa9d5bb7cb5e05a022c519a4594dc703843343a9ddd9bff1"}, +] +docformatter = [ + {file = "docformatter-1.4.tar.gz", hash = "sha256:064e6d81f04ac96bc0d176cbaae953a0332482b22d3ad70d47c8a7f2732eef6f"}, +] +docopt = [ + {file = "docopt-0.6.2.tar.gz", hash = "sha256:49b3a825280bd66b3aa83585ef59c4a8c82f2c8a522dbe754a8bc8d08c85c491"}, +] +docutils = [ + {file = "docutils-0.16-py2.py3-none-any.whl", hash = "sha256:0c5b78adfbf7762415433f5515cd5c9e762339e23369dbe8000d84a4bf4ab3af"}, + {file = "docutils-0.16.tar.gz", hash = "sha256:c2de3a60e9e7d07be26b7f2b00ca0309c207e06c100f9cc2a94931fc75a478fc"}, +] +editdistance = [ + {file = "editdistance-0.5.3-cp27-cp27m-macosx_10_6_intel.whl", hash = "sha256:ef4714dc9cf281863dcc3ba6d24c3cae1dde41610a78dcdfae50d743ca71d5e1"}, + {file = "editdistance-0.5.3-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:a322354a8dfb442770902f06552b20df5184e65e84ac90cb799740915eb52212"}, + {file = "editdistance-0.5.3-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:36a4c36d7945f5ecfa1dc92c08635d73b64769cd0af066da774437fe2c7dc80a"}, + {file = "editdistance-0.5.3-cp27-cp27m-win32.whl", hash = "sha256:93e847cc2fbebb34a36b41337a3eb9b2034d4ff9679665b08ecc5c3c313f83a9"}, + {file = "editdistance-0.5.3-cp27-cp27m-win_amd64.whl", hash = "sha256:d4561b602b7675f6a050cdd0e1b652007ce73bb7290019487b8919a44593d74d"}, + {file = "editdistance-0.5.3-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:dddb0d36f698e3c942d0d5934185533d9324fbde975b3e956a19883713e86d33"}, + {file = "editdistance-0.5.3-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:1018f0fa857b079c721583c42d2c54800fbe8c7d2c29b354a9724a0b79971cb8"}, + {file = "editdistance-0.5.3-cp33-cp33m-win32.whl", hash = "sha256:810d93e614f35ad2916570f48ff1370ac3c001eb6941d5e836e2c1c6986fafff"}, + {file = "editdistance-0.5.3-cp33-cp33m-win_amd64.whl", hash = "sha256:a96ac49acc7668477c13aff02ca0527c6462b026b78600602dbef04efc9250d3"}, + {file = "editdistance-0.5.3-cp34-cp34m-macosx_10_6_intel.whl", hash = "sha256:a9167d9d5e754abd7ce68da065a636cc161e5063c322efd81159d15001d5272a"}, + {file = "editdistance-0.5.3-cp34-cp34m-manylinux1_i686.whl", hash = "sha256:a10c61df748220b2b9e2949a10aea23ffeded28c07e610e107a8f6a4b5b92782"}, + {file = "editdistance-0.5.3-cp34-cp34m-manylinux1_x86_64.whl", hash = "sha256:6452d750fbc49c6f04232a840f96b0f1155ff7cb2d953ce1edf075c5a394f3ea"}, + {file = "editdistance-0.5.3-cp34-cp34m-win32.whl", hash = 
"sha256:1f510e6eb411ec6123ba4ebc086d5882027710d28db174985a74e13fd0eb354f"}, + {file = "editdistance-0.5.3-cp34-cp34m-win_amd64.whl", hash = "sha256:9d6ee66f8de30ec6358083e5ecd7919a5966b38c64012c1672f326c61ff7a15f"}, + {file = "editdistance-0.5.3-cp35-cp35m-macosx_10_6_intel.whl", hash = "sha256:c1cf5ff98cfdc38046ae0f2d3ccbe1e15b0665234a04783f6558ec0a48e72dc8"}, + {file = "editdistance-0.5.3-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:5f9c202b1a2f2630f7a0cdd76ad0ad55de4cd700553778c77e37379c6ac8e8bb"}, + {file = "editdistance-0.5.3-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:553fb295802c399f0f419b616b499c241ffdcb2a70888d1e9d1bd22ba21b122f"}, + {file = "editdistance-0.5.3-cp35-cp35m-win32.whl", hash = "sha256:0834826832e51a6c18032b13b68083e3ebfbf3daf774142ae6f2b17b35580c16"}, + {file = "editdistance-0.5.3-cp35-cp35m-win_amd64.whl", hash = "sha256:6ccfd57221bae661304e7f9495f508aeec8f72e462d97481d55488ded87f5cbc"}, + {file = "editdistance-0.5.3-cp36-cp36m-macosx_10_6_intel.whl", hash = "sha256:25b39c836347dcbb251a6041fd3d7575b82c365923a4b13c32c699e442b1b644"}, + {file = "editdistance-0.5.3-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:fa0047a8d972ab779141eed4713811251c9f6e96e9e8a62caa8d554a0444ff74"}, + {file = "editdistance-0.5.3-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:db65bf1f39964019040434cb924c62c9965bd0df2feb316dbe5de3f09e6a81de"}, + {file = "editdistance-0.5.3-cp36-cp36m-win32.whl", hash = "sha256:cc65c2cd68751a966f7468537b4a6fd7d9107d49e139d8efd5734ee6f48d3126"}, + {file = "editdistance-0.5.3-cp36-cp36m-win_amd64.whl", hash = "sha256:fe7e6a90476976d7e5abc9472acb0311b7cdc76d84190f8f6c317234680c5de3"}, + {file = "editdistance-0.5.3-cp37-cp37m-macosx_10_6_intel.whl", hash = "sha256:25dd59d7f17a38203c5e433f5b11f64a8d1042d876d0dc00b324dda060d12e81"}, + {file = "editdistance-0.5.3-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:61486173447a153cccbd52eb63947378803f0f2a5bffebbfec500bd77fc5706d"}, + {file = "editdistance-0.5.3-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:cd49e9b22972b15527d53e06918c14d9fe228ae362a57476d16b0cad3e14e0c8"}, + {file = "editdistance-0.5.3-cp37-cp37m-win32.whl", hash = "sha256:503c6f69f4901d8a63f3748e4b0eccb2a89e6844b0879a7e256cab439297d379"}, + {file = "editdistance-0.5.3-cp37-cp37m-win_amd64.whl", hash = "sha256:ee4ed815bc5137a794095368580334e430ff26c73a05c67e76b39f535b363a0f"}, + {file = "editdistance-0.5.3.tar.gz", hash = "sha256:89d016dda04649b2c49e12b34337755a7b612bfd690420edd50ab31787120c1f"}, +] +entrypoints = [ + {file = "entrypoints-0.3-py2.py3-none-any.whl", hash = "sha256:589f874b313739ad35be6e0cd7efde2a4e9b6fea91edcc34e58ecbb8dbe56d19"}, + {file = "entrypoints-0.3.tar.gz", hash = "sha256:c70dd71abe5a8c85e55e12c19bd91ccfeec11a6e99044204511f9ed547d48451"}, +] +fastbpe = [ + {file = "fastBPE-0.1.0.tar.gz", hash = "sha256:95eef4be2689e822a918ac4eae3349cd78ca3f28af591afa421f8fac6d4cd889"}, +] +filelock = [ + {file = "filelock-3.0.12-py3-none-any.whl", hash = "sha256:929b7d63ec5b7d6b71b0fa5ac14e030b3f70b75747cef1b10da9b879fef15836"}, + {file = "filelock-3.0.12.tar.gz", hash = "sha256:18d82244ee114f543149c66a6e0c14e9c4f8a1044b5cdaadd0f82159d6a6ff59"}, +] +flair = [ + {file = "flair-0.6.1.post1-py3-none-any.whl", hash = "sha256:8a98fb98c8248b478da90ae52d4ed3bb10738f518b49e9e26234fe8b429c9bbe"}, + {file = "flair-0.6.1.post1.tar.gz", hash = "sha256:8e9809705bcf6ed7e058d47f34314540461b22fcf816a32a66c8e18564c76a99"}, +] +flake8 = [ + {file = "flake8-3.8.4-py2.py3-none-any.whl", hash = 
"sha256:749dbbd6bfd0cf1318af27bf97a14e28e5ff548ef8e5b1566ccfb25a11e7c839"}, + {file = "flake8-3.8.4.tar.gz", hash = "sha256:aadae8761ec651813c24be05c6f7b4680857ef6afaae4651a4eccaef97ce6c3b"}, +] +fsspec = [ + {file = "fsspec-0.8.5-py3-none-any.whl", hash = "sha256:5629dc945800873cb2092df806c854e74c2799f4854247bce37ca7171000a7ec"}, + {file = "fsspec-0.8.5.tar.gz", hash = "sha256:890c6ce9325030f03bd2eae81389ddcbcee53bdd475334ca064595e1e45f92a6"}, +] +ftfy = [ + {file = "ftfy-5.8.tar.gz", hash = "sha256:51c7767f8c4b47d291fcef30b9625fb5341c06a31e6a3b627039c706c42f3720"}, +] +future = [ + {file = "future-0.18.2.tar.gz", hash = "sha256:b1bead90b70cf6ec3f0710ae53a525360fa360d306a86583adc6bf83a4db537d"}, +] +fuzzywuzzy = [ + {file = "fuzzywuzzy-0.18.0-py2.py3-none-any.whl", hash = "sha256:928244b28db720d1e0ee7587acf660ea49d7e4c632569cad4f1cd7e68a5f0993"}, + {file = "fuzzywuzzy-0.18.0.tar.gz", hash = "sha256:45016e92264780e58972dca1b3d939ac864b78437422beecebb3095f8efd00e8"}, +] +gast = [ + {file = "gast-0.3.3-py2.py3-none-any.whl", hash = "sha256:8f46f5be57ae6889a4e16e2ca113b1703ef17f2b0abceb83793eaba9e1351a45"}, + {file = "gast-0.3.3.tar.gz", hash = "sha256:b881ef288a49aa81440d2c5eb8aeefd4c2bb8993d5f50edae7413a85bfdb3b57"}, +] +gdown = [ + {file = "gdown-3.12.2.tar.gz", hash = "sha256:4b3a1301e57bfd8dce939bf25ef8fbb4b23967fd0f878eede328bdcc41386bac"}, +] +gensim = [ + {file = "gensim-3.8.2-cp35-cp35m-macosx_10_7_intel.whl", hash = "sha256:ae3b493154073d059242fd4929cec050db77995b34621d3c9c7a622689cae341"}, + {file = "gensim-3.8.2-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:9d4407d0d866afa97c8a2c9d04ad0a6b996fe02f1f96144c0f929608bcdbff78"}, + {file = "gensim-3.8.2-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:27806c022289a76dbce29cd0e00090abb4a0ae117ff714ca0b963bd13de52d5b"}, + {file = "gensim-3.8.2-cp35-cp35m-win32.whl", hash = "sha256:3d1c1a94688c4b6107a261b0af0f43f13edef7263716348ffff790d207f4a629"}, + {file = "gensim-3.8.2-cp35-cp35m-win_amd64.whl", hash = "sha256:446476a017105a549a0eafba6e8ebb50d973775f1cf7fc1d23ba04e2c5056be4"}, + {file = "gensim-3.8.2-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:1da1daed1d405c38541f4830981385fc4ae9ccb05cf01d4f49e184c4c0ccfe85"}, + {file = "gensim-3.8.2-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:806ea08d463931f4e44b2380bedf16325562bb047615de0e4bb95216074315d9"}, + {file = "gensim-3.8.2-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:c32506dd2d112ef33721e862009f0d7aa1d83a9d8f006faedc532a4fa4c201fa"}, + {file = "gensim-3.8.2-cp36-cp36m-win32.whl", hash = "sha256:5be9320d09babf386909be1c799ebcef5fc3b8d1ba595635768e83bd753cbb52"}, + {file = "gensim-3.8.2-cp36-cp36m-win_amd64.whl", hash = "sha256:0254577e162ae7fc99b7d47cdad121e2b120cb06fb4fdf88c2c7c67b0ac2af89"}, + {file = "gensim-3.8.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b0d0c4750adeb169744e5e6b60eb3425bc47b2e19ca77ae2aeae26ca03d6c6c8"}, + {file = "gensim-3.8.2-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:77bfbc0a424611f50d87e38ee92c514870f4a9eb2eb92efd9478d66f14a3dacc"}, + {file = "gensim-3.8.2-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:7ddac73e8ad42f4522fb12a3b3c85192e28c67bb54dbc36ba7d53be8a7f8fed5"}, + {file = "gensim-3.8.2-cp37-cp37m-win32.whl", hash = "sha256:c8ae97aac860c57ffa444425a76566e1cf1d120daa2b3af6a748c2fe9edf48a5"}, + {file = "gensim-3.8.2-cp37-cp37m-win_amd64.whl", hash = "sha256:5a7227c03a9ab48d80e306104a295f518e4049f21e29a51f41702bda247d5478"}, + {file = "gensim-3.8.2.tar.gz", hash = 
"sha256:46e3065e82e11d2c50c682aa17729292734a8388cb7301f0e3204052d2df2ff5"}, + {file = "gensim-3.8.2.win-amd64-py3.5.exe", hash = "sha256:48c6653dcc1a168209108d7658cae3b9b8aa05b212252bb5a696d6ba9b1da09c"}, + {file = "gensim-3.8.2.win-amd64-py3.6.exe", hash = "sha256:db22e1f6bfd093dc8c143e75ce4eb21a5c96568822d2447d952ae65c5be8240b"}, + {file = "gensim-3.8.2.win-amd64-py3.7.exe", hash = "sha256:5a114d3c1339255496c0be7b20fa0b5740fb2ccde0b56f4631e48fefe4be7c06"}, + {file = "gensim-3.8.2.win32-py3.5.exe", hash = "sha256:8a96813bab1043544c8d20806663c649195dcf13270ed6f336748ceb4ab90bd6"}, + {file = "gensim-3.8.2.win32-py3.6.exe", hash = "sha256:a9f0a27126126e466db0faccfe506559d50d0346c12d4b4d8d796d60bb8776a8"}, + {file = "gensim-3.8.2.win32-py3.7.exe", hash = "sha256:3234cbd52c0a4d548f385393040277fab03593358fee5673533e7ee6ed2f406c"}, +] +google-auth = [ + {file = "google-auth-1.24.0.tar.gz", hash = "sha256:0b0e026b412a0ad096e753907559e4bdb180d9ba9f68dd9036164db4fdc4ad2e"}, + {file = "google_auth-1.24.0-py2.py3-none-any.whl", hash = "sha256:ce752cc51c31f479dbf9928435ef4b07514b20261b021c7383bee4bda646acb8"}, +] +google-auth-oauthlib = [ + {file = "google-auth-oauthlib-0.4.2.tar.gz", hash = "sha256:65b65bc39ad8cab15039b35e5898455d3d66296d0584d96fe0e79d67d04c51d9"}, + {file = "google_auth_oauthlib-0.4.2-py2.py3-none-any.whl", hash = "sha256:d4d98c831ea21d574699978827490a41b94f05d565c617fe1b420e88f1fc8d8d"}, +] +google-pasta = [ + {file = "google-pasta-0.2.0.tar.gz", hash = "sha256:c9f2c8dfc8f96d0d5808299920721be30c9eec37f2389f28904f454565c8a16e"}, + {file = "google_pasta-0.2.0-py2-none-any.whl", hash = "sha256:4612951da876b1a10fe3960d7226f0c7682cf901e16ac06e473b267a5afa8954"}, + {file = "google_pasta-0.2.0-py3-none-any.whl", hash = "sha256:b32482794a366b5366a32c92a9a9201b107821889935a02b3e51f6b432ea84ed"}, +] +grpcio = [ + {file = "grpcio-1.34.0-cp27-cp27m-macosx_10_10_x86_64.whl", hash = "sha256:e2ffa46db9103706640c74886ac23ed18d1487a8523cc128da239e1d5a4e3301"}, + {file = "grpcio-1.34.0-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:843436e69c37eb45b0285fa42f7acc06d147f2e9c1d515b0f901e94d40107e79"}, + {file = "grpcio-1.34.0-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:a403ed4d8fcc441a2c2ec9ede838b0ae5f9da996d950cf2ff9f82242b496e0a7"}, + {file = "grpcio-1.34.0-cp27-cp27m-win32.whl", hash = "sha256:dc45f5750ce50f34f20a0607efae5c797d01681a44465b8287bebef1e9847d5b"}, + {file = "grpcio-1.34.0-cp27-cp27m-win_amd64.whl", hash = "sha256:2fd4a80f267aa258f5a74df5fe243eff80299a4f5b356c1da53f6f5793bbbf4b"}, + {file = "grpcio-1.34.0-cp27-cp27mu-linux_armv7l.whl", hash = "sha256:f2e4d64675351a058f9cb35fe390ca0956bd2926171bfb7c87596a1ee10ff6ba"}, + {file = "grpcio-1.34.0-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:4a2c85cd4a67c36fe12535fe32eb336635843d1eb31d3fa301444e60a8df9c90"}, + {file = "grpcio-1.34.0-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:32ad56f6d3d7e699f9a0d62719f2de9092e79f444d875d70f58cf7f8bb19684c"}, + {file = "grpcio-1.34.0-cp35-cp35m-linux_armv7l.whl", hash = "sha256:e69ac6fc9096bbb43f5276655661db746233cd320808e0d302198eb43dc7bd04"}, + {file = "grpcio-1.34.0-cp35-cp35m-macosx_10_10_intel.whl", hash = "sha256:5b105adb44486fb594b8d8142b5d4fbe50cb125c77ac7d270f5d0277ce5c554a"}, + {file = "grpcio-1.34.0-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:923a3b18badc3749c4d715216934f62f46a818790e325ece6184d07e7d6c7f73"}, + {file = "grpcio-1.34.0-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:9579f22222ac89ceee64c1101cced6434d9f6b12078b43ece0f9d8ebdb657f73"}, + {file = 
"grpcio-1.34.0-cp35-cp35m-manylinux2014_i686.whl", hash = "sha256:dfa098a6ff8d1b68ed7bd655150ee91f57c29042c093ff51113176aded3f0071"}, + {file = "grpcio-1.34.0-cp35-cp35m-manylinux2014_x86_64.whl", hash = "sha256:32fbc78d558d9468a4b16f79f4130daec8e431bc7a3b1775b0e98f09a7ab45a2"}, + {file = "grpcio-1.34.0-cp35-cp35m-win32.whl", hash = "sha256:205eda06d8aeffc87a1e29ff1f090546adf0b6e766378cc4c13686534397fdb4"}, + {file = "grpcio-1.34.0-cp35-cp35m-win_amd64.whl", hash = "sha256:2ea864ae3d3abc99d3988d1d27dee3f6350b60149ccf810a89cd9a9d02a675d6"}, + {file = "grpcio-1.34.0-cp36-cp36m-linux_armv7l.whl", hash = "sha256:5d8108b240fd5b8a0483f95ab2651fe2d633311faae93a12938ea06cf61a5efd"}, + {file = "grpcio-1.34.0-cp36-cp36m-macosx_10_10_x86_64.whl", hash = "sha256:bda0f52eb1279a7119526df2ef33ea2808691120daf9effaf60ca0c07f76058a"}, + {file = "grpcio-1.34.0-cp36-cp36m-manylinux2010_i686.whl", hash = "sha256:c89b6a3eca8eae10eea78896ccfdc9d04aa2f7b2ee96de20246e5c96494c68f5"}, + {file = "grpcio-1.34.0-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:fa834f4c70b9df83d5af610097747c224513d59af1f03e8c06bca9a7d81fd1a3"}, + {file = "grpcio-1.34.0-cp36-cp36m-manylinux2014_i686.whl", hash = "sha256:20606ec7c265f81c5a0226f69842dc8dde66d921968ab9448e59d440cf98bebf"}, + {file = "grpcio-1.34.0-cp36-cp36m-manylinux2014_x86_64.whl", hash = "sha256:72b6a89aabf937d706946230f5aa13bdf7d2a42874810fa54436c647577b543e"}, + {file = "grpcio-1.34.0-cp36-cp36m-win32.whl", hash = "sha256:49da07ae43c552280b8b4c70617f9b589588404c2545d6eba2c55179b3d836af"}, + {file = "grpcio-1.34.0-cp36-cp36m-win_amd64.whl", hash = "sha256:beef6be49ada569edf3b73fd4eb57d6c2af7e10c0c82a210dbe51de7c4a1ed53"}, + {file = "grpcio-1.34.0-cp37-cp37m-macosx_10_10_x86_64.whl", hash = "sha256:8d92e884f6d67b9a2a4514631d3c9836281044caedb5fd34d4ce2bbec138c87d"}, + {file = "grpcio-1.34.0-cp37-cp37m-manylinux2010_i686.whl", hash = "sha256:e238a554f29d90b0e7fca15e8119b9a7c5f88faacbf9b982751ad54d639b57f8"}, + {file = "grpcio-1.34.0-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:98b0b6e44c451093354a38b620e6e0df958b0710abd6a0ddd84da84424bce003"}, + {file = "grpcio-1.34.0-cp37-cp37m-manylinux2014_i686.whl", hash = "sha256:bbd3522f821fb5d01049db214fb9f949a8b2d92761c2780a20ff73818efd5360"}, + {file = "grpcio-1.34.0-cp37-cp37m-manylinux2014_x86_64.whl", hash = "sha256:2f54046ca2a81ff45ec8f6d3d7447ad562adb067c3640c35354e440fd771b625"}, + {file = "grpcio-1.34.0-cp37-cp37m-win32.whl", hash = "sha256:50c4f10e7deff96d197bc6d1988c2a5a0bc6252bbd31d7fb374ce8923f937e7a"}, + {file = "grpcio-1.34.0-cp37-cp37m-win_amd64.whl", hash = "sha256:6fafdba42c26bbdf78948c09a93a8b3a8a509c66c6b4324bc1fb360bf4e82b9d"}, + {file = "grpcio-1.34.0-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:bd7634f8c49c8467fec5fd9e0d1abb205b0aa61670ff0113ef835ca6548aad3d"}, + {file = "grpcio-1.34.0-cp38-cp38-manylinux2010_i686.whl", hash = "sha256:69127393fc3513da228bc3908914df2284923e0eacf8d73f21ad387317450317"}, + {file = "grpcio-1.34.0-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:5e8e6035d4f9ab856ab437e381e652b31dfd42443d2243d45bdf4b90adaf3559"}, + {file = "grpcio-1.34.0-cp38-cp38-manylinux2014_i686.whl", hash = "sha256:95de4ad9ae39590668e3330d414253f672aedd46cc107d7f71b4a2268f3d6066"}, + {file = "grpcio-1.34.0-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:a1024006fe61ee7e43e7099faf08f4508ea0c944a1558e8d715a5b4556937ace"}, + {file = "grpcio-1.34.0-cp38-cp38-win32.whl", hash = "sha256:dea35dcf09aee91552cb4b3e250efdbcb79564b5b5517246bcbead8d5871e291"}, + {file = 
"grpcio-1.34.0-cp38-cp38-win_amd64.whl", hash = "sha256:e95bda60c584b3deb5c37babb44d4300cf4bf3a6c43198a244ddcaddca3fde3a"}, + {file = "grpcio-1.34.0-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:c88ce184973fe2035ffa176eb08cd492db090505e6b1ddc68b5cc1e0b01a07a0"}, + {file = "grpcio-1.34.0-cp39-cp39-manylinux2010_i686.whl", hash = "sha256:57a30f9df0f5342e4dad384e7023b9f88742c325838da977828c37f49eb8940a"}, + {file = "grpcio-1.34.0-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:924d5e8b18942ebea1260e60be7e2bde2a3587ea386190b442790f84180bf372"}, + {file = "grpcio-1.34.0-cp39-cp39-manylinux2014_i686.whl", hash = "sha256:43fafebcc2e81d012f7147a0ddf9be69864c40fc4edd9844937eba0020508297"}, + {file = "grpcio-1.34.0-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:9550b7c9d2f11579b484accc6183e02ebe33ce80a0ff15f5c28895df6b3d3108"}, + {file = "grpcio-1.34.0-cp39-cp39-win32.whl", hash = "sha256:d16f7f5a10bf24640fa639974d409c220e587b3e2fa2620af00d43ba36dafc2c"}, + {file = "grpcio-1.34.0-cp39-cp39-win_amd64.whl", hash = "sha256:25958bd7c6773e6de79781cc0d6f19d0c82332984dd07ef238889e93485d5afc"}, + {file = "grpcio-1.34.0.tar.gz", hash = "sha256:f98f746cacbaa681de0bcd90d7aa77b440e3e1327a9988f6a2b580d54e27d4c3"}, +] +h5py = [ + {file = "h5py-2.10.0-cp27-cp27m-macosx_10_6_intel.whl", hash = "sha256:ecf4d0b56ee394a0984de15bceeb97cbe1fe485f1ac205121293fc44dcf3f31f"}, + {file = "h5py-2.10.0-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:86868dc07b9cc8cb7627372a2e6636cdc7a53b7e2854ad020c9e9d8a4d3fd0f5"}, + {file = "h5py-2.10.0-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:aac4b57097ac29089f179bbc2a6e14102dd210618e94d77ee4831c65f82f17c0"}, + {file = "h5py-2.10.0-cp27-cp27m-win32.whl", hash = "sha256:7be5754a159236e95bd196419485343e2b5875e806fe68919e087b6351f40a70"}, + {file = "h5py-2.10.0-cp27-cp27m-win_amd64.whl", hash = "sha256:13c87efa24768a5e24e360a40e0bc4c49bcb7ce1bb13a3a7f9902cec302ccd36"}, + {file = "h5py-2.10.0-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:79b23f47c6524d61f899254f5cd5e486e19868f1823298bc0c29d345c2447172"}, + {file = "h5py-2.10.0-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:cbf28ae4b5af0f05aa6e7551cee304f1d317dbed1eb7ac1d827cee2f1ef97a99"}, + {file = "h5py-2.10.0-cp34-cp34m-manylinux1_i686.whl", hash = "sha256:c0d4b04bbf96c47b6d360cd06939e72def512b20a18a8547fa4af810258355d5"}, + {file = "h5py-2.10.0-cp34-cp34m-manylinux1_x86_64.whl", hash = "sha256:549ad124df27c056b2e255ea1c44d30fb7a17d17676d03096ad5cd85edb32dc1"}, + {file = "h5py-2.10.0-cp35-cp35m-macosx_10_6_intel.whl", hash = "sha256:a5f82cd4938ff8761d9760af3274acf55afc3c91c649c50ab18fcff5510a14a5"}, + {file = "h5py-2.10.0-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:3dad1730b6470fad853ef56d755d06bb916ee68a3d8272b3bab0c1ddf83bb99e"}, + {file = "h5py-2.10.0-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:063947eaed5f271679ed4ffa36bb96f57bc14f44dd4336a827d9a02702e6ce6b"}, + {file = "h5py-2.10.0-cp35-cp35m-win32.whl", hash = "sha256:c54a2c0dd4957776ace7f95879d81582298c5daf89e77fb8bee7378f132951de"}, + {file = "h5py-2.10.0-cp35-cp35m-win_amd64.whl", hash = "sha256:6998be619c695910cb0effe5eb15d3a511d3d1a5d217d4bd0bebad1151ec2262"}, + {file = "h5py-2.10.0-cp36-cp36m-macosx_10_6_intel.whl", hash = "sha256:ff7d241f866b718e4584fa95f520cb19405220c501bd3a53ee11871ba5166ea2"}, + {file = "h5py-2.10.0-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:54817b696e87eb9e403e42643305f142cd8b940fe9b3b490bbf98c3b8a894cf4"}, + {file = "h5py-2.10.0-cp36-cp36m-manylinux1_x86_64.whl", hash = 
"sha256:d3c59549f90a891691991c17f8e58c8544060fdf3ccdea267100fa5f561ff62f"}, + {file = "h5py-2.10.0-cp36-cp36m-win32.whl", hash = "sha256:d7ae7a0576b06cb8e8a1c265a8bc4b73d05fdee6429bffc9a26a6eb531e79d72"}, + {file = "h5py-2.10.0-cp36-cp36m-win_amd64.whl", hash = "sha256:bffbc48331b4a801d2f4b7dac8a72609f0b10e6e516e5c480a3e3241e091c878"}, + {file = "h5py-2.10.0-cp37-cp37m-macosx_10_6_intel.whl", hash = "sha256:51ae56894c6c93159086ffa2c94b5b3388c0400548ab26555c143e7cfa05b8e5"}, + {file = "h5py-2.10.0-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:16ead3c57141101e3296ebeed79c9c143c32bdd0e82a61a2fc67e8e6d493e9d1"}, + {file = "h5py-2.10.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:f0e25bb91e7a02efccb50aba6591d3fe2c725479e34769802fcdd4076abfa917"}, + {file = "h5py-2.10.0-cp37-cp37m-win32.whl", hash = "sha256:f23951a53d18398ef1344c186fb04b26163ca6ce449ebd23404b153fd111ded9"}, + {file = "h5py-2.10.0-cp37-cp37m-win_amd64.whl", hash = "sha256:8bb1d2de101f39743f91512a9750fb6c351c032e5cd3204b4487383e34da7f75"}, + {file = "h5py-2.10.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:64f74da4a1dd0d2042e7d04cf8294e04ddad686f8eba9bb79e517ae582f6668d"}, + {file = "h5py-2.10.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:d35f7a3a6cefec82bfdad2785e78359a0e6a5fbb3f605dd5623ce88082ccd681"}, + {file = "h5py-2.10.0-cp38-cp38-win32.whl", hash = "sha256:6ef7ab1089e3ef53ca099038f3c0a94d03e3560e6aff0e9d6c64c55fb13fc681"}, + {file = "h5py-2.10.0-cp38-cp38-win_amd64.whl", hash = "sha256:769e141512b54dee14ec76ed354fcacfc7d97fea5a7646b709f7400cf1838630"}, + {file = "h5py-2.10.0.tar.gz", hash = "sha256:84412798925dc870ffd7107f045d7659e60f5d46d1c70c700375248bf6bf512d"}, +] +hydra-core = [ + {file = "hydra-core-1.0.4.tar.gz", hash = "sha256:a3c9a30d866361d811e8c0a0dbecac541a6381deb0e618a49a7f1561d0b016b4"}, + {file = "hydra_core-1.0.4-py3-none-any.whl", hash = "sha256:febf981846658713e623cf8bc7a645f2795f33013706115e18611c43baa1832e"}, +] +hyperopt = [ + {file = "hyperopt-0.2.5-py2.py3-none-any.whl", hash = "sha256:dc5c7cceaf33c125b727cf92709e70035d94dd507831dae66406ac762a18a253"}, + {file = "hyperopt-0.2.5.tar.gz", hash = "sha256:bc6047d50f956ae64eebcb34b1fd40f186a93e214957f20e87af2f10195295cc"}, +] +identify = [ + {file = "identify-1.5.11-py2.py3-none-any.whl", hash = "sha256:7aef7a5104d6254c162990e54a203cdc0fd202046b6c415bd5d636472f6565c4"}, + {file = "identify-1.5.11.tar.gz", hash = "sha256:b2c71bf9f5c482c389cef816f3a15f1c9d7429ad70f497d4a2e522442d80c6de"}, +] +idna = [ + {file = "idna-2.10-py2.py3-none-any.whl", hash = "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0"}, + {file = "idna-2.10.tar.gz", hash = "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6"}, +] +imagesize = [ + {file = "imagesize-1.2.0-py2.py3-none-any.whl", hash = "sha256:6965f19a6a2039c7d48bca7dba2473069ff854c36ae6f19d2cde309d998228a1"}, + {file = "imagesize-1.2.0.tar.gz", hash = "sha256:b1f6b5a4eab1f73479a50fb79fcf729514a900c341d8503d62a62dbc4127a2b1"}, +] +importlib-resources = [ + {file = "importlib_resources-4.1.1-py3-none-any.whl", hash = "sha256:0a948d0c8c3f9344de62997e3f73444dbba233b1eaf24352933c2d264b9e4182"}, + {file = "importlib_resources-4.1.1.tar.gz", hash = "sha256:6b45007a479c4ec21165ae3ffbe37faf35404e2041fac6ae1da684f38530ca73"}, +] +iniconfig = [ + {file = "iniconfig-1.1.1-py2.py3-none-any.whl", hash = "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3"}, + {file = "iniconfig-1.1.1.tar.gz", hash = 
"sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"}, +] +ipykernel = [ + {file = "ipykernel-5.4.2-py3-none-any.whl", hash = "sha256:63b4b96c513e1138874934e3e783a8e5e13c02b9036e37107bfe042ac8955005"}, + {file = "ipykernel-5.4.2.tar.gz", hash = "sha256:e20ceb7e52cb4d250452e1230be76e0b2323f33bd46c6b2bc7abb6601740e182"}, +] +ipython = [ + {file = "ipython-7.19.0-py3-none-any.whl", hash = "sha256:c987e8178ced651532b3b1ff9965925bfd445c279239697052561a9ab806d28f"}, + {file = "ipython-7.19.0.tar.gz", hash = "sha256:cbb2ef3d5961d44e6a963b9817d4ea4e1fa2eb589c371a470fed14d8d40cbd6a"}, +] +ipython-genutils = [ + {file = "ipython_genutils-0.2.0-py2.py3-none-any.whl", hash = "sha256:72dd37233799e619666c9f639a9da83c34013a73e8bbc79a7a6348d93c61fab8"}, + {file = "ipython_genutils-0.2.0.tar.gz", hash = "sha256:eb2e116e75ecef9d4d228fdc66af54269afa26ab4463042e33785b887c628ba8"}, +] +ipywidgets = [ + {file = "ipywidgets-7.6.2-py2.py3-none-any.whl", hash = "sha256:eab960f737f380075cabca41f92e5e81dfb6eba3ce6392094469ef2418ca4d35"}, + {file = "ipywidgets-7.6.2.tar.gz", hash = "sha256:bbb881ce18fb0cff4ac718f40c04709c7ac86a77abee149f1b447965ede86e36"}, +] +isort = [ + {file = "isort-5.7.0-py3-none-any.whl", hash = "sha256:fff4f0c04e1825522ce6949973e83110a6e907750cd92d128b0d14aaaadbffdc"}, + {file = "isort-5.7.0.tar.gz", hash = "sha256:c729845434366216d320e936b8ad6f9d681aab72dc7cbc2d51bedc3582f3ad1e"}, +] +janome = [ + {file = "Janome-0.4.1-py2.py3-none-any.whl", hash = "sha256:a650e2684e80af72f869eff17566f31dd4444f5443c4771dca1ada60cea5c251"}, + {file = "Janome-0.4.1.tar.gz", hash = "sha256:6c2c38d894014d57cb3151265c11146506ead3b3bc290898adc33711711612de"}, +] +jedi = [ + {file = "jedi-0.18.0-py2.py3-none-any.whl", hash = "sha256:18456d83f65f400ab0c2d3319e48520420ef43b23a086fdc05dff34132f0fb93"}, + {file = "jedi-0.18.0.tar.gz", hash = "sha256:92550a404bad8afed881a137ec9a461fed49eca661414be45059329614ed0707"}, +] +jinja2 = [ + {file = "Jinja2-2.11.2-py2.py3-none-any.whl", hash = "sha256:f0a4641d3cf955324a89c04f3d94663aa4d638abe8f733ecd3582848e1c37035"}, + {file = "Jinja2-2.11.2.tar.gz", hash = "sha256:89aab215427ef59c34ad58735269eb58b1a5808103067f7bb9d5836c651b3bb0"}, +] +jmespath = [ + {file = "jmespath-0.10.0-py2.py3-none-any.whl", hash = "sha256:cdf6525904cc597730141d61b36f2e4b8ecc257c420fa2f4549bac2c2d0cb72f"}, + {file = "jmespath-0.10.0.tar.gz", hash = "sha256:b85d0567b8666149a93172712e68920734333c0ce7e89b78b3e987f71e5ed4f9"}, +] +joblib = [ + {file = "joblib-1.0.0-py3-none-any.whl", hash = "sha256:75ead23f13484a2a414874779d69ade40d4fa1abe62b222a23cd50d4bc822f6f"}, + {file = "joblib-1.0.0.tar.gz", hash = "sha256:7ad866067ac1fdec27d51c8678ea760601b70e32ff1881d4dc8e1171f2b64b24"}, +] +json5 = [ + {file = "json5-0.9.5-py2.py3-none-any.whl", hash = "sha256:af1a1b9a2850c7f62c23fde18be4749b3599fd302f494eebf957e2ada6b9e42c"}, + {file = "json5-0.9.5.tar.gz", hash = "sha256:703cfee540790576b56a92e1c6aaa6c4b0d98971dc358ead83812aa4d06bdb96"}, +] +jsonlines = [ + {file = "jsonlines-1.2.0-py2.py3-none-any.whl", hash = "sha256:0ebd5b0c3efe0d4b5018b320fb0ee1a7b680ab39f6eb853715859f818d386cc8"}, + {file = "jsonlines-1.2.0.tar.gz", hash = "sha256:43b8d5588a9d4862c8a4a49580e38e20ec595aee7ad6fe469b10fb83fbefde88"}, +] +jsonnet = [ + {file = "jsonnet-0.17.0.tar.gz", hash = "sha256:23ffcd4d03a10af7b20b53feee16627debe28345a4d7d5ed07881b7444553bfb"}, +] +jsonpickle = [ + {file = "jsonpickle-1.4.2-py2.py3-none-any.whl", hash = "sha256:2ac5863099864c63d7f0c367af5e512c94f3384977dd367f2eae5f2303f7b92c"}, + {file = 
"jsonpickle-1.4.2.tar.gz", hash = "sha256:c9b99b28a9e6a3043ec993552db79f4389da11afcb1d0246d93c79f4b5e64062"}, +] +jsonschema = [ + {file = "jsonschema-3.2.0-py2.py3-none-any.whl", hash = "sha256:4e5b3cf8216f577bee9ce139cbe72eca3ea4f292ec60928ff24758ce626cd163"}, + {file = "jsonschema-3.2.0.tar.gz", hash = "sha256:c8a85b28d377cc7737e46e2d9f2b4f44ee3c0e1deac6bf46ddefc7187d30797a"}, +] +jupyter-client = [ + {file = "jupyter_client-6.1.7-py3-none-any.whl", hash = "sha256:c958d24d6eacb975c1acebb68ac9077da61b5f5c040f22f6849928ad7393b950"}, + {file = "jupyter_client-6.1.7.tar.gz", hash = "sha256:49e390b36fe4b4226724704ea28d9fb903f1a3601b6882ce3105221cd09377a1"}, +] +jupyter-core = [ + {file = "jupyter_core-4.7.0-py3-none-any.whl", hash = "sha256:0a451c9b295e4db772bdd8d06f2f1eb31caeec0e81fbb77ba37d4a3024e3b315"}, + {file = "jupyter_core-4.7.0.tar.gz", hash = "sha256:aa1f9496ab3abe72da4efe0daab0cb2233997914581f9a071e07498c6add8ed3"}, +] +jupyter-server = [ + {file = "jupyter_server-1.1.3-py3-none-any.whl", hash = "sha256:1c684fa73cad376b09f307ce817be50a50bca78c8be52aa059ec5481098dba1f"}, + {file = "jupyter_server-1.1.3.tar.gz", hash = "sha256:23ce959718592ba472db7982a5daf15dda3397fd50bb54d05ad10c09fe122905"}, +] +jupyterlab = [ + {file = "jupyterlab-3.0.0-py3-none-any.whl", hash = "sha256:42cf1b8c7ebe4e2a502f8538b852c3d553ddde21cb5da6085c4b6bfe67b34fa6"}, + {file = "jupyterlab-3.0.0.tar.gz", hash = "sha256:15228dff3f77b0bca795fd232cb25f02121510cec83f1d25856b3bc8e585b087"}, +] +jupyterlab-pygments = [ + {file = "jupyterlab_pygments-0.1.2-py2.py3-none-any.whl", hash = "sha256:abfb880fd1561987efaefcb2d2ac75145d2a5d0139b1876d5be806e32f630008"}, + {file = "jupyterlab_pygments-0.1.2.tar.gz", hash = "sha256:cfcda0873626150932f438eccf0f8bf22bfa92345b814890ab360d666b254146"}, +] +jupyterlab-server = [ + {file = "jupyterlab_server-2.0.0-py3-none-any.whl", hash = "sha256:20a4e495276956528783c0befab58e409d9f6258589caccb4c0591387caadc84"}, + {file = "jupyterlab_server-2.0.0.tar.gz", hash = "sha256:1350c36954d3d16c71129b30b60b9df11e8fcf2f3acf88596f6abc8a79b0c918"}, +] +jupyterlab-widgets = [ + {file = "jupyterlab_widgets-1.0.0-py3-none-any.whl", hash = "sha256:caeaf3e6103180e654e7d8d2b81b7d645e59e432487c1d35a41d6d3ee56b3fef"}, + {file = "jupyterlab_widgets-1.0.0.tar.gz", hash = "sha256:5c1a29a84d3069208cb506b10609175b249b6486d6b1cbae8fcde2a11584fb78"}, +] +kaleido = [ + {file = "kaleido-0.1.0-py2.py3-none-macosx_10_10_x86_64.whl", hash = "sha256:6a73cd4a69609490f7e13e43e77724d254aef28b062babad120b32e6f32968c2"}, + {file = "kaleido-0.1.0-py2.py3-none-manylinux1_x86_64.whl", hash = "sha256:8d0403b1eb21080e09d6d728c1ea7170fd4763c415fe89dfea6edf35ec36f8e7"}, + {file = "kaleido-0.1.0-py2.py3-none-manylinux2014_aarch64.whl", hash = "sha256:f3de8e08764115f529351208d689ff80523aa1e9fc0018d342af857f94e3b44e"}, + {file = "kaleido-0.1.0-py2.py3-none-win32.whl", hash = "sha256:c583ed02b2c50a17e11ee3faec76f7d5f9898f8915f8877b583c5f83d0094e91"}, + {file = "kaleido-0.1.0-py2.py3-none-win_amd64.whl", hash = "sha256:949e3fc01c56cdca0226e866277cfb5e1b4bf66b5d4045ca43a3211f61bc8446"}, +] +keras-preprocessing = [ + {file = "Keras_Preprocessing-1.1.2-py2.py3-none-any.whl", hash = "sha256:7b82029b130ff61cc99b55f3bd27427df4838576838c5b2f65940e4fcec99a7b"}, + {file = "Keras_Preprocessing-1.1.2.tar.gz", hash = "sha256:add82567c50c8bc648c14195bf544a5ce7c1f76761536956c3d2978970179ef3"}, +] +kiwisolver = [ + {file = "kiwisolver-1.3.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = 
"sha256:fd34fbbfbc40628200730bc1febe30631347103fc8d3d4fa012c21ab9c11eca9"}, + {file = "kiwisolver-1.3.1-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:d3155d828dec1d43283bd24d3d3e0d9c7c350cdfcc0bd06c0ad1209c1bbc36d0"}, + {file = "kiwisolver-1.3.1-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:5a7a7dbff17e66fac9142ae2ecafb719393aaee6a3768c9de2fd425c63b53e21"}, + {file = "kiwisolver-1.3.1-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:f8d6f8db88049a699817fd9178782867bf22283e3813064302ac59f61d95be05"}, + {file = "kiwisolver-1.3.1-cp36-cp36m-manylinux2014_ppc64le.whl", hash = "sha256:5f6ccd3dd0b9739edcf407514016108e2280769c73a85b9e59aa390046dbf08b"}, + {file = "kiwisolver-1.3.1-cp36-cp36m-win32.whl", hash = "sha256:225e2e18f271e0ed8157d7f4518ffbf99b9450fca398d561eb5c4a87d0986dd9"}, + {file = "kiwisolver-1.3.1-cp36-cp36m-win_amd64.whl", hash = "sha256:cf8b574c7b9aa060c62116d4181f3a1a4e821b2ec5cbfe3775809474113748d4"}, + {file = "kiwisolver-1.3.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:232c9e11fd7ac3a470d65cd67e4359eee155ec57e822e5220322d7b2ac84fbf0"}, + {file = "kiwisolver-1.3.1-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:b38694dcdac990a743aa654037ff1188c7a9801ac3ccc548d3341014bc5ca278"}, + {file = "kiwisolver-1.3.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:ca3820eb7f7faf7f0aa88de0e54681bddcb46e485beb844fcecbcd1c8bd01689"}, + {file = "kiwisolver-1.3.1-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:c8fd0f1ae9d92b42854b2979024d7597685ce4ada367172ed7c09edf2cef9cb8"}, + {file = "kiwisolver-1.3.1-cp37-cp37m-manylinux2014_ppc64le.whl", hash = "sha256:1e1bc12fb773a7b2ffdeb8380609f4f8064777877b2225dec3da711b421fda31"}, + {file = "kiwisolver-1.3.1-cp37-cp37m-win32.whl", hash = "sha256:72c99e39d005b793fb7d3d4e660aed6b6281b502e8c1eaf8ee8346023c8e03bc"}, + {file = "kiwisolver-1.3.1-cp37-cp37m-win_amd64.whl", hash = "sha256:8be8d84b7d4f2ba4ffff3665bcd0211318aa632395a1a41553250484a871d454"}, + {file = "kiwisolver-1.3.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:31dfd2ac56edc0ff9ac295193eeaea1c0c923c0355bf948fbd99ed6018010b72"}, + {file = "kiwisolver-1.3.1-cp38-cp38-manylinux1_i686.whl", hash = "sha256:563c649cfdef27d081c84e72a03b48ea9408c16657500c312575ae9d9f7bc1c3"}, + {file = "kiwisolver-1.3.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:78751b33595f7f9511952e7e60ce858c6d64db2e062afb325985ddbd34b5c131"}, + {file = "kiwisolver-1.3.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:a357fd4f15ee49b4a98b44ec23a34a95f1e00292a139d6015c11f55774ef10de"}, + {file = "kiwisolver-1.3.1-cp38-cp38-manylinux2014_ppc64le.whl", hash = "sha256:5989db3b3b34b76c09253deeaf7fbc2707616f130e166996606c284395da3f18"}, + {file = "kiwisolver-1.3.1-cp38-cp38-win32.whl", hash = "sha256:c08e95114951dc2090c4a630c2385bef681cacf12636fb0241accdc6b303fd81"}, + {file = "kiwisolver-1.3.1-cp38-cp38-win_amd64.whl", hash = "sha256:44a62e24d9b01ba94ae7a4a6c3fb215dc4af1dde817e7498d901e229aaf50e4e"}, + {file = "kiwisolver-1.3.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:50af681a36b2a1dee1d3c169ade9fdc59207d3c31e522519181e12f1b3ba7000"}, + {file = "kiwisolver-1.3.1-cp39-cp39-manylinux1_i686.whl", hash = "sha256:a53d27d0c2a0ebd07e395e56a1fbdf75ffedc4a05943daf472af163413ce9598"}, + {file = "kiwisolver-1.3.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:834ee27348c4aefc20b479335fd422a2c69db55f7d9ab61721ac8cd83eb78882"}, + {file = "kiwisolver-1.3.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:5c3e6455341008a054cccee8c5d24481bcfe1acdbc9add30aa95798e95c65621"}, + {file = 
"kiwisolver-1.3.1-cp39-cp39-manylinux2014_ppc64le.whl", hash = "sha256:acef3d59d47dd85ecf909c359d0fd2c81ed33bdff70216d3956b463e12c38a54"}, + {file = "kiwisolver-1.3.1-cp39-cp39-win32.whl", hash = "sha256:c5518d51a0735b1e6cee1fdce66359f8d2b59c3ca85dc2b0813a8aa86818a030"}, + {file = "kiwisolver-1.3.1-cp39-cp39-win_amd64.whl", hash = "sha256:b9edd0110a77fc321ab090aaa1cfcaba1d8499850a12848b81be2222eab648f6"}, + {file = "kiwisolver-1.3.1-pp36-pypy36_pp73-macosx_10_9_x86_64.whl", hash = "sha256:0cd53f403202159b44528498de18f9285b04482bab2a6fc3f5dd8dbb9352e30d"}, + {file = "kiwisolver-1.3.1-pp36-pypy36_pp73-manylinux2010_x86_64.whl", hash = "sha256:33449715e0101e4d34f64990352bce4095c8bf13bed1b390773fc0a7295967b3"}, + {file = "kiwisolver-1.3.1-pp36-pypy36_pp73-win32.whl", hash = "sha256:401a2e9afa8588589775fe34fc22d918ae839aaaf0c0e96441c0fdbce6d8ebe6"}, + {file = "kiwisolver-1.3.1.tar.gz", hash = "sha256:950a199911a8d94683a6b10321f9345d5a3a8433ec58b217ace979e18f16e248"}, +] +konoha = [ + {file = "konoha-4.0.0-py3-none-any.whl", hash = "sha256:b3ae3a934c97b73ff5000a27351eb28ea02edc612316c56731cfd9abd661649a"}, + {file = "konoha-4.0.0.tar.gz", hash = "sha256:51124d5cd06a229fe01169aff0774700410b1b98253838847aef90632e7ac9ed"}, +] +langdetect = [ + {file = "langdetect-1.0.8-py2-none-any.whl", hash = "sha256:f37495e63607865e47deed08d78f7f8e58172658216ff954b2f14671bcd87740"}, + {file = "langdetect-1.0.8.tar.gz", hash = "sha256:363795ea005f1243c958e953245dac5d814fabdc025c9afa91588c5fa6b2fa83"}, +] +language-tool-python = [ + {file = "language_tool_python-2.4.7-py3-none-any.whl", hash = "sha256:0162ebefb44ddcca8f536c5ba36c3e6a78319c771a47d29e4604e15d254f9443"}, + {file = "language_tool_python-2.4.7.tar.gz", hash = "sha256:4c6c568dc32380c43c1a0ff640a8ca06e44420bea17552133596b1d2137a0b5f"}, +] +lemminflect = [ + {file = "lemminflect-0.2.1-py3-none-any.whl", hash = "sha256:96dc0cf32aa1973a00deb369a413d032cf005ac9872a249283264d70b85a1da5"}, + {file = "lemminflect-0.2.1.tar.gz", hash = "sha256:46f439d8e8237efb429173c9f83d00038e9a4db3c668b436034c9ca783c35a53"}, +] +lru-dict = [ + {file = "lru-dict-1.1.6.tar.gz", hash = "sha256:365457660e3d05b76f1aba3e0f7fedbfcd6528e97c5115a351ddd0db488354cc"}, +] +lxml = [ + {file = "lxml-4.6.2-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:a9d6bc8642e2c67db33f1247a77c53476f3a166e09067c0474facb045756087f"}, + {file = "lxml-4.6.2-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:791394449e98243839fa822a637177dd42a95f4883ad3dec2a0ce6ac99fb0a9d"}, + {file = "lxml-4.6.2-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:68a5d77e440df94011214b7db907ec8f19e439507a70c958f750c18d88f995d2"}, + {file = "lxml-4.6.2-cp27-cp27m-win32.whl", hash = "sha256:fc37870d6716b137e80d19241d0e2cff7a7643b925dfa49b4c8ebd1295eb506e"}, + {file = "lxml-4.6.2-cp27-cp27m-win_amd64.whl", hash = "sha256:69a63f83e88138ab7642d8f61418cf3180a4d8cd13995df87725cb8b893e950e"}, + {file = "lxml-4.6.2-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:42ebca24ba2a21065fb546f3e6bd0c58c3fe9ac298f3a320147029a4850f51a2"}, + {file = "lxml-4.6.2-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:f83d281bb2a6217cd806f4cf0ddded436790e66f393e124dfe9731f6b3fb9afe"}, + {file = "lxml-4.6.2-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:535f067002b0fd1a4e5296a8f1bf88193080ff992a195e66964ef2a6cfec5388"}, + {file = "lxml-4.6.2-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:366cb750140f221523fa062d641393092813b81e15d0e25d9f7c6025f910ee80"}, + {file = "lxml-4.6.2-cp35-cp35m-manylinux2014_aarch64.whl", hash = 
"sha256:97db258793d193c7b62d4e2586c6ed98d51086e93f9a3af2b2034af01450a74b"}, + {file = "lxml-4.6.2-cp35-cp35m-win32.whl", hash = "sha256:648914abafe67f11be7d93c1a546068f8eff3c5fa938e1f94509e4a5d682b2d8"}, + {file = "lxml-4.6.2-cp35-cp35m-win_amd64.whl", hash = "sha256:4e751e77006da34643ab782e4a5cc21ea7b755551db202bc4d3a423b307db780"}, + {file = "lxml-4.6.2-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:681d75e1a38a69f1e64ab82fe4b1ed3fd758717bed735fb9aeaa124143f051af"}, + {file = "lxml-4.6.2-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:127f76864468d6630e1b453d3ffbbd04b024c674f55cf0a30dc2595137892d37"}, + {file = "lxml-4.6.2-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:4fb85c447e288df535b17ebdebf0ec1cf3a3f1a8eba7e79169f4f37af43c6b98"}, + {file = "lxml-4.6.2-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:5be4a2e212bb6aa045e37f7d48e3e1e4b6fd259882ed5a00786f82e8c37ce77d"}, + {file = "lxml-4.6.2-cp36-cp36m-win32.whl", hash = "sha256:8c88b599e226994ad4db29d93bc149aa1aff3dc3a4355dd5757569ba78632bdf"}, + {file = "lxml-4.6.2-cp36-cp36m-win_amd64.whl", hash = "sha256:6e4183800f16f3679076dfa8abf2db3083919d7e30764a069fb66b2b9eff9939"}, + {file = "lxml-4.6.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:d8d3d4713f0c28bdc6c806a278d998546e8efc3498949e3ace6e117462ac0a5e"}, + {file = "lxml-4.6.2-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:8246f30ca34dc712ab07e51dc34fea883c00b7ccb0e614651e49da2c49a30711"}, + {file = "lxml-4.6.2-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:923963e989ffbceaa210ac37afc9b906acebe945d2723e9679b643513837b089"}, + {file = "lxml-4.6.2-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:1471cee35eba321827d7d53d104e7b8c593ea3ad376aa2df89533ce8e1b24a01"}, + {file = "lxml-4.6.2-cp37-cp37m-win32.whl", hash = "sha256:2363c35637d2d9d6f26f60a208819e7eafc4305ce39dc1d5005eccc4593331c2"}, + {file = "lxml-4.6.2-cp37-cp37m-win_amd64.whl", hash = "sha256:f4822c0660c3754f1a41a655e37cb4dbbc9be3d35b125a37fab6f82d47674ebc"}, + {file = "lxml-4.6.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0448576c148c129594d890265b1a83b9cd76fd1f0a6a04620753d9a6bcfd0a4d"}, + {file = "lxml-4.6.2-cp38-cp38-manylinux1_i686.whl", hash = "sha256:60a20bfc3bd234d54d49c388950195d23a5583d4108e1a1d47c9eef8d8c042b3"}, + {file = "lxml-4.6.2-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:2e5cc908fe43fe1aa299e58046ad66981131a66aea3129aac7770c37f590a644"}, + {file = "lxml-4.6.2-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:50c348995b47b5a4e330362cf39fc503b4a43b14a91c34c83b955e1805c8e308"}, + {file = "lxml-4.6.2-cp38-cp38-win32.whl", hash = "sha256:94d55bd03d8671686e3f012577d9caa5421a07286dd351dfef64791cf7c6c505"}, + {file = "lxml-4.6.2-cp38-cp38-win_amd64.whl", hash = "sha256:7a7669ff50f41225ca5d6ee0a1ec8413f3a0d8aa2b109f86d540887b7ec0d72a"}, + {file = "lxml-4.6.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e0bfe9bb028974a481410432dbe1b182e8191d5d40382e5b8ff39cdd2e5c5931"}, + {file = "lxml-4.6.2-cp39-cp39-manylinux1_i686.whl", hash = "sha256:6fd8d5903c2e53f49e99359b063df27fdf7acb89a52b6a12494208bf61345a03"}, + {file = "lxml-4.6.2-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:7e9eac1e526386df7c70ef253b792a0a12dd86d833b1d329e038c7a235dfceb5"}, + {file = "lxml-4.6.2-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:7ee8af0b9f7de635c61cdd5b8534b76c52cd03536f29f51151b377f76e214a1a"}, + {file = "lxml-4.6.2-cp39-cp39-win32.whl", hash = "sha256:2e6fd1b8acd005bd71e6c94f30c055594bbd0aa02ef51a22bbfa961ab63b2d75"}, + {file = "lxml-4.6.2-cp39-cp39-win_amd64.whl", hash = 
"sha256:535332fe9d00c3cd455bd3dd7d4bacab86e2d564bdf7606079160fa6251caacf"}, + {file = "lxml-4.6.2.tar.gz", hash = "sha256:cd11c7e8d21af997ee8079037fff88f16fda188a9776eb4b81c7e4c9c0a7d7fc"}, +] +markdown = [ + {file = "Markdown-3.3.3-py3-none-any.whl", hash = "sha256:c109c15b7dc20a9ac454c9e6025927d44460b85bd039da028d85e2b6d0bcc328"}, + {file = "Markdown-3.3.3.tar.gz", hash = "sha256:5d9f2b5ca24bc4c7a390d22323ca4bad200368612b5aaa7796babf971d2b2f18"}, +] +markupsafe = [ + {file = "MarkupSafe-1.1.1-cp27-cp27m-macosx_10_6_intel.whl", hash = "sha256:09027a7803a62ca78792ad89403b1b7a73a01c8cb65909cd876f7fcebd79b161"}, + {file = "MarkupSafe-1.1.1-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:e249096428b3ae81b08327a63a485ad0878de3fb939049038579ac0ef61e17e7"}, + {file = "MarkupSafe-1.1.1-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:500d4957e52ddc3351cabf489e79c91c17f6e0899158447047588650b5e69183"}, + {file = "MarkupSafe-1.1.1-cp27-cp27m-win32.whl", hash = "sha256:b2051432115498d3562c084a49bba65d97cf251f5a331c64a12ee7e04dacc51b"}, + {file = "MarkupSafe-1.1.1-cp27-cp27m-win_amd64.whl", hash = "sha256:98c7086708b163d425c67c7a91bad6e466bb99d797aa64f965e9d25c12111a5e"}, + {file = "MarkupSafe-1.1.1-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:cd5df75523866410809ca100dc9681e301e3c27567cf498077e8551b6d20e42f"}, + {file = "MarkupSafe-1.1.1-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:43a55c2930bbc139570ac2452adf3d70cdbb3cfe5912c71cdce1c2c6bbd9c5d1"}, + {file = "MarkupSafe-1.1.1-cp34-cp34m-macosx_10_6_intel.whl", hash = "sha256:1027c282dad077d0bae18be6794e6b6b8c91d58ed8a8d89a89d59693b9131db5"}, + {file = "MarkupSafe-1.1.1-cp34-cp34m-manylinux1_i686.whl", hash = "sha256:62fe6c95e3ec8a7fad637b7f3d372c15ec1caa01ab47926cfdf7a75b40e0eac1"}, + {file = "MarkupSafe-1.1.1-cp34-cp34m-manylinux1_x86_64.whl", hash = "sha256:88e5fcfb52ee7b911e8bb6d6aa2fd21fbecc674eadd44118a9cc3863f938e735"}, + {file = "MarkupSafe-1.1.1-cp34-cp34m-win32.whl", hash = "sha256:ade5e387d2ad0d7ebf59146cc00c8044acbd863725f887353a10df825fc8ae21"}, + {file = "MarkupSafe-1.1.1-cp34-cp34m-win_amd64.whl", hash = "sha256:09c4b7f37d6c648cb13f9230d847adf22f8171b1ccc4d5682398e77f40309235"}, + {file = "MarkupSafe-1.1.1-cp35-cp35m-macosx_10_6_intel.whl", hash = "sha256:79855e1c5b8da654cf486b830bd42c06e8780cea587384cf6545b7d9ac013a0b"}, + {file = "MarkupSafe-1.1.1-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:c8716a48d94b06bb3b2524c2b77e055fb313aeb4ea620c8dd03a105574ba704f"}, + {file = "MarkupSafe-1.1.1-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:7c1699dfe0cf8ff607dbdcc1e9b9af1755371f92a68f706051cc8c37d447c905"}, + {file = "MarkupSafe-1.1.1-cp35-cp35m-win32.whl", hash = "sha256:6dd73240d2af64df90aa7c4e7481e23825ea70af4b4922f8ede5b9e35f78a3b1"}, + {file = "MarkupSafe-1.1.1-cp35-cp35m-win_amd64.whl", hash = "sha256:9add70b36c5666a2ed02b43b335fe19002ee5235efd4b8a89bfcf9005bebac0d"}, + {file = "MarkupSafe-1.1.1-cp36-cp36m-macosx_10_6_intel.whl", hash = "sha256:24982cc2533820871eba85ba648cd53d8623687ff11cbb805be4ff7b4c971aff"}, + {file = "MarkupSafe-1.1.1-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:00bc623926325b26bb9605ae9eae8a215691f33cae5df11ca5424f06f2d1f473"}, + {file = "MarkupSafe-1.1.1-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:717ba8fe3ae9cc0006d7c451f0bb265ee07739daf76355d06366154ee68d221e"}, + {file = "MarkupSafe-1.1.1-cp36-cp36m-win32.whl", hash = "sha256:535f6fc4d397c1563d08b88e485c3496cf5784e927af890fb3c3aac7f933ec66"}, + {file = "MarkupSafe-1.1.1-cp36-cp36m-win_amd64.whl", hash = 
"sha256:b1282f8c00509d99fef04d8ba936b156d419be841854fe901d8ae224c59f0be5"}, + {file = "MarkupSafe-1.1.1-cp37-cp37m-macosx_10_6_intel.whl", hash = "sha256:8defac2f2ccd6805ebf65f5eeb132adcf2ab57aa11fdf4c0dd5169a004710e7d"}, + {file = "MarkupSafe-1.1.1-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:46c99d2de99945ec5cb54f23c8cd5689f6d7177305ebff350a58ce5f8de1669e"}, + {file = "MarkupSafe-1.1.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:ba59edeaa2fc6114428f1637ffff42da1e311e29382d81b339c1817d37ec93c6"}, + {file = "MarkupSafe-1.1.1-cp37-cp37m-win32.whl", hash = "sha256:b00c1de48212e4cc9603895652c5c410df699856a2853135b3967591e4beebc2"}, + {file = "MarkupSafe-1.1.1-cp37-cp37m-win_amd64.whl", hash = "sha256:9bf40443012702a1d2070043cb6291650a0841ece432556f784f004937f0f32c"}, + {file = "MarkupSafe-1.1.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6788b695d50a51edb699cb55e35487e430fa21f1ed838122d722e0ff0ac5ba15"}, + {file = "MarkupSafe-1.1.1-cp38-cp38-manylinux1_i686.whl", hash = "sha256:cdb132fc825c38e1aeec2c8aa9338310d29d337bebbd7baa06889d09a60a1fa2"}, + {file = "MarkupSafe-1.1.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:13d3144e1e340870b25e7b10b98d779608c02016d5184cfb9927a9f10c689f42"}, + {file = "MarkupSafe-1.1.1-cp38-cp38-win32.whl", hash = "sha256:596510de112c685489095da617b5bcbbac7dd6384aeebeda4df6025d0256a81b"}, + {file = "MarkupSafe-1.1.1-cp38-cp38-win_amd64.whl", hash = "sha256:e8313f01ba26fbbe36c7be1966a7b7424942f670f38e666995b88d012765b9be"}, + {file = "MarkupSafe-1.1.1.tar.gz", hash = "sha256:29872e92839765e546828bb7754a68c418d927cd064fd4708fab9fe9c8bb116b"}, +] +matplotlib = [ + {file = "matplotlib-3.3.3-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:b2a5e1f637a92bb6f3526cc54cc8af0401112e81ce5cba6368a1b7908f9e18bc"}, + {file = "matplotlib-3.3.3-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:c586ac1d64432f92857c3cf4478cfb0ece1ae18b740593f8a39f2f0b27c7fda5"}, + {file = "matplotlib-3.3.3-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:9b03722c89a43a61d4d148acfc89ec5bb54cd0fd1539df25b10eb9c5fa6c393a"}, + {file = "matplotlib-3.3.3-cp36-cp36m-win32.whl", hash = "sha256:2c2c5041608cb75c39cbd0ed05256f8a563e144234a524c59d091abbfa7a868f"}, + {file = "matplotlib-3.3.3-cp36-cp36m-win_amd64.whl", hash = "sha256:c092fc4673260b1446b8578015321081d5db73b94533fe4bf9b69f44e948d174"}, + {file = "matplotlib-3.3.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:27c9393fada62bd0ad7c730562a0fecbd3d5aaa8d9ed80ba7d3ebb8abc4f0453"}, + {file = "matplotlib-3.3.3-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:b8ba2a1dbb4660cb469fe8e1febb5119506059e675180c51396e1723ff9b79d9"}, + {file = "matplotlib-3.3.3-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:0caa687fce6174fef9b27d45f8cc57cbc572e04e98c81db8e628b12b563d59a2"}, + {file = "matplotlib-3.3.3-cp37-cp37m-win32.whl", hash = "sha256:b7b09c61a91b742cb5460b72efd1fe26ef83c1c704f666e0af0df156b046aada"}, + {file = "matplotlib-3.3.3-cp37-cp37m-win_amd64.whl", hash = "sha256:6ffd2d80d76df2e5f9f0c0140b5af97e3b87dd29852dcdb103ec177d853ec06b"}, + {file = "matplotlib-3.3.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:5111d6d47a0f5b8f3e10af7a79d5e7eb7e73a22825391834734274c4f312a8a0"}, + {file = "matplotlib-3.3.3-cp38-cp38-manylinux1_i686.whl", hash = "sha256:a4fe54eab2c7129add75154823e6543b10261f9b65b2abe692d68743a4999f8c"}, + {file = "matplotlib-3.3.3-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:83e6c895d93fdf93eeff1a21ee96778ba65ef258e5d284160f7c628fee40c38f"}, + {file = "matplotlib-3.3.3-cp38-cp38-win32.whl", hash = 
"sha256:b26c472847911f5a7eb49e1c888c31c77c4ddf8023c1545e0e8e0367ba74fb15"}, + {file = "matplotlib-3.3.3-cp38-cp38-win_amd64.whl", hash = "sha256:09225edca87a79815822eb7d3be63a83ebd4d9d98d5aa3a15a94f4eee2435954"}, + {file = "matplotlib-3.3.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:eb6b6700ea454bb88333d98601e74928e06f9669c1ea231b4c4c666c1d7701b4"}, + {file = "matplotlib-3.3.3-cp39-cp39-manylinux1_i686.whl", hash = "sha256:2d31aff0c8184b05006ad756b9a4dc2a0805e94d28f3abc3187e881b6673b302"}, + {file = "matplotlib-3.3.3-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:d082f77b4ed876ae94a9373f0db96bf8768a7cca6c58fc3038f94e30ffde1880"}, + {file = "matplotlib-3.3.3-cp39-cp39-win32.whl", hash = "sha256:e71cdd402047e657c1662073e9361106c6981e9621ab8c249388dfc3ec1de07b"}, + {file = "matplotlib-3.3.3-cp39-cp39-win_amd64.whl", hash = "sha256:756ee498b9ba35460e4cbbd73f09018e906daa8537fff61da5b5bf8d5e9de5c7"}, + {file = "matplotlib-3.3.3-pp36-pypy36_pp73-macosx_10_9_x86_64.whl", hash = "sha256:7ad44f2c74c50567c694ee91c6fa16d67e7c8af6f22c656b80469ad927688457"}, + {file = "matplotlib-3.3.3-pp36-pypy36_pp73-manylinux2010_x86_64.whl", hash = "sha256:3a4c3e9be63adf8e9b305aa58fb3ec40ecc61fd0f8fd3328ce55bc30e7a2aeb0"}, + {file = "matplotlib-3.3.3-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:746897fbd72bd462b888c74ed35d812ca76006b04f717cd44698cdfc99aca70d"}, + {file = "matplotlib-3.3.3-pp37-pypy37_pp73-manylinux2010_x86_64.whl", hash = "sha256:5ed3d3342698c2b1f3651f8ea6c099b0f196d16ee00e33dc3a6fee8cb01d530a"}, + {file = "matplotlib-3.3.3.tar.gz", hash = "sha256:b1b60c6476c4cfe9e5cf8ab0d3127476fd3d5f05de0f343a452badaad0e4bdec"}, +] +mccabe = [ + {file = "mccabe-0.6.1-py2.py3-none-any.whl", hash = "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42"}, + {file = "mccabe-0.6.1.tar.gz", hash = "sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f"}, +] +mistune = [ + {file = "mistune-0.8.4-py2.py3-none-any.whl", hash = "sha256:88a1051873018da288eee8538d476dffe1262495144b33ecb586c4ab266bb8d4"}, + {file = "mistune-0.8.4.tar.gz", hash = "sha256:59a3429db53c50b5c6bcc8a07f8848cb00d7dc8bdb431a4ab41920d201d4756e"}, +] +more-itertools = [ + {file = "more-itertools-8.6.0.tar.gz", hash = "sha256:b3a9005928e5bed54076e6e549c792b306fddfe72b2d1d22dd63d42d5d3899cf"}, + {file = "more_itertools-8.6.0-py3-none-any.whl", hash = "sha256:8e1a2a43b2f2727425f2b5839587ae37093f19153dc26c0927d1048ff6557330"}, +] +mpld3 = [ + {file = "mpld3-0.3.tar.gz", hash = "sha256:4d455884a211bf99b37ecc760759435c7bb6a5955de47d8daf4967e301878ab7"}, +] +multiprocess = [ + {file = "multiprocess-0.70.11.1-cp27-cp27m-macosx_10_8_x86_64.whl", hash = "sha256:8f0d0640642acc654fe2fb5cb529ebbe116468a1dd1544d484db6e79033767c8"}, + {file = "multiprocess-0.70.11.1-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:4b33a0111e341fad5e3c6bb6dd7f592596f2974cc5ecddee06b9a999bac4cbb0"}, + {file = "multiprocess-0.70.11.1-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:0eab6e0e87acba9586e5d6869d21271cc865d72d74b7f6b30b6290dffca5caae"}, + {file = "multiprocess-0.70.11.1-cp27-cp27m-win32.whl", hash = "sha256:4d97020a50a18862fbb1f84d81914a2a28f2d78bc315de9a6699459682df2a67"}, + {file = "multiprocess-0.70.11.1-cp27-cp27m-win_amd64.whl", hash = "sha256:217e96638fbfd951a203b8dc17410839e4aea8aa3fb9cc393c37e491dcac2c65"}, + {file = "multiprocess-0.70.11.1-py35-none-any.whl", hash = "sha256:ebb92b67a61b901bfc277c4525e86afba24a60638d192b62f8c332933da995f4"}, + {file = "multiprocess-0.70.11.1-py36-none-any.whl", hash = 
"sha256:d8e87b086373fbd19c28659391e5b8888aadeaeb88f0e448e55502578bde4920"}, + {file = "multiprocess-0.70.11.1-py37-none-any.whl", hash = "sha256:164c77448e357ebee0dc6abc7ee8c823e40e295e629a5fc6d31725109a3a7ee9"}, + {file = "multiprocess-0.70.11.1-py38-none-any.whl", hash = "sha256:7761fed45cae123aa4b7bb918e77a5cfef6fd436c65bc87453e76bf2bdc3e29e"}, + {file = "multiprocess-0.70.11.1-py39-none-any.whl", hash = "sha256:ae026110257fc551fc949d96d69160768810d9019786c8c84c0c28d1f88fab67"}, + {file = "multiprocess-0.70.11.1.zip", hash = "sha256:9d5e417f3ebce4d027a3c900995840f167f316d9f73c0a7a1fbb4ac0116298d0"}, +] +murmurhash = [ + {file = "murmurhash-1.0.5-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:ef8819d15973e0d6f69688bafc097a1fae081675c1de39807028869a1320b1a9"}, + {file = "murmurhash-1.0.5-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:76251513a2acad6c2e4b7aeffc5fcb807ee97a66cad5c2990557556555a6b7e9"}, + {file = "murmurhash-1.0.5-cp36-cp36m-manylinux2014_x86_64.whl", hash = "sha256:d58315961dc5a5e740f41f2ac5c3a0ebc61ef472f8afeb4db7eeb3b863243105"}, + {file = "murmurhash-1.0.5-cp36-cp36m-win_amd64.whl", hash = "sha256:23c56182822a1ed88e2a098ac56958dfec380696a9a943df203b9b41e4bcf5e4"}, + {file = "murmurhash-1.0.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:023391cfefe584ac544c1ea0936976c0119b17dd27bb8280652cef1704f76428"}, + {file = "murmurhash-1.0.5-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:f00321998f0a6bad3fd068babf448a296d4b0b1f4dd424cab863ebe5ed54182f"}, + {file = "murmurhash-1.0.5-cp37-cp37m-manylinux2014_x86_64.whl", hash = "sha256:8381172e03c5f6f947005fb146a53c5e5a9e0d630be4a40cbf8838e9324bfe1c"}, + {file = "murmurhash-1.0.5-cp37-cp37m-win_amd64.whl", hash = "sha256:fed7578fbaa6c301f27ed80834c1f7494ea7d335e269e98b9aee477cf0b3b487"}, + {file = "murmurhash-1.0.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:d4c3a0242014cf4c84e9ea0ba3f13b48f02a3992de3da7b1116d11b816451195"}, + {file = "murmurhash-1.0.5-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:99e55488476a5f70e8d305fd31258f140e52f724f788bcc50c31ec846a2b3766"}, + {file = "murmurhash-1.0.5-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:b9292c532538cf47846ca81056cfeab08b877c35fe7521d6524aa92ddcd833e2"}, + {file = "murmurhash-1.0.5-cp38-cp38-win_amd64.whl", hash = "sha256:fd17973fd4554715efd8d86b3e9200358e49e437fdb92a897ca127aced48b61c"}, + {file = "murmurhash-1.0.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:81474a45c4074637a6dfc8fea4cdebf091ab5aa781c2cfcb94c43b16030badd7"}, + {file = "murmurhash-1.0.5-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:a9bd2312996e6e47605af305a1e5f091eba1bdd637cdd9986aec4885cb4c5530"}, + {file = "murmurhash-1.0.5-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:892749023da26420d194f37bfa30df1368aaac0149cfa3b2105db36b66549e37"}, + {file = "murmurhash-1.0.5-cp39-cp39-win_amd64.whl", hash = "sha256:add366944eb8ec73013a4f36e166c5a4f0f7628ffe1746bc5fe031347489e5e8"}, + {file = "murmurhash-1.0.5.tar.gz", hash = "sha256:98ec9d727bd998a35385abd56b062cf0cca216725ea7ec5068604ab566f7e97f"}, +] +mypy-extensions = [ + {file = "mypy_extensions-0.4.3-py2.py3-none-any.whl", hash = "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d"}, + {file = "mypy_extensions-0.4.3.tar.gz", hash = "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8"}, +] +nbclassic = [ + {file = "nbclassic-0.2.5-py3-none-any.whl", hash = "sha256:96ffc2b5e01e06825c7558066fc66f2dff00fc7a499ad4988738fda076d5587a"}, + {file = "nbclassic-0.2.5.tar.gz", hash = 
"sha256:e6da2116ab76a63de62f42cf8ea93c9a0c564aaf8315834f7c52efb85b4640ab"}, +] +nbclient = [ + {file = "nbclient-0.5.1-py3-none-any.whl", hash = "sha256:4d6b116187c795c99b9dba13d46e764d596574b14c296d60670c8dfe454db364"}, + {file = "nbclient-0.5.1.tar.gz", hash = "sha256:01e2d726d16eaf2cde6db74a87e2451453547e8832d142f73f72fddcd4fe0250"}, +] +nbconvert = [ + {file = "nbconvert-6.0.7-py3-none-any.whl", hash = "sha256:39e9f977920b203baea0be67eea59f7b37a761caa542abe80f5897ce3cf6311d"}, + {file = "nbconvert-6.0.7.tar.gz", hash = "sha256:cbbc13a86dfbd4d1b5dee106539de0795b4db156c894c2c5dc382062bbc29002"}, +] +nbformat = [ + {file = "nbformat-5.0.8-py3-none-any.whl", hash = "sha256:aa9450c16d29286dc69b92ea4913c1bffe86488f90184445996ccc03a2f60382"}, + {file = "nbformat-5.0.8.tar.gz", hash = "sha256:f545b22138865bfbcc6b1ffe89ed5a2b8e2dc5d4fe876f2ca60d8e6f702a30f8"}, +] +nbsphinx = [ + {file = "nbsphinx-0.8.0-py3-none-any.whl", hash = "sha256:14ccbbd3d5944fd7e14087f67b83ea75cd41c9eb679561258237987d322e9381"}, + {file = "nbsphinx-0.8.0.tar.gz", hash = "sha256:369c16fe93af14c878d61fb3e81d838196fb35b27deade2cd7b95efe1fe56ea0"}, +] +nest-asyncio = [ + {file = "nest_asyncio-1.4.3-py3-none-any.whl", hash = "sha256:dbe032f3e9ff7f120e76be22bf6e7958e867aed1743e6894b8a9585fe8495cc9"}, + {file = "nest_asyncio-1.4.3.tar.gz", hash = "sha256:eaa09ef1353ebefae19162ad423eef7a12166bcc63866f8bff8f3635353cd9fa"}, +] +networkx = [ + {file = "networkx-2.5-py3-none-any.whl", hash = "sha256:8c5812e9f798d37c50570d15c4a69d5710a18d77bafc903ee9c5fba7454c616c"}, + {file = "networkx-2.5.tar.gz", hash = "sha256:7978955423fbc9639c10498878be59caf99b44dc304c2286162fd24b458c1602"}, +] +nlpaug = [ + {file = "nlpaug-1.1.1-py3-none-any.whl", hash = "sha256:b5601a89f2fc3b0ae60ac9a42d2fd4959ae7de36d024b34c7b2aafe52d1a2e6b"}, +] +nltk = [ + {file = "nltk-3.5.zip", hash = "sha256:845365449cd8c5f9731f7cb9f8bd6fd0767553b9d53af9eb1b3abf7700936b35"}, +] +nodeenv = [ + {file = "nodeenv-1.5.0-py2.py3-none-any.whl", hash = "sha256:5304d424c529c997bc888453aeaa6362d242b6b4631e90f3d4bf1b290f1c84a9"}, + {file = "nodeenv-1.5.0.tar.gz", hash = "sha256:ab45090ae383b716c4ef89e690c41ff8c2b257b85b309f01f3654df3d084bd7c"}, +] +notebook = [ + {file = "notebook-6.1.6-py3-none-any.whl", hash = "sha256:e6a62188e319a5d45dd2ed24719f646adf88bef8be1f654ebd0ab360ece6d7a6"}, + {file = "notebook-6.1.6.tar.gz", hash = "sha256:cf40d4f81541401db5a2fda1707ca7877157abd41f04ef7b88f02b67f3c61791"}, +] +num2words = [ + {file = "num2words-0.5.10-py3-none-any.whl", hash = "sha256:0b6e5f53f11d3005787e206d9c03382f459ef048a43c544e3db3b1e05a961548"}, + {file = "num2words-0.5.10.tar.gz", hash = "sha256:37cd4f60678f7e1045cdc3adf6acf93c8b41bf732da860f97d301f04e611cc57"}, +] +numpy = [ + {file = "numpy-1.18.5-cp35-cp35m-macosx_10_9_intel.whl", hash = "sha256:e91d31b34fc7c2c8f756b4e902f901f856ae53a93399368d9a0dc7be17ed2ca0"}, + {file = "numpy-1.18.5-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:7d42ab8cedd175b5ebcb39b5208b25ba104842489ed59fbb29356f671ac93583"}, + {file = "numpy-1.18.5-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:a78e438db8ec26d5d9d0e584b27ef25c7afa5a182d1bf4d05e313d2d6d515271"}, + {file = "numpy-1.18.5-cp35-cp35m-win32.whl", hash = "sha256:a87f59508c2b7ceb8631c20630118cc546f1f815e034193dc72390db038a5cb3"}, + {file = "numpy-1.18.5-cp35-cp35m-win_amd64.whl", hash = "sha256:965df25449305092b23d5145b9bdaeb0149b6e41a77a7d728b1644b3c99277c1"}, + {file = "numpy-1.18.5-cp36-cp36m-macosx_10_9_x86_64.whl", hash = 
"sha256:ac792b385d81151bae2a5a8adb2b88261ceb4976dbfaaad9ce3a200e036753dc"}, + {file = "numpy-1.18.5-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:ef627986941b5edd1ed74ba89ca43196ed197f1a206a3f18cc9faf2fb84fd675"}, + {file = "numpy-1.18.5-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:f718a7949d1c4f622ff548c572e0c03440b49b9531ff00e4ed5738b459f011e8"}, + {file = "numpy-1.18.5-cp36-cp36m-win32.whl", hash = "sha256:4064f53d4cce69e9ac613256dc2162e56f20a4e2d2086b1956dd2fcf77b7fac5"}, + {file = "numpy-1.18.5-cp36-cp36m-win_amd64.whl", hash = "sha256:b03b2c0badeb606d1232e5f78852c102c0a7989d3a534b3129e7856a52f3d161"}, + {file = "numpy-1.18.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:a7acefddf994af1aeba05bbbafe4ba983a187079f125146dc5859e6d817df824"}, + {file = "numpy-1.18.5-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:cd49930af1d1e49a812d987c2620ee63965b619257bd76eaaa95870ca08837cf"}, + {file = "numpy-1.18.5-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:b39321f1a74d1f9183bf1638a745b4fd6fe80efbb1f6b32b932a588b4bc7695f"}, + {file = "numpy-1.18.5-cp37-cp37m-win32.whl", hash = "sha256:cae14a01a159b1ed91a324722d746523ec757357260c6804d11d6147a9e53e3f"}, + {file = "numpy-1.18.5-cp37-cp37m-win_amd64.whl", hash = "sha256:0172304e7d8d40e9e49553901903dc5f5a49a703363ed756796f5808a06fc233"}, + {file = "numpy-1.18.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:e15b382603c58f24265c9c931c9a45eebf44fe2e6b4eaedbb0d025ab3255228b"}, + {file = "numpy-1.18.5-cp38-cp38-manylinux1_i686.whl", hash = "sha256:3676abe3d621fc467c4c1469ee11e395c82b2d6b5463a9454e37fe9da07cd0d7"}, + {file = "numpy-1.18.5-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:4674f7d27a6c1c52a4d1aa5f0881f1eff840d2206989bae6acb1c7668c02ebfb"}, + {file = "numpy-1.18.5-cp38-cp38-win32.whl", hash = "sha256:9c9d6531bc1886454f44aa8f809268bc481295cf9740827254f53c30104f074a"}, + {file = "numpy-1.18.5-cp38-cp38-win_amd64.whl", hash = "sha256:3dd6823d3e04b5f223e3e265b4a1eae15f104f4366edd409e5a5e413a98f911f"}, + {file = "numpy-1.18.5.zip", hash = "sha256:34e96e9dae65c4839bd80012023aadd6ee2ccb73ce7fdf3074c62f301e63120b"}, +] +oauthlib = [ + {file = "oauthlib-3.1.0-py2.py3-none-any.whl", hash = "sha256:df884cd6cbe20e32633f1db1072e9356f53638e4361bef4e8b03c9127c9328ea"}, + {file = "oauthlib-3.1.0.tar.gz", hash = "sha256:bee41cc35fcca6e988463cacc3bcb8a96224f470ca547e697b604cc697b2f889"}, +] +omegaconf = [ + {file = "omegaconf-2.0.5-py3-none-any.whl", hash = "sha256:d1a39f93e06b33ed7033311006d41bdc7a92e6c484c09327f4dc6bdcbbfe8a8e"}, + {file = "omegaconf-2.0.5.tar.gz", hash = "sha256:be2378999380395d51eedb39cfcc03d967971d9baa99d1c36f8527b09ea72709"}, +] +opt-einsum = [ + {file = "opt_einsum-3.3.0-py3-none-any.whl", hash = "sha256:2455e59e3947d3c275477df7f5205b30635e266fe6dc300e3d9f9646bfcea147"}, + {file = "opt_einsum-3.3.0.tar.gz", hash = "sha256:59f6475f77bbc37dcf7cd748519c0ec60722e91e63ca114e68821c0c54a46549"}, +] +overrides = [ + {file = "overrides-3.1.0.tar.gz", hash = "sha256:30f761124579e59884b018758c4d7794914ef02a6c038621123fec49ea7599c6"}, +] +packaging = [ + {file = "packaging-20.8-py2.py3-none-any.whl", hash = "sha256:24e0da08660a87484d1602c30bb4902d74816b6985b93de36926f5bc95741858"}, + {file = "packaging-20.8.tar.gz", hash = "sha256:78598185a7008a470d64526a8059de9aaa449238f280fc9eb6b13ba6c4109093"}, +] +pandas = [ + {file = "pandas-1.2.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:cba93d4fd3b0a42858b2b599495aff793fb5d94587979f45a14177d1217ba446"}, + {file = "pandas-1.2.0-cp37-cp37m-manylinux1_i686.whl", hash = 
"sha256:9e18631d996fe131de6cb31a8bdae18965cc8f39eb23fdfbbf42808ecc63dabf"}, + {file = "pandas-1.2.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:7b54c14130a3448d81eed1348f52429c23e27188d9db6e6d4afeae792bc49c11"}, + {file = "pandas-1.2.0-cp37-cp37m-win32.whl", hash = "sha256:6c1a57e4d0d6f9633a07817c44e6b36d81c265fe4c52d0c0505513a2d0f7953c"}, + {file = "pandas-1.2.0-cp37-cp37m-win_amd64.whl", hash = "sha256:43482789c55cbabeed9482263cfc98a11e8fcae900cb63ef038948acb4a72570"}, + {file = "pandas-1.2.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0be6102dd99910513e75ed6536284743ead810349c51bdeadd2a5b6649f30abb"}, + {file = "pandas-1.2.0-cp38-cp38-manylinux1_i686.whl", hash = "sha256:9c6692cea6d56da8650847172bdb148622f545e7782d17995822434c79d7a211"}, + {file = "pandas-1.2.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:272675a98fa4954b9fc0933df775596fc942e50015d7e75d8f19548808a2bfdf"}, + {file = "pandas-1.2.0-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:33318fa24b192b1a4684347ff76679a7267fd4e547da9f71556a5914f0dc10e7"}, + {file = "pandas-1.2.0-cp38-cp38-win32.whl", hash = "sha256:3bc6d2be03cb75981d8cbeda09503cd9d6d699fc0dc28a65e197165ad527b7b8"}, + {file = "pandas-1.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:7904ee438549b5223ce8dc008772458dd7c5cf0ccc64cf903e81202400702235"}, + {file = "pandas-1.2.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f8b87d2f541cd9bc4ecfe85a561abac85c33fe4de4ce70cca36b2768af2611f5"}, + {file = "pandas-1.2.0-cp39-cp39-manylinux1_i686.whl", hash = "sha256:91fd0b94e7b98528177a05e6f65efea79d7ef9dec15ee48c7c69fc39fdd87235"}, + {file = "pandas-1.2.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:8f92b07cdbfa3704d85b4264e52c216cafe6c0059b0d07cdad8cb29e0b90f2b8"}, + {file = "pandas-1.2.0-cp39-cp39-win32.whl", hash = "sha256:2d8b4f532db37418121831a461fd107d826c240b098f52e7a1b4ab3d5aaa4fb2"}, + {file = "pandas-1.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:616478c1bd8fe1e600f521ae2da434e021c11e7a4e5da3451d02906143d3629a"}, + {file = "pandas-1.2.0.tar.gz", hash = "sha256:e03386615b970b8b41da6a68afe717626741bb2431cec993640685614c0680e4"}, +] +pandocfilters = [ + {file = "pandocfilters-1.4.3.tar.gz", hash = "sha256:bc63fbb50534b4b1f8ebe1860889289e8af94a23bff7445259592df25a3906eb"}, +] +parso = [ + {file = "parso-0.8.1-py2.py3-none-any.whl", hash = "sha256:15b00182f472319383252c18d5913b69269590616c947747bc50bf4ac768f410"}, + {file = "parso-0.8.1.tar.gz", hash = "sha256:8519430ad07087d4c997fda3a7918f7cfa27cb58972a8c89c2a0295a1c940e9e"}, +] +pathspec = [ + {file = "pathspec-0.8.1-py2.py3-none-any.whl", hash = "sha256:aa0cb481c4041bf52ffa7b0d8fa6cd3e88a2ca4879c533c9153882ee2556790d"}, + {file = "pathspec-0.8.1.tar.gz", hash = "sha256:86379d6b86d75816baba717e64b1a3a3469deb93bb76d613c9ce79edc5cb68fd"}, +] +pexpect = [ + {file = "pexpect-4.8.0-py2.py3-none-any.whl", hash = "sha256:0b48a55dcb3c05f3329815901ea4fc1537514d6ba867a152b581d69ae3710937"}, + {file = "pexpect-4.8.0.tar.gz", hash = "sha256:fc65a43959d153d0114afe13997d439c22823a27cefceb5ff35c2178c6784c0c"}, +] +pickleshare = [ + {file = "pickleshare-0.7.5-py2.py3-none-any.whl", hash = "sha256:9649af414d74d4df115d5d718f82acb59c9d418196b7b4290ed47a12ce62df56"}, + {file = "pickleshare-0.7.5.tar.gz", hash = "sha256:87683d47965c1da65cdacaf31c8441d12b8044cdec9aca500cd78fc2c683afca"}, +] +pillow = [ + {file = "Pillow-8.0.1-cp36-cp36m-macosx_10_10_x86_64.whl", hash = "sha256:b63d4ff734263ae4ce6593798bcfee6dbfb00523c82753a3a03cbc05555a9cc3"}, + {file = "Pillow-8.0.1-cp36-cp36m-manylinux1_i686.whl", hash = 
"sha256:5f9403af9c790cc18411ea398a6950ee2def2a830ad0cfe6dc9122e6d528b302"}, + {file = "Pillow-8.0.1-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:6b4a8fd632b4ebee28282a9fef4c341835a1aa8671e2770b6f89adc8e8c2703c"}, + {file = "Pillow-8.0.1-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:cc3ea6b23954da84dbee8025c616040d9aa5eaf34ea6895a0a762ee9d3e12e11"}, + {file = "Pillow-8.0.1-cp36-cp36m-win32.whl", hash = "sha256:d8a96747df78cda35980905bf26e72960cba6d355ace4780d4bdde3b217cdf1e"}, + {file = "Pillow-8.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:7ba0ba61252ab23052e642abdb17fd08fdcfdbbf3b74c969a30c58ac1ade7cd3"}, + {file = "Pillow-8.0.1-cp37-cp37m-macosx_10_10_x86_64.whl", hash = "sha256:795e91a60f291e75de2e20e6bdd67770f793c8605b553cb6e4387ce0cb302e09"}, + {file = "Pillow-8.0.1-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:0a2e8d03787ec7ad71dc18aec9367c946ef8ef50e1e78c71f743bc3a770f9fae"}, + {file = "Pillow-8.0.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:006de60d7580d81f4a1a7e9f0173dc90a932e3905cc4d47ea909bc946302311a"}, + {file = "Pillow-8.0.1-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:bd7bf289e05470b1bc74889d1466d9ad4a56d201f24397557b6f65c24a6844b8"}, + {file = "Pillow-8.0.1-cp37-cp37m-win32.whl", hash = "sha256:95edb1ed513e68bddc2aee3de66ceaf743590bf16c023fb9977adc4be15bd3f0"}, + {file = "Pillow-8.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:e38d58d9138ef972fceb7aeec4be02e3f01d383723965bfcef14d174c8ccd039"}, + {file = "Pillow-8.0.1-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:d3d07c86d4efa1facdf32aa878bd508c0dc4f87c48125cc16b937baa4e5b5e11"}, + {file = "Pillow-8.0.1-cp38-cp38-manylinux1_i686.whl", hash = "sha256:fbd922f702582cb0d71ef94442bfca57624352622d75e3be7a1e7e9360b07e72"}, + {file = "Pillow-8.0.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:92c882b70a40c79de9f5294dc99390671e07fc0b0113d472cbea3fde15db1792"}, + {file = "Pillow-8.0.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:7c9401e68730d6c4245b8e361d3d13e1035cbc94db86b49dc7da8bec235d0015"}, + {file = "Pillow-8.0.1-cp38-cp38-win32.whl", hash = "sha256:6c1aca8231625115104a06e4389fcd9ec88f0c9befbabd80dc206c35561be271"}, + {file = "Pillow-8.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:cc9ec588c6ef3a1325fa032ec14d97b7309db493782ea8c304666fb10c3bd9a7"}, + {file = "Pillow-8.0.1-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:eb472586374dc66b31e36e14720747595c2b265ae962987261f044e5cce644b5"}, + {file = "Pillow-8.0.1-cp39-cp39-manylinux1_i686.whl", hash = "sha256:0eeeae397e5a79dc088d8297a4c2c6f901f8fb30db47795113a4a605d0f1e5ce"}, + {file = "Pillow-8.0.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:81f812d8f5e8a09b246515fac141e9d10113229bc33ea073fec11403b016bcf3"}, + {file = "Pillow-8.0.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:895d54c0ddc78a478c80f9c438579ac15f3e27bf442c2a9aa74d41d0e4d12544"}, + {file = "Pillow-8.0.1-cp39-cp39-win32.whl", hash = "sha256:2fb113757a369a6cdb189f8df3226e995acfed0a8919a72416626af1a0a71140"}, + {file = "Pillow-8.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:59e903ca800c8cfd1ebe482349ec7c35687b95e98cefae213e271c8c7fffa021"}, + {file = "Pillow-8.0.1-pp36-pypy36_pp73-macosx_10_10_x86_64.whl", hash = "sha256:5abd653a23c35d980b332bc0431d39663b1709d64142e3652890df4c9b6970f6"}, + {file = "Pillow-8.0.1-pp36-pypy36_pp73-manylinux2010_x86_64.whl", hash = "sha256:4b0ef2470c4979e345e4e0cc1bbac65fda11d0d7b789dbac035e4c6ce3f98adb"}, + {file = "Pillow-8.0.1-pp37-pypy37_pp73-win32.whl", hash = 
"sha256:8de332053707c80963b589b22f8e0229f1be1f3ca862a932c1bcd48dafb18dd8"}, + {file = "Pillow-8.0.1.tar.gz", hash = "sha256:11c5c6e9b02c9dac08af04f093eb5a2f84857df70a7d4a6a6ad461aca803fb9e"}, +] +plac = [ + {file = "plac-1.1.3-py2.py3-none-any.whl", hash = "sha256:487e553017d419f35add346c4c09707e52fa53f7e7181ce1098ca27620e9ceee"}, + {file = "plac-1.1.3.tar.gz", hash = "sha256:398cb947c60c4c25e275e1f1dadf027e7096858fb260b8ece3b33bcff90d985f"}, +] +plotly = [ + {file = "plotly-4.14.1-py2.py3-none-any.whl", hash = "sha256:f0462e494d324a1649dee0208ab04fc7a158740488373d31a499f6d691167a04"}, + {file = "plotly-4.14.1.tar.gz", hash = "sha256:f2a38726ddc7ce185a277c78a41b50bb8cfcfa4f53b45a481417401cadc0454c"}, +] +pluggy = [ + {file = "pluggy-0.13.1-py2.py3-none-any.whl", hash = "sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d"}, + {file = "pluggy-0.13.1.tar.gz", hash = "sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0"}, +] +pre-commit = [ + {file = "pre_commit-2.9.3-py2.py3-none-any.whl", hash = "sha256:6c86d977d00ddc8a60d68eec19f51ef212d9462937acf3ea37c7adec32284ac0"}, + {file = "pre_commit-2.9.3.tar.gz", hash = "sha256:ee784c11953e6d8badb97d19bc46b997a3a9eded849881ec587accd8608d74a4"}, +] +preshed = [ + {file = "preshed-3.0.5-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:572899224578d30f6a67fadecb3d62b824866b4d2b6bad73f71abf7585db1389"}, + {file = "preshed-3.0.5-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:67c11e384ce4c008bc487ba3a29bafdfe038b9a2546ccfe0fe2160480b356fed"}, + {file = "preshed-3.0.5-cp36-cp36m-manylinux2014_x86_64.whl", hash = "sha256:6e833f1632a1d0232bdc6df6c3542fb130ef044d8656b24576d9fd19e5f1e0d1"}, + {file = "preshed-3.0.5-cp36-cp36m-win_amd64.whl", hash = "sha256:1ce0846cb7ebb2ea913d44ec2e296098c285443ecdea80ddf02656bbef4deacb"}, + {file = "preshed-3.0.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8a560850b8c53c1487ba51c2b0f5769535512b36d3b129ad5796b64653abe2f9"}, + {file = "preshed-3.0.5-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:6f126bcc414a0304b54956f9dac2628a0f9bef1657d1b3a3837fc82b791aa2a1"}, + {file = "preshed-3.0.5-cp37-cp37m-manylinux2014_x86_64.whl", hash = "sha256:1bdededa7fd81f26a42bc9d11d542657c74746b7ea7fc2b2ca6d0ddbf1f93792"}, + {file = "preshed-3.0.5-cp37-cp37m-win_amd64.whl", hash = "sha256:9ebf444f8487782c84d7b5acb1d7195e603155882fafc4697344199eeeafbe5f"}, + {file = "preshed-3.0.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8a3adffde3126c2a0ab7d57cab1d605cb5f63da1ba88088ad3cf8debfd9aa4dc"}, + {file = "preshed-3.0.5-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:56b9603517bb2a364418163236d6a147a1d722ff7546cbe085e76e25ae118e89"}, + {file = "preshed-3.0.5-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:5e06a49477bd257eea02bf823b5d3e201d00a19d6976523a58da8606b2358481"}, + {file = "preshed-3.0.5-cp38-cp38-win_amd64.whl", hash = "sha256:ca4a7681b643b8356e7dfdab9cf668b2b34bd07ef4b09ebed44c8aeb3b1626ee"}, + {file = "preshed-3.0.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:85074eebf90a858a6b68242f1ae265ca99e1af45bf9dafcb9a83d49b0815a2e1"}, + {file = "preshed-3.0.5-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:12cbe1e378b4f1c6b06f5e4130408befe916e55ea1616e6aa63c5cd0ccd9c927"}, + {file = "preshed-3.0.5-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:30f0c8ea85113d0565a1e3eb6222d00513ec39b56f3f9a2615e304575e65422e"}, + {file = "preshed-3.0.5-cp39-cp39-win_amd64.whl", hash = "sha256:fb4d2e82add82d63b2c97802b759a58ff200d06b632e2edc48a9ced1e6472faf"}, + {file = 
"preshed-3.0.5.tar.gz", hash = "sha256:c6d3dba39ed5059aaf99767017b9568c75b2d0780c3481e204b1daecde00360e"}, +] +progressbar = [ + {file = "progressbar-2.5.tar.gz", hash = "sha256:5d81cb529da2e223b53962afd6c8ca0f05c6670e40309a7219eacc36af9b6c63"}, +] +prometheus-client = [ + {file = "prometheus_client-0.9.0-py2.py3-none-any.whl", hash = "sha256:b08c34c328e1bf5961f0b4352668e6c8f145b4a087e09b7296ef62cbe4693d35"}, + {file = "prometheus_client-0.9.0.tar.gz", hash = "sha256:9da7b32f02439d8c04f7777021c304ed51d9ec180604700c1ba72a4d44dceb03"}, +] +prompt-toolkit = [ + {file = "prompt_toolkit-3.0.8-py3-none-any.whl", hash = "sha256:7debb9a521e0b1ee7d2fe96ee4bd60ef03c6492784de0547337ca4433e46aa63"}, + {file = "prompt_toolkit-3.0.8.tar.gz", hash = "sha256:25c95d2ac813909f813c93fde734b6e44406d1477a9faef7c915ff37d39c0a8c"}, +] +protobuf = [ + {file = "protobuf-3.14.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:629b03fd3caae7f815b0c66b41273f6b1900a579e2ccb41ef4493a4f5fb84f3a"}, + {file = "protobuf-3.14.0-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:5b7a637212cc9b2bcf85dd828b1178d19efdf74dbfe1ddf8cd1b8e01fdaaa7f5"}, + {file = "protobuf-3.14.0-cp35-cp35m-macosx_10_9_intel.whl", hash = "sha256:43b554b9e73a07ba84ed6cf25db0ff88b1e06be610b37656e292e3cbb5437472"}, + {file = "protobuf-3.14.0-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:5e9806a43232a1fa0c9cf5da8dc06f6910d53e4390be1fa06f06454d888a9142"}, + {file = "protobuf-3.14.0-cp35-cp35m-win32.whl", hash = "sha256:1c51fda1bbc9634246e7be6016d860be01747354ed7015ebe38acf4452f470d2"}, + {file = "protobuf-3.14.0-cp35-cp35m-win_amd64.whl", hash = "sha256:4b74301b30513b1a7494d3055d95c714b560fbb630d8fb9956b6f27992c9f980"}, + {file = "protobuf-3.14.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:86a75477addde4918e9a1904e5c6af8d7b691f2a3f65587d73b16100fbe4c3b2"}, + {file = "protobuf-3.14.0-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:ecc33531a213eee22ad60e0e2aaea6c8ba0021f0cce35dbf0ab03dee6e2a23a1"}, + {file = "protobuf-3.14.0-cp36-cp36m-win32.whl", hash = "sha256:72230ed56f026dd664c21d73c5db73ebba50d924d7ba6b7c0d81a121e390406e"}, + {file = "protobuf-3.14.0-cp36-cp36m-win_amd64.whl", hash = "sha256:0fc96785262042e4863b3f3b5c429d4636f10d90061e1840fce1baaf59b1a836"}, + {file = "protobuf-3.14.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:4e75105c9dfe13719b7293f75bd53033108f4ba03d44e71db0ec2a0e8401eafd"}, + {file = "protobuf-3.14.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:2a7e2fe101a7ace75e9327b9c946d247749e564a267b0515cf41dfe450b69bac"}, + {file = "protobuf-3.14.0-cp37-cp37m-win32.whl", hash = "sha256:b0d5d35faeb07e22a1ddf8dce620860c8fe145426c02d1a0ae2688c6e8ede36d"}, + {file = "protobuf-3.14.0-cp37-cp37m-win_amd64.whl", hash = "sha256:8971c421dbd7aad930c9bd2694122f332350b6ccb5202a8b7b06f3f1a5c41ed5"}, + {file = "protobuf-3.14.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9616f0b65a30851e62f1713336c931fcd32c057202b7ff2cfbfca0fc7d5e3043"}, + {file = "protobuf-3.14.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:22bcd2e284b3b1d969c12e84dc9b9a71701ec82d8ce975fdda19712e1cfd4e00"}, + {file = "protobuf-3.14.0-py2.py3-none-any.whl", hash = "sha256:0e247612fadda953047f53301a7b0407cb0c3cb4ae25a6fde661597a04039b3c"}, + {file = "protobuf-3.14.0.tar.gz", hash = "sha256:1d63eb389347293d8915fb47bee0951c7b5dab522a4a60118b9a18f33e21f8ce"}, +] +ptyprocess = [ + {file = "ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35"}, + {file = "ptyprocess-0.7.0.tar.gz", hash = 
"sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220"}, +] +py = [ + {file = "py-1.10.0-py2.py3-none-any.whl", hash = "sha256:3b80836aa6d1feeaa108e046da6423ab8f6ceda6468545ae8d02d9d58d18818a"}, + {file = "py-1.10.0.tar.gz", hash = "sha256:21b81bda15b66ef5e1a777a21c4dcd9c20ad3efd0b3f817e7a809035269e1bd3"}, +] +py-rouge = [ + {file = "py-rouge-1.1.tar.gz", hash = "sha256:b6caf2f031c45f699a9481c8962b8c33688165a3f2a22e1bfbaede8e073d6bb0"}, + {file = "py_rouge-1.1-py3-none-any.whl", hash = "sha256:9ae2a859a9edc6d25f3908e48706f7d82d6e78ea18954560c4cb21897dc1d270"}, +] +pyahocorasick = [ + {file = "pyahocorasick-1.4.0.tar.gz", hash = "sha256:f9431a20e47e893cadd29f367825e882dbc6fc324a3c24c41e3ff9648e5d04b2"}, +] +pyarrow = [ + {file = "pyarrow-2.0.0-cp35-cp35m-macosx_10_13_intel.whl", hash = "sha256:6afc71cc9c234f3cdbe971297468755ec3392966cb19d3a6caf42fd7dbc6aaa9"}, + {file = "pyarrow-2.0.0-cp35-cp35m-macosx_10_9_intel.whl", hash = "sha256:eb05038b750a6e16a9680f9d2c40d050796284ea1f94690da8f4f28805af0495"}, + {file = "pyarrow-2.0.0-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:3e33e9003794c9062f4c963a10f2a0d787b83d4d1a517a375294f2293180b778"}, + {file = "pyarrow-2.0.0-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:ffb306951b5925a0638dc2ef1ab7ce8033f39e5b4e0fef5787b91ef4fa7da19d"}, + {file = "pyarrow-2.0.0-cp35-cp35m-manylinux2014_x86_64.whl", hash = "sha256:dc0d04c42632e65c4fcbe2f82c70109c5f347652844ead285bc1285dc3a67660"}, + {file = "pyarrow-2.0.0-cp35-cp35m-win_amd64.whl", hash = "sha256:916b593a24f2812b9a75adef1143b1dd89d799e1803282fea2829c5dc0b828ea"}, + {file = "pyarrow-2.0.0-cp36-cp36m-macosx_10_13_x86_64.whl", hash = "sha256:c801e59ec4e8d9d871e299726a528c3ba3139f2ce2d9cdab101f8483c52eec7c"}, + {file = "pyarrow-2.0.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:0bf43e520c33ceb1dd47263a5326830fca65f18d827f7f7b8fe7e64fc4364d88"}, + {file = "pyarrow-2.0.0-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:0b358773eb9fb1b31c8217c6c8c0b4681c3dff80562dc23ad5b379f0279dad69"}, + {file = "pyarrow-2.0.0-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:1000e491e9a539588ec33a2c2603cf05f1d4629aef375345bfd64f2ab7bc8529"}, + {file = "pyarrow-2.0.0-cp36-cp36m-manylinux2014_x86_64.whl", hash = "sha256:ce0462cec7f81c4ff87ce1a95c82a8d467606dce6c72e92906ac251c6115f32b"}, + {file = "pyarrow-2.0.0-cp36-cp36m-win_amd64.whl", hash = "sha256:16ec87163a2fb4abd48bf79cbdf70a7455faa83740e067c2280cfa45a63ed1f3"}, + {file = "pyarrow-2.0.0-cp37-cp37m-macosx_10_13_x86_64.whl", hash = "sha256:acdd18fd83c0be0b53a8e734c0a650fb27bbf4e7d96a8f7eb0a7506ea58bd594"}, + {file = "pyarrow-2.0.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:9a8d3c6baa6e159017d97e8a028ae9eaa2811d8f1ab3d22710c04dcddc0dd7a1"}, + {file = "pyarrow-2.0.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:652c5dff97624375ed0f97cc8ad6f88ee01953f15c17083917735de171f03fe0"}, + {file = "pyarrow-2.0.0-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:00d8fb8a9b2d9bb2f0ced2765b62c5d72689eed06c47315bca004584b0ccda60"}, + {file = "pyarrow-2.0.0-cp37-cp37m-manylinux2014_x86_64.whl", hash = "sha256:fb69672e69e1b752744ee1e236fdf03aad78ffec905fc5c19adbaf88bac4d0fd"}, + {file = "pyarrow-2.0.0-cp37-cp37m-win_amd64.whl", hash = "sha256:ccff3a72f70ebfcc002bf75f5ad1248065e5c9c14e0dcfa599a438ea221c5658"}, + {file = "pyarrow-2.0.0-cp38-cp38-macosx_10_13_x86_64.whl", hash = "sha256:bc8c3713086e4a137b3fda4b149440458b1b0bd72f67b1afa2c7068df1edc060"}, + {file = "pyarrow-2.0.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = 
"sha256:9f4ba9ab479c0172e532f5d73c68e30a31c16b01e09bb21eba9201561231f722"}, + {file = "pyarrow-2.0.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:0db5156a66615591a4a8c66a9a30890a364a259de8d2a6ccb873c7d1740e6c75"}, + {file = "pyarrow-2.0.0-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:cf9bf10daadbbf1a360ac1c7dab0b4f8381d81a3f452737bd6ed310d57a88be8"}, + {file = "pyarrow-2.0.0-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:dd661b6598ce566c6f41d31cc1fc4482308613c2c0c808bd8db33b0643192f84"}, + {file = "pyarrow-2.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:14b02a629986c25e045f81771799e07a8bb3f339898c111314066436769a3dd4"}, + {file = "pyarrow-2.0.0.tar.gz", hash = "sha256:b5e6cd217457e8febcc98a6c279b96f72d5c31a24cd2bffd8d3b2da701d2025c"}, +] +pyasn1 = [ + {file = "pyasn1-0.4.8-py2.4.egg", hash = "sha256:fec3e9d8e36808a28efb59b489e4528c10ad0f480e57dcc32b4de5c9d8c9fdf3"}, + {file = "pyasn1-0.4.8-py2.5.egg", hash = "sha256:0458773cfe65b153891ac249bcf1b5f8f320b7c2ce462151f8fa74de8934becf"}, + {file = "pyasn1-0.4.8-py2.6.egg", hash = "sha256:5c9414dcfede6e441f7e8f81b43b34e834731003427e5b09e4e00e3172a10f00"}, + {file = "pyasn1-0.4.8-py2.7.egg", hash = "sha256:6e7545f1a61025a4e58bb336952c5061697da694db1cae97b116e9c46abcf7c8"}, + {file = "pyasn1-0.4.8-py2.py3-none-any.whl", hash = "sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d"}, + {file = "pyasn1-0.4.8-py3.1.egg", hash = "sha256:78fa6da68ed2727915c4767bb386ab32cdba863caa7dbe473eaae45f9959da86"}, + {file = "pyasn1-0.4.8-py3.2.egg", hash = "sha256:08c3c53b75eaa48d71cf8c710312316392ed40899cb34710d092e96745a358b7"}, + {file = "pyasn1-0.4.8-py3.3.egg", hash = "sha256:03840c999ba71680a131cfaee6fab142e1ed9bbd9c693e285cc6aca0d555e576"}, + {file = "pyasn1-0.4.8-py3.4.egg", hash = "sha256:7ab8a544af125fb704feadb008c99a88805126fb525280b2270bb25cc1d78a12"}, + {file = "pyasn1-0.4.8-py3.5.egg", hash = "sha256:e89bf84b5437b532b0803ba5c9a5e054d21fec423a89952a74f87fa2c9b7bce2"}, + {file = "pyasn1-0.4.8-py3.6.egg", hash = "sha256:014c0e9976956a08139dc0712ae195324a75e142284d5f87f1a87ee1b068a359"}, + {file = "pyasn1-0.4.8-py3.7.egg", hash = "sha256:99fcc3c8d804d1bc6d9a099921e39d827026409a58f2a720dcdb89374ea0c776"}, + {file = "pyasn1-0.4.8.tar.gz", hash = "sha256:aef77c9fb94a3ac588e87841208bdec464471d9871bd5050a287cc9a475cd0ba"}, +] +pyasn1-modules = [ + {file = "pyasn1-modules-0.2.8.tar.gz", hash = "sha256:905f84c712230b2c592c19470d3ca8d552de726050d1d1716282a1f6146be65e"}, + {file = "pyasn1_modules-0.2.8-py2.4.egg", hash = "sha256:0fe1b68d1e486a1ed5473f1302bd991c1611d319bba158e98b106ff86e1d7199"}, + {file = "pyasn1_modules-0.2.8-py2.5.egg", hash = "sha256:fe0644d9ab041506b62782e92b06b8c68cca799e1a9636ec398675459e031405"}, + {file = "pyasn1_modules-0.2.8-py2.6.egg", hash = "sha256:a99324196732f53093a84c4369c996713eb8c89d360a496b599fb1a9c47fc3eb"}, + {file = "pyasn1_modules-0.2.8-py2.7.egg", hash = "sha256:0845a5582f6a02bb3e1bde9ecfc4bfcae6ec3210dd270522fee602365430c3f8"}, + {file = "pyasn1_modules-0.2.8-py2.py3-none-any.whl", hash = "sha256:a50b808ffeb97cb3601dd25981f6b016cbb3d31fbf57a8b8a87428e6158d0c74"}, + {file = "pyasn1_modules-0.2.8-py3.1.egg", hash = "sha256:f39edd8c4ecaa4556e989147ebf219227e2cd2e8a43c7e7fcb1f1c18c5fd6a3d"}, + {file = "pyasn1_modules-0.2.8-py3.2.egg", hash = "sha256:b80486a6c77252ea3a3e9b1e360bc9cf28eaac41263d173c032581ad2f20fe45"}, + {file = "pyasn1_modules-0.2.8-py3.3.egg", hash = "sha256:65cebbaffc913f4fe9e4808735c95ea22d7a7775646ab690518c056784bc21b4"}, + {file = "pyasn1_modules-0.2.8-py3.4.egg", hash = 
"sha256:15b7c67fabc7fc240d87fb9aabf999cf82311a6d6fb2c70d00d3d0604878c811"}, + {file = "pyasn1_modules-0.2.8-py3.5.egg", hash = "sha256:426edb7a5e8879f1ec54a1864f16b882c2837bfd06eee62f2c982315ee2473ed"}, + {file = "pyasn1_modules-0.2.8-py3.6.egg", hash = "sha256:cbac4bc38d117f2a49aeedec4407d23e8866ea4ac27ff2cf7fb3e5b570df19e0"}, + {file = "pyasn1_modules-0.2.8-py3.7.egg", hash = "sha256:c29a5e5cc7a3f05926aff34e097e84f8589cd790ce0ed41b67aed6857b26aafd"}, +] +pycodestyle = [ + {file = "pycodestyle-2.6.0-py2.py3-none-any.whl", hash = "sha256:2295e7b2f6b5bd100585ebcb1f616591b652db8a741695b3d8f5d28bdc934367"}, + {file = "pycodestyle-2.6.0.tar.gz", hash = "sha256:c58a7d2815e0e8d7972bf1803331fb0152f867bd89adf8a01dfd55085434192e"}, +] +pycparser = [ + {file = "pycparser-2.20-py2.py3-none-any.whl", hash = "sha256:7582ad22678f0fcd81102833f60ef8d0e57288b6b5fb00323d101be910e35705"}, + {file = "pycparser-2.20.tar.gz", hash = "sha256:2d475327684562c3a96cc71adf7dc8c4f0565175cf86b6d7a404ff4c771f15f0"}, +] +pyflakes = [ + {file = "pyflakes-2.2.0-py2.py3-none-any.whl", hash = "sha256:0d94e0e05a19e57a99444b6ddcf9a6eb2e5c68d3ca1e98e90707af8152c90a92"}, + {file = "pyflakes-2.2.0.tar.gz", hash = "sha256:35b2d75ee967ea93b55750aa9edbbf72813e06a66ba54438df2cfac9e3c27fc8"}, +] +pygments = [ + {file = "Pygments-2.7.3-py3-none-any.whl", hash = "sha256:f275b6c0909e5dafd2d6269a656aa90fa58ebf4a74f8fcf9053195d226b24a08"}, + {file = "Pygments-2.7.3.tar.gz", hash = "sha256:ccf3acacf3782cbed4a989426012f1c535c9a90d3a7fc3f16d231b9372d2b716"}, +] +pyparsing = [ + {file = "pyparsing-2.4.7-py2.py3-none-any.whl", hash = "sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b"}, + {file = "pyparsing-2.4.7.tar.gz", hash = "sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1"}, +] +pyrsistent = [ + {file = "pyrsistent-0.17.3.tar.gz", hash = "sha256:2e636185d9eb976a18a8a8e96efce62f2905fea90041958d8cc2a189756ebf3e"}, +] +pysocks = [ + {file = "PySocks-1.7.1-py27-none-any.whl", hash = "sha256:08e69f092cc6dbe92a0fdd16eeb9b9ffbc13cadfe5ca4c7bd92ffb078b293299"}, + {file = "PySocks-1.7.1-py3-none-any.whl", hash = "sha256:2725bd0a9925919b9b51739eea5f9e2bae91e83288108a9ad338b2e3a4435ee5"}, + {file = "PySocks-1.7.1.tar.gz", hash = "sha256:3f8804571ebe159c380ac6de37643bb4685970655d3bba243530d6558b799aa0"}, +] +pytest = [ + {file = "pytest-6.2.1-py3-none-any.whl", hash = "sha256:1969f797a1a0dbd8ccf0fecc80262312729afea9c17f1d70ebf85c5e76c6f7c8"}, + {file = "pytest-6.2.1.tar.gz", hash = "sha256:66e419b1899bc27346cb2c993e12c5e5e8daba9073c1fbce33b9807abc95c306"}, +] +pytest-cov = [ + {file = "pytest-cov-2.10.1.tar.gz", hash = "sha256:47bd0ce14056fdd79f93e1713f88fad7bdcc583dcd7783da86ef2f085a0bb88e"}, + {file = "pytest_cov-2.10.1-py2.py3-none-any.whl", hash = "sha256:45ec2d5182f89a81fc3eb29e3d1ed3113b9e9a873bcddb2a71faaab066110191"}, +] +python-dateutil = [ + {file = "python-dateutil-2.8.1.tar.gz", hash = "sha256:73ebfe9dbf22e832286dafa60473e4cd239f8592f699aa5adaf10050e6e1823c"}, + {file = "python_dateutil-2.8.1-py2.py3-none-any.whl", hash = "sha256:75bb3f31ea686f1197762692a9ee6a7550b59fc6ca3a1f4b5d7e32fb98e2da2a"}, +] +python-levenshtein = [ + {file = "python-Levenshtein-0.12.0.tar.gz", hash = "sha256:033a11de5e3d19ea25c9302d11224e1a1898fe5abd23c61c7c360c25195e3eb1"}, +] +pytorch-lightning = [ + {file = "pytorch-lightning-1.1.2.tar.gz", hash = "sha256:00f8d47277f414d572b169f24ee09efa54e3f0cc80144435d27d1825472dd5b1"}, + {file = "pytorch_lightning-1.1.2-py3-none-any.whl", hash = 
"sha256:34070e1a8a7cddc5d55b1ffa8692ae54ead53c493a6f3e0bdb553017be9bfefc"}, +] +pytz = [ + {file = "pytz-2020.5-py2.py3-none-any.whl", hash = "sha256:16962c5fb8db4a8f63a26646d8886e9d769b6c511543557bc84e9569fb9a9cb4"}, + {file = "pytz-2020.5.tar.gz", hash = "sha256:180befebb1927b16f6b57101720075a984c019ac16b1b7575673bea42c6c3da5"}, +] +pywin32 = [ + {file = "pywin32-300-cp35-cp35m-win32.whl", hash = "sha256:1c204a81daed2089e55d11eefa4826c05e604d27fe2be40b6bf8db7b6a39da63"}, + {file = "pywin32-300-cp35-cp35m-win_amd64.whl", hash = "sha256:350c5644775736351b77ba68da09a39c760d75d2467ecec37bd3c36a94fbed64"}, + {file = "pywin32-300-cp36-cp36m-win32.whl", hash = "sha256:a3b4c48c852d4107e8a8ec980b76c94ce596ea66d60f7a697582ea9dce7e0db7"}, + {file = "pywin32-300-cp36-cp36m-win_amd64.whl", hash = "sha256:27a30b887afbf05a9cbb05e3ffd43104a9b71ce292f64a635389dbad0ed1cd85"}, + {file = "pywin32-300-cp37-cp37m-win32.whl", hash = "sha256:d7e8c7efc221f10d6400c19c32a031add1c4a58733298c09216f57b4fde110dc"}, + {file = "pywin32-300-cp37-cp37m-win_amd64.whl", hash = "sha256:8151e4d7a19262d6694162d6da85d99a16f8b908949797fd99c83a0bfaf5807d"}, + {file = "pywin32-300-cp38-cp38-win32.whl", hash = "sha256:fbb3b1b0fbd0b4fc2a3d1d81fe0783e30062c1abed1d17c32b7879d55858cfae"}, + {file = "pywin32-300-cp38-cp38-win_amd64.whl", hash = "sha256:60a8fa361091b2eea27f15718f8eb7f9297e8d51b54dbc4f55f3d238093d5190"}, + {file = "pywin32-300-cp39-cp39-win32.whl", hash = "sha256:638b68eea5cfc8def537e43e9554747f8dee786b090e47ead94bfdafdb0f2f50"}, + {file = "pywin32-300-cp39-cp39-win_amd64.whl", hash = "sha256:b1609ce9bd5c411b81f941b246d683d6508992093203d4eb7f278f4ed1085c3f"}, +] +pywinpty = [ + {file = "pywinpty-0.5.7-cp27-cp27m-win32.whl", hash = "sha256:b358cb552c0f6baf790de375fab96524a0498c9df83489b8c23f7f08795e966b"}, + {file = "pywinpty-0.5.7-cp27-cp27m-win_amd64.whl", hash = "sha256:1e525a4de05e72016a7af27836d512db67d06a015aeaf2fa0180f8e6a039b3c2"}, + {file = "pywinpty-0.5.7-cp35-cp35m-win32.whl", hash = "sha256:2740eeeb59297593a0d3f762269b01d0285c1b829d6827445fcd348fb47f7e70"}, + {file = "pywinpty-0.5.7-cp35-cp35m-win_amd64.whl", hash = "sha256:33df97f79843b2b8b8bc5c7aaf54adec08cc1bae94ee99dfb1a93c7a67704d95"}, + {file = "pywinpty-0.5.7-cp36-cp36m-win32.whl", hash = "sha256:e854211df55d107f0edfda8a80b39dfc87015bef52a8fe6594eb379240d81df2"}, + {file = "pywinpty-0.5.7-cp36-cp36m-win_amd64.whl", hash = "sha256:dbd838de92de1d4ebf0dce9d4d5e4fc38d0b7b1de837947a18b57a882f219139"}, + {file = "pywinpty-0.5.7-cp37-cp37m-win32.whl", hash = "sha256:5fb2c6c6819491b216f78acc2c521b9df21e0f53b9a399d58a5c151a3c4e2a2d"}, + {file = "pywinpty-0.5.7-cp37-cp37m-win_amd64.whl", hash = "sha256:dd22c8efacf600730abe4a46c1388355ce0d4ab75dc79b15d23a7bd87bf05b48"}, + {file = "pywinpty-0.5.7-cp38-cp38-win_amd64.whl", hash = "sha256:8fc5019ff3efb4f13708bd3b5ad327589c1a554cb516d792527361525a7cb78c"}, + {file = "pywinpty-0.5.7.tar.gz", hash = "sha256:2d7e9c881638a72ffdca3f5417dd1563b60f603e1b43e5895674c2a1b01f95a0"}, +] +pyyaml = [ + {file = "PyYAML-5.3.1-cp27-cp27m-win32.whl", hash = "sha256:74809a57b329d6cc0fdccee6318f44b9b8649961fa73144a98735b0aaf029f1f"}, + {file = "PyYAML-5.3.1-cp27-cp27m-win_amd64.whl", hash = "sha256:240097ff019d7c70a4922b6869d8a86407758333f02203e0fc6ff79c5dcede76"}, + {file = "PyYAML-5.3.1-cp35-cp35m-win32.whl", hash = "sha256:4f4b913ca1a7319b33cfb1369e91e50354d6f07a135f3b901aca02aa95940bd2"}, + {file = "PyYAML-5.3.1-cp35-cp35m-win_amd64.whl", hash = "sha256:cc8955cfbfc7a115fa81d85284ee61147059a753344bc51098f3ccd69b0d7e0c"}, + {file = 
"PyYAML-5.3.1-cp36-cp36m-win32.whl", hash = "sha256:7739fc0fa8205b3ee8808aea45e968bc90082c10aef6ea95e855e10abf4a37b2"}, + {file = "PyYAML-5.3.1-cp36-cp36m-win_amd64.whl", hash = "sha256:69f00dca373f240f842b2931fb2c7e14ddbacd1397d57157a9b005a6a9942648"}, + {file = "PyYAML-5.3.1-cp37-cp37m-win32.whl", hash = "sha256:d13155f591e6fcc1ec3b30685d50bf0711574e2c0dfffd7644babf8b5102ca1a"}, + {file = "PyYAML-5.3.1-cp37-cp37m-win_amd64.whl", hash = "sha256:73f099454b799e05e5ab51423c7bcf361c58d3206fa7b0d555426b1f4d9a3eaf"}, + {file = "PyYAML-5.3.1-cp38-cp38-win32.whl", hash = "sha256:06a0d7ba600ce0b2d2fe2e78453a470b5a6e000a985dd4a4e54e436cc36b0e97"}, + {file = "PyYAML-5.3.1-cp38-cp38-win_amd64.whl", hash = "sha256:95f71d2af0ff4227885f7a6605c37fd53d3a106fcab511b8860ecca9fcf400ee"}, + {file = "PyYAML-5.3.1-cp39-cp39-win32.whl", hash = "sha256:ad9c67312c84def58f3c04504727ca879cb0013b2517c85a9a253f0cb6380c0a"}, + {file = "PyYAML-5.3.1-cp39-cp39-win_amd64.whl", hash = "sha256:6034f55dab5fea9e53f436aa68fa3ace2634918e8b5994d82f3621c04ff5ed2e"}, + {file = "PyYAML-5.3.1.tar.gz", hash = "sha256:b8eac752c5e14d3eca0e6dd9199cd627518cb5ec06add0de9d32baeee6fe645d"}, +] +pyzmq = [ + {file = "pyzmq-20.0.0-cp35-cp35m-macosx_10_9_intel.whl", hash = "sha256:523d542823cabb94065178090e05347bd204365f6e7cb260f0071c995d392fc2"}, + {file = "pyzmq-20.0.0-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:225774a48ed7414c0395335e7123ef8c418dbcbe172caabdc2496133b03254c2"}, + {file = "pyzmq-20.0.0-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:bc7dd697356b31389d5118b9bcdef3e8d8079e8181800c4e8d72dccd56e1ff68"}, + {file = "pyzmq-20.0.0-cp35-cp35m-win32.whl", hash = "sha256:d81184489369ec325bd50ba1c935361e63f31f578430b9ad95471899361a8253"}, + {file = "pyzmq-20.0.0-cp35-cp35m-win_amd64.whl", hash = "sha256:7113eb93dcd0a5750c65d123ed0099e036a3a3f2dcb48afedd025ffa125c983b"}, + {file = "pyzmq-20.0.0-cp36-cp36m-macosx_10_9_intel.whl", hash = "sha256:b62113eeb9a0649cebed9b21fd578f3a0175ef214a2a91dcb7b31bbf55805295"}, + {file = "pyzmq-20.0.0-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:f0beef935efe78a63c785bb21ed56c1c24448511383e3994927c8bb2caf5e714"}, + {file = "pyzmq-20.0.0-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:46250789730489009fe139cbf576679557c070a6a3628077d09a4153d52fd381"}, + {file = "pyzmq-20.0.0-cp36-cp36m-win32.whl", hash = "sha256:bf755905a7d30d2749079611b9a89924c1f2da2695dc09ce221f42122c9808e3"}, + {file = "pyzmq-20.0.0-cp36-cp36m-win_amd64.whl", hash = "sha256:2742e380d186673eee6a570ef83d4568741945434ba36d92b98d36cdbfedbd44"}, + {file = "pyzmq-20.0.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:1e9b75a119606732023a305d1c214146c09a91f8116f6aff3e8b7d0a60b6f0ff"}, + {file = "pyzmq-20.0.0-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:03638e46d486dd1c118e03c8bf9c634bdcae679600eac6573ae1e54906de7c2f"}, + {file = "pyzmq-20.0.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:63ee08e35be72fdd7568065a249a5b5cf51a2e8ab6ee63cf9f73786fcb9e710b"}, + {file = "pyzmq-20.0.0-cp37-cp37m-win32.whl", hash = "sha256:c95dda497a7c1b1e734b5e8353173ca5dd7b67784d8821d13413a97856588057"}, + {file = "pyzmq-20.0.0-cp37-cp37m-win_amd64.whl", hash = "sha256:cc09c5cd1a4332611c8564d65e6a432dc6db3e10793d0254da9fa1e31d9ffd6d"}, + {file = "pyzmq-20.0.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6e24907857c80dc67692e31f5bf3ad5bf483ee0142cec95b3d47e2db8c43bdda"}, + {file = "pyzmq-20.0.0-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:53706f4a792cdae422121fb6a5e65119bad02373153364fc9d004cf6a90394de"}, + {file = 
"pyzmq-20.0.0-cp38-cp38-manylinux1_i686.whl", hash = "sha256:895695be380f0f85d2e3ec5ccf68a93c92d45bd298567525ad5633071589872c"}, + {file = "pyzmq-20.0.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:d92c7f41a53ece82b91703ea433c7d34143248cf0cead33aa11c5fc621c764bf"}, + {file = "pyzmq-20.0.0-cp38-cp38-win32.whl", hash = "sha256:309d763d89ec1845c0e0fa14e1fb6558fd8c9ef05ed32baec27d7a8499cc7bb0"}, + {file = "pyzmq-20.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:0e554fd390021edbe0330b67226325a820b0319c5b45e1b0a59bf22ccc36e793"}, + {file = "pyzmq-20.0.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:cfa54a162a7b32641665e99b2c12084555afe9fc8fe80ec8b2f71a57320d10e1"}, + {file = "pyzmq-20.0.0-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:dc2f48b575dff6edefd572f1ac84cf0c3f18ad5fcf13384de32df740a010594a"}, + {file = "pyzmq-20.0.0-cp39-cp39-manylinux1_i686.whl", hash = "sha256:5efe02bdcc5eafcac0aab531292294298f0ab8d28ed43be9e507d0e09173d1a4"}, + {file = "pyzmq-20.0.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:0af84f34f27b5c6a0e906c648bdf46d4caebf9c8e6e16db0728f30a58141cad6"}, + {file = "pyzmq-20.0.0-cp39-cp39-win32.whl", hash = "sha256:c63fafd2556d218368c51d18588f8e6f8d86d09d493032415057faf6de869b34"}, + {file = "pyzmq-20.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:f110a4d3f8f01209eec304ed542f6c8054cce9b0f16dfe3d571e57c290e4e133"}, + {file = "pyzmq-20.0.0-pp36-pypy36_pp73-macosx_10_9_x86_64.whl", hash = "sha256:4d9259a5eb3f71abbaf61f165cacf42240bfeea3783bebd8255341abdfe206f1"}, + {file = "pyzmq-20.0.0.tar.gz", hash = "sha256:824ad5888331aadeac772bce27e1c2fbcab82fade92edbd234542c4e12f0dca9"}, +] +recommonmark = [ + {file = "recommonmark-0.7.1-py2.py3-none-any.whl", hash = "sha256:1b1db69af0231efce3fa21b94ff627ea33dee7079a01dd0a7f8482c3da148b3f"}, + {file = "recommonmark-0.7.1.tar.gz", hash = "sha256:bdb4db649f2222dcd8d2d844f0006b958d627f732415d399791ee436a3686d67"}, +] +regex = [ + {file = "regex-2020.11.13-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:8b882a78c320478b12ff024e81dc7d43c1462aa4a3341c754ee65d857a521f85"}, + {file = "regex-2020.11.13-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:a63f1a07932c9686d2d416fb295ec2c01ab246e89b4d58e5fa468089cab44b70"}, + {file = "regex-2020.11.13-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:6e4b08c6f8daca7d8f07c8d24e4331ae7953333dbd09c648ed6ebd24db5a10ee"}, + {file = "regex-2020.11.13-cp36-cp36m-manylinux2010_i686.whl", hash = "sha256:bba349276b126947b014e50ab3316c027cac1495992f10e5682dc677b3dfa0c5"}, + {file = "regex-2020.11.13-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:56e01daca75eae420bce184edd8bb341c8eebb19dd3bce7266332258f9fb9dd7"}, + {file = "regex-2020.11.13-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:6a8ce43923c518c24a2579fda49f093f1397dad5d18346211e46f134fc624e31"}, + {file = "regex-2020.11.13-cp36-cp36m-manylinux2014_i686.whl", hash = "sha256:1ab79fcb02b930de09c76d024d279686ec5d532eb814fd0ed1e0051eb8bd2daa"}, + {file = "regex-2020.11.13-cp36-cp36m-manylinux2014_x86_64.whl", hash = "sha256:9801c4c1d9ae6a70aeb2128e5b4b68c45d4f0af0d1535500884d644fa9b768c6"}, + {file = "regex-2020.11.13-cp36-cp36m-win32.whl", hash = "sha256:49cae022fa13f09be91b2c880e58e14b6da5d10639ed45ca69b85faf039f7a4e"}, + {file = "regex-2020.11.13-cp36-cp36m-win_amd64.whl", hash = "sha256:749078d1eb89484db5f34b4012092ad14b327944ee7f1c4f74d6279a6e4d1884"}, + {file = "regex-2020.11.13-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b2f4007bff007c96a173e24dcda236e5e83bde4358a557f9ccf5e014439eae4b"}, + {file = 
"regex-2020.11.13-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:38c8fd190db64f513fe4e1baa59fed086ae71fa45083b6936b52d34df8f86a88"}, + {file = "regex-2020.11.13-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:5862975b45d451b6db51c2e654990c1820523a5b07100fc6903e9c86575202a0"}, + {file = "regex-2020.11.13-cp37-cp37m-manylinux2010_i686.whl", hash = "sha256:262c6825b309e6485ec2493ffc7e62a13cf13fb2a8b6d212f72bd53ad34118f1"}, + {file = "regex-2020.11.13-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:bafb01b4688833e099d79e7efd23f99172f501a15c44f21ea2118681473fdba0"}, + {file = "regex-2020.11.13-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:e32f5f3d1b1c663af7f9c4c1e72e6ffe9a78c03a31e149259f531e0fed826512"}, + {file = "regex-2020.11.13-cp37-cp37m-manylinux2014_i686.whl", hash = "sha256:3bddc701bdd1efa0d5264d2649588cbfda549b2899dc8d50417e47a82e1387ba"}, + {file = "regex-2020.11.13-cp37-cp37m-manylinux2014_x86_64.whl", hash = "sha256:02951b7dacb123d8ea6da44fe45ddd084aa6777d4b2454fa0da61d569c6fa538"}, + {file = "regex-2020.11.13-cp37-cp37m-win32.whl", hash = "sha256:0d08e71e70c0237883d0bef12cad5145b84c3705e9c6a588b2a9c7080e5af2a4"}, + {file = "regex-2020.11.13-cp37-cp37m-win_amd64.whl", hash = "sha256:1fa7ee9c2a0e30405e21031d07d7ba8617bc590d391adfc2b7f1e8b99f46f444"}, + {file = "regex-2020.11.13-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:baf378ba6151f6e272824b86a774326f692bc2ef4cc5ce8d5bc76e38c813a55f"}, + {file = "regex-2020.11.13-cp38-cp38-manylinux1_i686.whl", hash = "sha256:e3faaf10a0d1e8e23a9b51d1900b72e1635c2d5b0e1bea1c18022486a8e2e52d"}, + {file = "regex-2020.11.13-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:2a11a3e90bd9901d70a5b31d7dd85114755a581a5da3fc996abfefa48aee78af"}, + {file = "regex-2020.11.13-cp38-cp38-manylinux2010_i686.whl", hash = "sha256:d1ebb090a426db66dd80df8ca85adc4abfcbad8a7c2e9a5ec7513ede522e0a8f"}, + {file = "regex-2020.11.13-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:b2b1a5ddae3677d89b686e5c625fc5547c6e492bd755b520de5332773a8af06b"}, + {file = "regex-2020.11.13-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:2c99e97d388cd0a8d30f7c514d67887d8021541b875baf09791a3baad48bb4f8"}, + {file = "regex-2020.11.13-cp38-cp38-manylinux2014_i686.whl", hash = "sha256:c084582d4215593f2f1d28b65d2a2f3aceff8342aa85afd7be23a9cad74a0de5"}, + {file = "regex-2020.11.13-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:a3d748383762e56337c39ab35c6ed4deb88df5326f97a38946ddd19028ecce6b"}, + {file = "regex-2020.11.13-cp38-cp38-win32.whl", hash = "sha256:7913bd25f4ab274ba37bc97ad0e21c31004224ccb02765ad984eef43e04acc6c"}, + {file = "regex-2020.11.13-cp38-cp38-win_amd64.whl", hash = "sha256:6c54ce4b5d61a7129bad5c5dc279e222afd00e721bf92f9ef09e4fae28755683"}, + {file = "regex-2020.11.13-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1862a9d9194fae76a7aaf0150d5f2a8ec1da89e8b55890b1786b8f88a0f619dc"}, + {file = "regex-2020.11.13-cp39-cp39-manylinux1_i686.whl", hash = "sha256:4902e6aa086cbb224241adbc2f06235927d5cdacffb2425c73e6570e8d862364"}, + {file = "regex-2020.11.13-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:7a25fcbeae08f96a754b45bdc050e1fb94b95cab046bf56b016c25e9ab127b3e"}, + {file = "regex-2020.11.13-cp39-cp39-manylinux2010_i686.whl", hash = "sha256:d2d8ce12b7c12c87e41123997ebaf1a5767a5be3ec545f64675388970f415e2e"}, + {file = "regex-2020.11.13-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:f7d29a6fc4760300f86ae329e3b6ca28ea9c20823df123a2ea8693e967b29917"}, + {file = "regex-2020.11.13-cp39-cp39-manylinux2014_aarch64.whl", hash = 
"sha256:717881211f46de3ab130b58ec0908267961fadc06e44f974466d1887f865bd5b"}, + {file = "regex-2020.11.13-cp39-cp39-manylinux2014_i686.whl", hash = "sha256:3128e30d83f2e70b0bed9b2a34e92707d0877e460b402faca908c6667092ada9"}, + {file = "regex-2020.11.13-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:8f6a2229e8ad946e36815f2a03386bb8353d4bde368fdf8ca5f0cb97264d3b5c"}, + {file = "regex-2020.11.13-cp39-cp39-win32.whl", hash = "sha256:f8f295db00ef5f8bae530fc39af0b40486ca6068733fb860b42115052206466f"}, + {file = "regex-2020.11.13-cp39-cp39-win_amd64.whl", hash = "sha256:a15f64ae3a027b64496a71ab1f722355e570c3fac5ba2801cafce846bf5af01d"}, + {file = "regex-2020.11.13.tar.gz", hash = "sha256:83d6b356e116ca119db8e7c6fc2983289d87b27b3fac238cfe5dca529d884562"}, +] +requests = [ + {file = "requests-2.25.1-py2.py3-none-any.whl", hash = "sha256:c210084e36a42ae6b9219e00e48287def368a26d03a048ddad7bfee44f75871e"}, + {file = "requests-2.25.1.tar.gz", hash = "sha256:27973dd4a904a4f13b263a19c866c13b92a39ed1c964655f025f3f8d3d75b804"}, +] +requests-oauthlib = [ + {file = "requests-oauthlib-1.3.0.tar.gz", hash = "sha256:b4261601a71fd721a8bd6d7aa1cc1d6a8a93b4a9f5e96626f8e4d91e8beeaa6a"}, + {file = "requests_oauthlib-1.3.0-py2.py3-none-any.whl", hash = "sha256:7f71572defaecd16372f9006f33c2ec8c077c3cfa6f5911a9a90202beb513f3d"}, + {file = "requests_oauthlib-1.3.0-py3.7.egg", hash = "sha256:fa6c47b933f01060936d87ae9327fead68768b69c6c9ea2109c48be30f2d4dbc"}, +] +retrying = [ + {file = "retrying-1.3.3.tar.gz", hash = "sha256:08c039560a6da2fe4f2c426d0766e284d3b736e355f8dd24b37367b0bb41973b"}, +] +rouge-score = [ + {file = "rouge_score-0.0.4-py2.py3-none-any.whl", hash = "sha256:b57fd8a3589a392f1bb43a31788a46269102c84849564277761826b91f4d07d5"}, + {file = "rouge_score-0.0.4.tar.gz", hash = "sha256:68f75b8780a38683b49cfc35d8392e267682d116ccdf4c2161669c6ff9d4a501"}, +] +rsa = [ + {file = "rsa-4.6-py3-none-any.whl", hash = "sha256:6166864e23d6b5195a5cfed6cd9fed0fe774e226d8f854fcb23b7bbef0350233"}, + {file = "rsa-4.6.tar.gz", hash = "sha256:109ea5a66744dd859bf16fe904b8d8b627adafb9408753161e766a92e7d681fa"}, +] +s3transfer = [ + {file = "s3transfer-0.3.3-py2.py3-none-any.whl", hash = "sha256:2482b4259524933a022d59da830f51bd746db62f047d6eb213f2f8855dcb8a13"}, + {file = "s3transfer-0.3.3.tar.gz", hash = "sha256:921a37e2aefc64145e7b73d50c71bb4f26f46e4c9f414dc648c6245ff92cf7db"}, +] +sacremoses = [ + {file = "sacremoses-0.0.43.tar.gz", hash = "sha256:123c1bf2664351fb05e16f87d3786dbe44a050cfd7b85161c09ad9a63a8e2948"}, +] +scikit-learn = [ + {file = "scikit-learn-0.24.0.tar.gz", hash = "sha256:076369634ee72b5a5941440661e2f306ff4ac30903802dc52031c7e9199ac640"}, + {file = "scikit_learn-0.24.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:890d7d588f65acb0c4f6c083347c9076916bda5e6bd8400f06244b1afc1009af"}, + {file = "scikit_learn-0.24.0-cp36-cp36m-manylinux2010_i686.whl", hash = "sha256:e534f5f3796db6781c87e9835dcd51b7854c8c5a379c9210b93605965c1941fd"}, + {file = "scikit_learn-0.24.0-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:d7fe05fcb44eadd6d6c874c768f085f5de1239db3a3b7be4d3d23d12e4120589"}, + {file = "scikit_learn-0.24.0-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:7f654befc5ad413690cc58f3f34a3e906caf825195ce0fda00a8e9565e1403e6"}, + {file = "scikit_learn-0.24.0-cp36-cp36m-win32.whl", hash = "sha256:afeb06dc69847927634e58579b9cdc72e1390b79497336b2324b1b173f33bd47"}, + {file = "scikit_learn-0.24.0-cp36-cp36m-win_amd64.whl", hash = "sha256:26f66b3726b54dfb76ea51c5d9c2431ed17ebc066cb4527662b9e851a3e7ba61"}, + 
{file = "scikit_learn-0.24.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c08b27cb78ee8d2dc781a7affed09859441f5b624f9f92da59ac0791c8774dfc"}, + {file = "scikit_learn-0.24.0-cp37-cp37m-manylinux2010_i686.whl", hash = "sha256:905d8934d1e27a686698864a5863ff2c0e13a2ae1adb78a8a848aacc8a49927d"}, + {file = "scikit_learn-0.24.0-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:d819d625832fb2969911a243e009cfa135cb8ef1e150866e417d6e9d75290087"}, + {file = "scikit_learn-0.24.0-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:18f7131e62265bf2691ed1d0303c640313894ccfe4278427478c6b2f45094b53"}, + {file = "scikit_learn-0.24.0-cp37-cp37m-win32.whl", hash = "sha256:b0d13fd56d26cf3de0314a4fd48037108c638fe126d813f5c1222bb0f08b6a76"}, + {file = "scikit_learn-0.24.0-cp37-cp37m-win_amd64.whl", hash = "sha256:c912247e42114f389858ae05d63f4359d4e667ea72aaabee191aee9ad3f9774a"}, + {file = "scikit_learn-0.24.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:758619e49cd7c17282e6cc60d5cc73c02c072b47c9a10010bb3bb47e0d976e50"}, + {file = "scikit_learn-0.24.0-cp38-cp38-manylinux2010_i686.whl", hash = "sha256:66f27bf21202a850bcd7b6303916e4907f6e22ec59a14974ede4955aed5c7ed0"}, + {file = "scikit_learn-0.24.0-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:5e6e3c042cea83f2e20a45e563b8eabc1f8f72446251fe23ebefdf111a173a33"}, + {file = "scikit_learn-0.24.0-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:2a5348585aa793bc8cc5a72f8e9067c9380834b0aadbd55f924843b071f13282"}, + {file = "scikit_learn-0.24.0-cp38-cp38-win32.whl", hash = "sha256:743b6edd98c98991be46c08e6b21df3861d5ae915f91d59f988384d93f7263e7"}, + {file = "scikit_learn-0.24.0-cp38-cp38-win_amd64.whl", hash = "sha256:2951f87d35e72f007701c6e028aa230f6df6212a3194677c0c950486066a454d"}, + {file = "scikit_learn-0.24.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:44e452ea8491225c5783d49577aad0f36202dfd52aec7f82c0fdfe5fbd5f7400"}, + {file = "scikit_learn-0.24.0-cp39-cp39-manylinux2010_i686.whl", hash = "sha256:800aaf63f8838c00e85db2267dd226f89858594843fd03932a9eda95746d2c40"}, + {file = "scikit_learn-0.24.0-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:3eeff086f7329521d27249a082ea3c48c085cedb110db5f65968ab55c3ba2e09"}, + {file = "scikit_learn-0.24.0-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:4395e91b3548005f4a645018435b5a94f8cce232b5b70753020e606c6a750656"}, + {file = "scikit_learn-0.24.0-cp39-cp39-win32.whl", hash = "sha256:80ca024154b84b6ac4cfc86930ba13fdc348a209753bf2c16129db6f9eb8a80b"}, + {file = "scikit_learn-0.24.0-cp39-cp39-win_amd64.whl", hash = "sha256:490436b44b3a1957cb625e871764b0aa330b34cc416aea4abc6c38ca63d0d682"}, +] +scipy = [ + {file = "scipy-1.4.1-cp35-cp35m-macosx_10_6_intel.whl", hash = "sha256:c5cac0c0387272ee0e789e94a570ac51deb01c796b37fb2aad1fb13f85e2f97d"}, + {file = "scipy-1.4.1-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:a144811318853a23d32a07bc7fd5561ff0cac5da643d96ed94a4ffe967d89672"}, + {file = "scipy-1.4.1-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:71eb180f22c49066f25d6df16f8709f215723317cc951d99e54dc88020ea57be"}, + {file = "scipy-1.4.1-cp35-cp35m-win32.whl", hash = "sha256:770254a280d741dd3436919d47e35712fb081a6ff8bafc0f319382b954b77802"}, + {file = "scipy-1.4.1-cp35-cp35m-win_amd64.whl", hash = "sha256:a1aae70d52d0b074d8121333bc807a485f9f1e6a69742010b33780df2e60cfe0"}, + {file = "scipy-1.4.1-cp36-cp36m-macosx_10_6_intel.whl", hash = "sha256:bb517872058a1f087c4528e7429b4a44533a902644987e7b2fe35ecc223bc408"}, + {file = "scipy-1.4.1-cp36-cp36m-manylinux1_i686.whl", hash = 
"sha256:dba8306f6da99e37ea08c08fef6e274b5bf8567bb094d1dbe86a20e532aca088"}, + {file = "scipy-1.4.1-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:386086e2972ed2db17cebf88610aab7d7f6e2c0ca30042dc9a89cf18dcc363fa"}, + {file = "scipy-1.4.1-cp36-cp36m-win32.whl", hash = "sha256:8d3bc3993b8e4be7eade6dcc6fd59a412d96d3a33fa42b0fa45dc9e24495ede9"}, + {file = "scipy-1.4.1-cp36-cp36m-win_amd64.whl", hash = "sha256:dc60bb302f48acf6da8ca4444cfa17d52c63c5415302a9ee77b3b21618090521"}, + {file = "scipy-1.4.1-cp37-cp37m-macosx_10_6_intel.whl", hash = "sha256:787cc50cab3020a865640aba3485e9fbd161d4d3b0d03a967df1a2881320512d"}, + {file = "scipy-1.4.1-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:0902a620a381f101e184a958459b36d3ee50f5effd186db76e131cbefcbb96f7"}, + {file = "scipy-1.4.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:00af72998a46c25bdb5824d2b729e7dabec0c765f9deb0b504f928591f5ff9d4"}, + {file = "scipy-1.4.1-cp37-cp37m-win32.whl", hash = "sha256:9508a7c628a165c2c835f2497837bf6ac80eb25291055f56c129df3c943cbaf8"}, + {file = "scipy-1.4.1-cp37-cp37m-win_amd64.whl", hash = "sha256:a2d6df9eb074af7f08866598e4ef068a2b310d98f87dc23bd1b90ec7bdcec802"}, + {file = "scipy-1.4.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:3092857f36b690a321a662fe5496cb816a7f4eecd875e1d36793d92d3f884073"}, + {file = "scipy-1.4.1-cp38-cp38-manylinux1_i686.whl", hash = "sha256:8a07760d5c7f3a92e440ad3aedcc98891e915ce857664282ae3c0220f3301eb6"}, + {file = "scipy-1.4.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:1e3190466d669d658233e8a583b854f6386dd62d655539b77b3fa25bfb2abb70"}, + {file = "scipy-1.4.1-cp38-cp38-win32.whl", hash = "sha256:cc971a82ea1170e677443108703a2ec9ff0f70752258d0e9f5433d00dda01f59"}, + {file = "scipy-1.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:2cce3f9847a1a51019e8c5b47620da93950e58ebc611f13e0d11f4980ca5fecb"}, + {file = "scipy-1.4.1.tar.gz", hash = "sha256:dee1bbf3a6c8f73b6b218cb28eed8dd13347ea2f87d572ce19b289d6fd3fbc59"}, +] +segtok = [ + {file = "segtok-1.5.10.tar.gz", hash = "sha256:2ed6525e840a669ec7ba3be61353a4f660264aecfc726834dbe4b01eb4ef7804"}, +] +semver = [ + {file = "semver-2.13.0-py2.py3-none-any.whl", hash = "sha256:ced8b23dceb22134307c1b8abfa523da14198793d9787ac838e70e29e77458d4"}, + {file = "semver-2.13.0.tar.gz", hash = "sha256:fa0fe2722ee1c3f57eac478820c3a5ae2f624af8264cbdf9000c980ff7f75e3f"}, +] +send2trash = [ + {file = "Send2Trash-1.5.0-py3-none-any.whl", hash = "sha256:f1691922577b6fa12821234aeb57599d887c4900b9ca537948d2dac34aea888b"}, + {file = "Send2Trash-1.5.0.tar.gz", hash = "sha256:60001cc07d707fe247c94f74ca6ac0d3255aabcb930529690897ca2a39db28b2"}, +] +sentencepiece = [ + {file = "sentencepiece-0.1.94-cp35-cp35m-macosx_10_6_x86_64.whl", hash = "sha256:7b6c794d30272a5e635e958fdb4976dd991bf35eed90441104a042b2e51723c7"}, + {file = "sentencepiece-0.1.94-cp35-cp35m-manylinux2014_aarch64.whl", hash = "sha256:b5e3eedad0ef5b3a4ae1d201fc0edc7f4b4d567c016913d4b996ebf0ab66748b"}, + {file = "sentencepiece-0.1.94-cp35-cp35m-manylinux2014_i686.whl", hash = "sha256:58db565195ee31efbaca9d00937f9f73aa131cc820c2ad46a39ac62f8671866f"}, + {file = "sentencepiece-0.1.94-cp35-cp35m-manylinux2014_ppc64le.whl", hash = "sha256:cbde526df19d6bcfa2b8503b2a4bf6996dd3172f631fd2b7efd7b6435d96407c"}, + {file = "sentencepiece-0.1.94-cp35-cp35m-manylinux2014_s390x.whl", hash = "sha256:b01057743c2488c8d6e7b45b0732ee23976ac3d58d11cd90390cbc3221c07402"}, + {file = "sentencepiece-0.1.94-cp35-cp35m-manylinux2014_x86_64.whl", hash = 
"sha256:cd6434909e1c8494b3254bf3150420e45489214d9bc7ab6ad4d1804d75d6d58f"}, + {file = "sentencepiece-0.1.94-cp36-cp36m-macosx_10_6_x86_64.whl", hash = "sha256:7b4867845e6935c43e37042a451d2ce84d9d97365300151a8c1c1cc724acad32"}, + {file = "sentencepiece-0.1.94-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:4d7d0844a57156b630fb98e21203c2755b342824b8c5a445e4ac78612c291218"}, + {file = "sentencepiece-0.1.94-cp36-cp36m-manylinux2014_i686.whl", hash = "sha256:a75f418bd92c6c92e2ee0c95e89b45b76bc54e45ed7cf2b3b74d313b263d1baa"}, + {file = "sentencepiece-0.1.94-cp36-cp36m-manylinux2014_ppc64le.whl", hash = "sha256:995e645a94107e46317987d348216a0fb1ae3a8befec9c99cc506b8994aa133d"}, + {file = "sentencepiece-0.1.94-cp36-cp36m-manylinux2014_s390x.whl", hash = "sha256:232a882ebf074966e24943119ab83554642bd339bd5d6bd2641092133983bc6a"}, + {file = "sentencepiece-0.1.94-cp36-cp36m-manylinux2014_x86_64.whl", hash = "sha256:db744b73b5a5fd7adfa5cfc4eb4b7d0f408c2059783fd52c934b49743a0d2326"}, + {file = "sentencepiece-0.1.94-cp36-cp36m-win32.whl", hash = "sha256:1d7c9f52a2e32a7a2eb9685ddf74a86b5df94fcaccf37be661ac9bb5c9db4893"}, + {file = "sentencepiece-0.1.94-cp36-cp36m-win_amd64.whl", hash = "sha256:11bd70be4baf4e67b1714e43bcd1e7fed0ce04616a20388367299846fdaf712d"}, + {file = "sentencepiece-0.1.94-cp37-cp37m-macosx_10_6_x86_64.whl", hash = "sha256:9c8476febe8eb0a165cf04192ebd2b15124d83cfc44269e10d2a83ace677f109"}, + {file = "sentencepiece-0.1.94-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:9d2245d400424ab261e3253308001606668126a08efdc19ee2c348b0e228e1e1"}, + {file = "sentencepiece-0.1.94-cp37-cp37m-manylinux2014_i686.whl", hash = "sha256:e4aef0be184f3c5b72a1c3f7e01fbf245eb3b3c70365f823e24542008afe387f"}, + {file = "sentencepiece-0.1.94-cp37-cp37m-manylinux2014_ppc64le.whl", hash = "sha256:5c2969c4f62039d82f761c9548011bf39673a1eb8dc8f747943b88851523c943"}, + {file = "sentencepiece-0.1.94-cp37-cp37m-manylinux2014_s390x.whl", hash = "sha256:c9d440d9ecf8c8787b89bc8596f7a47c548a9968f802d654faaf5652598ffbb0"}, + {file = "sentencepiece-0.1.94-cp37-cp37m-manylinux2014_x86_64.whl", hash = "sha256:295ef1ccf570c33728040a461cf837611495e8a5bd954012a5784fb3529ff460"}, + {file = "sentencepiece-0.1.94-cp37-cp37m-win32.whl", hash = "sha256:9d446ad41744a898f34800ee492553b4a24255a0f922cb32fe33a3c0a865d153"}, + {file = "sentencepiece-0.1.94-cp37-cp37m-win_amd64.whl", hash = "sha256:fd12969cf8420870bee743398e2e60f722d1ffdf9d201dc1d6b09096c971bfd9"}, + {file = "sentencepiece-0.1.94-cp38-cp38-macosx_10_6_x86_64.whl", hash = "sha256:3f6c0b5c501053a2f9d99daccbf187f367ded5ae35e9e031feae56188b352433"}, + {file = "sentencepiece-0.1.94-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:05e6ef6a669d2e2d3232d95acfb2a9d255272484b898ea0650659d95448bf93f"}, + {file = "sentencepiece-0.1.94-cp38-cp38-manylinux2014_i686.whl", hash = "sha256:1e6b711563163fc8cf2c873d08b4495244859e3f6d6c18859b524395d8550482"}, + {file = "sentencepiece-0.1.94-cp38-cp38-manylinux2014_ppc64le.whl", hash = "sha256:bf524fa6243cfd05a04f65a6b17516ddd58438adf3c35df02ca3ebb832270a47"}, + {file = "sentencepiece-0.1.94-cp38-cp38-manylinux2014_s390x.whl", hash = "sha256:ed49f1187a25db531e2ad95718a5640a3f7e0467bc82e4267cc6f7b6caa3054a"}, + {file = "sentencepiece-0.1.94-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:fb31a1827da0de50dc8ca33d4e657121594092c7231a4fb2d6a86149dfd98bc5"}, + {file = "sentencepiece-0.1.94-cp38-cp38-win32.whl", hash = "sha256:9c87f759dddefff52c12d4a3500a00faf22ea476a004c33c78794699069d8fc9"}, + {file = 
"sentencepiece-0.1.94-cp38-cp38-win_amd64.whl", hash = "sha256:4c11b2fc89c71510a900e2dbd4d93fb18a867ce7160f298bb6bb8a581d646d63"}, + {file = "sentencepiece-0.1.94-cp39-cp39-macosx_10_6_x86_64.whl", hash = "sha256:88ef71e36b09ddd53498064efaec5470a09698df2427362cc4e86198d88aa01e"}, + {file = "sentencepiece-0.1.94-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:a89d90b45ba5025fcd19cad685c7572624a036d883091af967a75f3793c2aee4"}, + {file = "sentencepiece-0.1.94-cp39-cp39-manylinux2014_i686.whl", hash = "sha256:c571b26017d8dd1c47dc2eeae09caa15cfe3d2f31fb01f004d463403a1f1349b"}, + {file = "sentencepiece-0.1.94-cp39-cp39-manylinux2014_ppc64le.whl", hash = "sha256:42d35adb51eb530d57c56c2cd445dbf9bd9db36bf82741aa5b42216f7f34c12d"}, + {file = "sentencepiece-0.1.94-cp39-cp39-manylinux2014_s390x.whl", hash = "sha256:fee3e6849b9e0cef774fb003ba2950b282b1910cdd761794bbf8dc0aa9d5f7d3"}, + {file = "sentencepiece-0.1.94-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:e5074d8239dcc6130dce8ffd734ab797f86679fc75a4a1d96adc243293178c05"}, + {file = "sentencepiece-0.1.94.tar.gz", hash = "sha256:849d74885f6f7af03a5d354b919bf23c757f94257d7a068bc464efd70d651e3a"}, +] +six = [ + {file = "six-1.15.0-py2.py3-none-any.whl", hash = "sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced"}, + {file = "six-1.15.0.tar.gz", hash = "sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259"}, +] +smart-open = [ + {file = "smart_open-4.1.0.tar.gz", hash = "sha256:26af5c1a3f2b76aab8c3200310f0fc783790ec5a231ffeec102e620acdd6262e"}, +] +sniffio = [ + {file = "sniffio-1.2.0-py3-none-any.whl", hash = "sha256:471b71698eac1c2112a40ce2752bb2f4a4814c22a54a3eed3676bc0f5ca9f663"}, + {file = "sniffio-1.2.0.tar.gz", hash = "sha256:c4666eecec1d3f50960c6bdf61ab7bc350648da6c126e3cf6898d8cd4ddcd3de"}, +] +snowballstemmer = [ + {file = "snowballstemmer-2.0.0-py2.py3-none-any.whl", hash = "sha256:209f257d7533fdb3cb73bdbd24f436239ca3b2fa67d56f6ff88e86be08cc5ef0"}, + {file = "snowballstemmer-2.0.0.tar.gz", hash = "sha256:df3bac3df4c2c01363f3dd2cfa78cce2840a79b9f1c2d2de9ce8d31683992f52"}, +] +spacy = [ + {file = "spacy-2.3.5-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:faa728e56f7b8fe0a70c4bedc42611da23de86b783f6ad588a92c115f427b90c"}, + {file = "spacy-2.3.5-cp36-cp36m-manylinux2014_x86_64.whl", hash = "sha256:cecb9987a875620d0f185ff07dd04cd64d5097de48689e506256a27a46a644a1"}, + {file = "spacy-2.3.5-cp36-cp36m-win_amd64.whl", hash = "sha256:4e2e79ab7c2af2af8a91913d6d096dd2e6a5a422142cfb35b30c574f776b9fd7"}, + {file = "spacy-2.3.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:ec9eebfae2a35e464d1c35aa2109422765967ba5b10fa9f11da8873801d2241a"}, + {file = "spacy-2.3.5-cp37-cp37m-manylinux2014_x86_64.whl", hash = "sha256:f153d8aa6104694389ef85c578ac1a3900b142f108248c7b9f5790d010fbe4ee"}, + {file = "spacy-2.3.5-cp37-cp37m-win_amd64.whl", hash = "sha256:4b7c0c8ab94c6433f08633fef415a054d1f3345b205bcb064578c79f35192917"}, + {file = "spacy-2.3.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:118a92582b1054b5de7bc5ed763f47ee89388847ede1e0597c6df4b509643e14"}, + {file = "spacy-2.3.5-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:c7b3d7928d047e5abcd591f8cf6a1c508da16423d371b8a21332101cab46ff7c"}, + {file = "spacy-2.3.5-cp38-cp38-win_amd64.whl", hash = "sha256:14bb12de0d03beb2d8309f194154db70fb364a0fae727e864c2b0228bf3438d8"}, + {file = "spacy-2.3.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3e9496f5ea3d08f2b9fc3e326c2c8cc7886df0db982a41dca2521d3f22ca043e"}, + {file = 
"spacy-2.3.5-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:49f7818bd8a597887013fdaaea3263d8b6e99ca64db0933c32f0896158898209"}, + {file = "spacy-2.3.5-cp39-cp39-win_amd64.whl", hash = "sha256:45497775e986d2790c7ee3625c565e3ef7e9ffa607d50230aa3382dd6d9b26e7"}, + {file = "spacy-2.3.5.tar.gz", hash = "sha256:315278ab60094643baecd866017c7d4cbd966efd2d517ad0e6c888edf7fa5aef"}, +] +sphinx = [ + {file = "Sphinx-3.4.2-py3-none-any.whl", hash = "sha256:b8aa4eb5502c53d3b5ca13a07abeedacd887f7770c198952fd5b9530d973e767"}, + {file = "Sphinx-3.4.2.tar.gz", hash = "sha256:77dec5ac77ca46eee54f59cf477780f4fb23327b3339ef39c8471abb829c1285"}, +] +sphinx-rtd-theme = [ + {file = "sphinx_rtd_theme-0.5.1-py2.py3-none-any.whl", hash = "sha256:fa6bebd5ab9a73da8e102509a86f3fcc36dec04a0b52ea80e5a033b2aba00113"}, + {file = "sphinx_rtd_theme-0.5.1.tar.gz", hash = "sha256:eda689eda0c7301a80cf122dad28b1861e5605cbf455558f3775e1e8200e83a5"}, +] +sphinxcontrib-applehelp = [ + {file = "sphinxcontrib-applehelp-1.0.2.tar.gz", hash = "sha256:a072735ec80e7675e3f432fcae8610ecf509c5f1869d17e2eecff44389cdbc58"}, + {file = "sphinxcontrib_applehelp-1.0.2-py2.py3-none-any.whl", hash = "sha256:806111e5e962be97c29ec4c1e7fe277bfd19e9652fb1a4392105b43e01af885a"}, +] +sphinxcontrib-devhelp = [ + {file = "sphinxcontrib-devhelp-1.0.2.tar.gz", hash = "sha256:ff7f1afa7b9642e7060379360a67e9c41e8f3121f2ce9164266f61b9f4b338e4"}, + {file = "sphinxcontrib_devhelp-1.0.2-py2.py3-none-any.whl", hash = "sha256:8165223f9a335cc1af7ffe1ed31d2871f325254c0423bc0c4c7cd1c1e4734a2e"}, +] +sphinxcontrib-htmlhelp = [ + {file = "sphinxcontrib-htmlhelp-1.0.3.tar.gz", hash = "sha256:e8f5bb7e31b2dbb25b9cc435c8ab7a79787ebf7f906155729338f3156d93659b"}, + {file = "sphinxcontrib_htmlhelp-1.0.3-py2.py3-none-any.whl", hash = "sha256:3c0bc24a2c41e340ac37c85ced6dafc879ab485c095b1d65d2461ac2f7cca86f"}, +] +sphinxcontrib-jsmath = [ + {file = "sphinxcontrib-jsmath-1.0.1.tar.gz", hash = "sha256:a9925e4a4587247ed2191a22df5f6970656cb8ca2bd6284309578f2153e0c4b8"}, + {file = "sphinxcontrib_jsmath-1.0.1-py2.py3-none-any.whl", hash = "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178"}, +] +sphinxcontrib-qthelp = [ + {file = "sphinxcontrib-qthelp-1.0.3.tar.gz", hash = "sha256:4c33767ee058b70dba89a6fc5c1892c0d57a54be67ddd3e7875a18d14cba5a72"}, + {file = "sphinxcontrib_qthelp-1.0.3-py2.py3-none-any.whl", hash = "sha256:bd9fc24bcb748a8d51fd4ecaade681350aa63009a347a8c14e637895444dfab6"}, +] +sphinxcontrib-serializinghtml = [ + {file = "sphinxcontrib-serializinghtml-1.1.4.tar.gz", hash = "sha256:eaa0eccc86e982a9b939b2b82d12cc5d013385ba5eadcc7e4fed23f4405f77bc"}, + {file = "sphinxcontrib_serializinghtml-1.1.4-py2.py3-none-any.whl", hash = "sha256:f242a81d423f59617a8e5cf16f5d4d74e28ee9a66f9e5b637a18082991db5a9a"}, +] +sqlitedict = [ + {file = "sqlitedict-1.7.0.tar.gz", hash = "sha256:2affcc301aacd4da7511692601ecbde392294205af418498f7d6d3ec0dbcad56"}, +] +srsly = [ + {file = "srsly-1.0.5-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:a696e9c925e91f76ec53840c55483a4fbf76cb717424410a4f249d4805439038"}, + {file = "srsly-1.0.5-cp36-cp36m-manylinux2014_x86_64.whl", hash = "sha256:8fc4c0641537262e15c7b5b57edc47487b15ac47b696adcb81e0a770ef78e8f5"}, + {file = "srsly-1.0.5-cp36-cp36m-win_amd64.whl", hash = "sha256:11447f8e659e1f62f29302252fb057f179031457b36c83426027182f624fe565"}, + {file = "srsly-1.0.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:a2746afccfd4f51f0793cccc2b6d5e8a564c962870feec5c77408244c1dbb3c5"}, + {file = 
"srsly-1.0.5-cp37-cp37m-manylinux2014_x86_64.whl", hash = "sha256:a1449da4195e30a3bd1fd3122e5b1a0c57703843c590643555c412fc87132aa0"}, + {file = "srsly-1.0.5-cp37-cp37m-win_amd64.whl", hash = "sha256:23c7205b8c1cac49a03521bee37f0afe3680d9f0ec18c75ab3ac39bd3e15272b"}, + {file = "srsly-1.0.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2615b8713dfe793ca57925076b0869385d56754816b1eaee5490a6827a1cb5c7"}, + {file = "srsly-1.0.5-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:b5b887328ac6e210842560fcf32a29c2a9c1ed38c6d47479cadc03d81940da8c"}, + {file = "srsly-1.0.5-cp38-cp38-win_amd64.whl", hash = "sha256:fd5e1e01f5fd0f532a6f3977bb74facc42f1b7155402ee3d06c07a73e83e3c47"}, + {file = "srsly-1.0.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:779ebfaa3cf1d5c0f1286ac1baf06af5f2a17bb103622992c71acc6ac20b2781"}, + {file = "srsly-1.0.5-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:4c43a1f28e555891a1e65650adea2c5d0f0fe4b3d63821de65c8357f32c3a11c"}, + {file = "srsly-1.0.5-cp39-cp39-win_amd64.whl", hash = "sha256:334f29435099e644a8047b63d60b8386a98b5f7b4739f7efc86b46ca0200aa0e"}, + {file = "srsly-1.0.5.tar.gz", hash = "sha256:d3dd796372367c71946d0cd6f734e49db3d99dd13a57bdac937d9eb62689fc9e"}, +] +stanza = [ + {file = "stanza-1.1.1-py3-none-any.whl", hash = "sha256:281e9f47623790e2770cf42f837346724c45a16a767302ec538287c54767eed7"}, + {file = "stanza-1.1.1.tar.gz", hash = "sha256:df8d683e371e06d9a6b9a67be1afa20e0e6328927e7dfd0cc77010eab35bf663"}, +] +tabulate = [ + {file = "tabulate-0.8.7-py3-none-any.whl", hash = "sha256:ac64cb76d53b1231d364babcd72abbb16855adac7de6665122f97b593f1eb2ba"}, + {file = "tabulate-0.8.7.tar.gz", hash = "sha256:db2723a20d04bcda8522165c73eea7c300eda74e0ce852d9022e0159d7895007"}, +] +tensorboard = [ + {file = "tensorboard-2.4.0-py3-none-any.whl", hash = "sha256:cde0c663a85609441cb4d624e7255fd8e2b6b1d679645095aac8a234a2812738"}, +] +tensorboard-plugin-wit = [ + {file = "tensorboard_plugin_wit-1.7.0-py3-none-any.whl", hash = "sha256:ee775f04821185c90d9a0e9c56970ee43d7c41403beb6629385b39517129685b"}, +] +tensorboardx = [ + {file = "tensorboardX-2.1-py2.py3-none-any.whl", hash = "sha256:2d81c10d9e3225dcd9bb5fb277588610bdf45317603e7682f6953d83b5b38f6a"}, + {file = "tensorboardX-2.1.tar.gz", hash = "sha256:9e8907cf2ab900542d6cb72bf91aa87b43005a7f0aa43126268697e3727872f9"}, +] +tensorflow = [ + {file = "tensorflow-2.3.1-cp35-cp35m-macosx_10_6_intel.whl", hash = "sha256:8490c06c72d6b2227f0bda4800bfbe9004ade3f25f5ccaac2581531bf2885ab5"}, + {file = "tensorflow-2.3.1-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:85c49c951d735c651ae989a6dd7a40ab8032317179d634f871e2e7556dc82a69"}, + {file = "tensorflow-2.3.1-cp35-cp35m-win_amd64.whl", hash = "sha256:94c7d1916844fd7db53dd8d9b2c88b48119d39992ae542ec8a076d6f806cc989"}, + {file = "tensorflow-2.3.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:68afc5f01f32827a53c23a9aa8cd404cdcf308e90942d4a8023e7f9e669a330a"}, + {file = "tensorflow-2.3.1-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:f4aa1dd6e7a040c29b3567ad1f4537aebeb58fcb6bafbc11f11c2c461d6fda63"}, + {file = "tensorflow-2.3.1-cp36-cp36m-win_amd64.whl", hash = "sha256:4632c2c6c84ed3b5e29e4b292704a1a646e1aa06587b7a2404b6ecc99e07758a"}, + {file = "tensorflow-2.3.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:1f72edee9d2e8861edbb9e082608fd21de7113580b3fdaa4e194b472c2e196d0"}, + {file = "tensorflow-2.3.1-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:859afb9166ace41ee71f62938fc645981113bb3227b847c8cd2875549c9fa1dc"}, + {file = 
"tensorflow-2.3.1-cp37-cp37m-win_amd64.whl", hash = "sha256:b69a6f0a8e7158c3bc14b22ec0d03bd303e196644d5428094bacea0ed8507af7"}, + {file = "tensorflow-2.3.1-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:cdce1f71f592d840dd3e05b67f1010f616311d9856250ff772db537f39ef2992"}, + {file = "tensorflow-2.3.1-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:87750a476aa6f76b3aad5e6182faf2a3036a3d4c0db3b6d7463ebbaf4b184a23"}, + {file = "tensorflow-2.3.1-cp38-cp38-win_amd64.whl", hash = "sha256:87b62ab25816597a5e5352604b383b292eafd19a33ae7848b5275ea74fc4da1d"}, +] +tensorflow-estimator = [ + {file = "tensorflow_estimator-2.3.0-py2.py3-none-any.whl", hash = "sha256:b75e034300ccb169403cf2695adf3368da68863aeb0c14c3760064c713d5c486"}, +] +termcolor = [ + {file = "termcolor-1.1.0.tar.gz", hash = "sha256:1d6d69ce66211143803fbc56652b41d73b4a400a2891d7bf7a1cdf4c02de613b"}, +] +terminado = [ + {file = "terminado-0.9.1-py3-none-any.whl", hash = "sha256:c55f025beb06c2e2669f7ba5a04f47bb3304c30c05842d4981d8f0fc9ab3b4e3"}, + {file = "terminado-0.9.1.tar.gz", hash = "sha256:3da72a155b807b01c9e8a5babd214e052a0a45a975751da3521a1c3381ce6d76"}, +] +terminaltables = [ + {file = "terminaltables-3.1.0.tar.gz", hash = "sha256:f3eb0eb92e3833972ac36796293ca0906e998dc3be91fbe1f8615b331b853b81"}, +] +testpath = [ + {file = "testpath-0.4.4-py2.py3-none-any.whl", hash = "sha256:bfcf9411ef4bf3db7579063e0546938b1edda3d69f4e1fb8756991f5951f85d4"}, + {file = "testpath-0.4.4.tar.gz", hash = "sha256:60e0a3261c149755f4399a1fff7d37523179a70fdc3abdf78de9fc2604aeec7e"}, +] +textattack = [ + {file = "textattack-0.2.15-py3-none-any.whl", hash = "sha256:cf6e293e456995642e80efc0452f28ff85d70fc042583663df6a0d7b0ecd1044"}, + {file = "textattack-0.2.15.tar.gz", hash = "sha256:0a86ecfb2c8a4436a1afeb144a3ec5a315dd3b3b0ddd65f6af19b43d9d46753f"}, +] +textblob = [ + {file = "textblob-0.15.3-py2.py3-none-any.whl", hash = "sha256:b0eafd8b129c9b196c8128056caed891d64b7fa20ba570e1fcde438f4f7dd312"}, + {file = "textblob-0.15.3.tar.gz", hash = "sha256:7ff3c00cb5a85a30132ee6768b8c68cb2b9d76432fec18cd1b3ffe2f8594ec8c"}, +] +thinc = [ + {file = "thinc-7.4.5-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:5774007b5c52501cab5e2970cadca84923b4c420fff06172f2d0c86531973ce8"}, + {file = "thinc-7.4.5-cp36-cp36m-manylinux2014_x86_64.whl", hash = "sha256:02b71ae5a0fa906a0aca968bd65589e0ab9fabd511e57be839774228b1509224"}, + {file = "thinc-7.4.5-cp36-cp36m-win_amd64.whl", hash = "sha256:8b647de79fe5f98cd327983bf0e27d006b48ad9694ceabdb9a3832b614ed1618"}, + {file = "thinc-7.4.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:cce68c5ea54cd32cef661858363509afdedad047027e8cdf0dc4edec0c2cc010"}, + {file = "thinc-7.4.5-cp37-cp37m-manylinux2014_x86_64.whl", hash = "sha256:29a47ad0289dda0520b5af8538b30e8134553130200b83c34311feb71739968d"}, + {file = "thinc-7.4.5-cp37-cp37m-win_amd64.whl", hash = "sha256:24086aa0fb72f466782115d529574a825c89afa62eb817962b9339f61ab50e0d"}, + {file = "thinc-7.4.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:d01ab1480d37ebefcac22d63ffe01916c9f025ae3dbdbe5824ac3ea5cce8e3fd"}, + {file = "thinc-7.4.5-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:309ec4cae81f4de2e4e4fbd0bcb52b10bef4b1a6352c6a9143f6a53d3b1060ef"}, + {file = "thinc-7.4.5-cp38-cp38-win_amd64.whl", hash = "sha256:10bafe5ddce698180098345b9c55f762dc3456558be844d35d64175e511581b6"}, + {file = "thinc-7.4.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c43ed753aa70bc619e42e168be4926c8a47799af6121ff0727ba99b330afbb44"}, + {file = 
"thinc-7.4.5-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:c408ab24b24e6368ce4b6ddebb579118042a22d3f2f2c4e19ca67e3eadc9ed33"}, + {file = "thinc-7.4.5-cp39-cp39-win_amd64.whl", hash = "sha256:fae320de65af70786c1526ffc33b88f2da650d3106f5f9a06b37f0ac3944a44f"}, + {file = "thinc-7.4.5.tar.gz", hash = "sha256:5743fde41706252ec6ce4737c68d3505f7e1cc3d4431174a17149838d594f8cb"}, +] +threadpoolctl = [ + {file = "threadpoolctl-2.1.0-py3-none-any.whl", hash = "sha256:38b74ca20ff3bb42caca8b00055111d74159ee95c4370882bbff2b93d24da725"}, + {file = "threadpoolctl-2.1.0.tar.gz", hash = "sha256:ddc57c96a38beb63db45d6c159b5ab07b6bced12c45a1f07b2b92f272aebfa6b"}, +] +tokenizers = [ + {file = "tokenizers-0.9.4-cp35-cp35m-macosx_10_11_x86_64.whl", hash = "sha256:082de5272363aee13f36641065a3dd2d78f5b51486e3ab7d6d34138905a46303"}, + {file = "tokenizers-0.9.4-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:543dcb31b8534cf3ad66817f925f50f4ccd182ed1433fcd07adaed5d389f682b"}, + {file = "tokenizers-0.9.4-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:89f816e5aa61c464e9d82025f2c4f1f66cd92f648ab9194a154ba2b0e180dc70"}, + {file = "tokenizers-0.9.4-cp35-cp35m-manylinux2014_aarch64.whl", hash = "sha256:768f36e743604f567f4e4817a76738ed1bcdaecfef5ae8c74bdf2277a7a1902d"}, + {file = "tokenizers-0.9.4-cp35-cp35m-manylinux2014_ppc64le.whl", hash = "sha256:800917d7085245db0b55f88b2a12bd0ba4eb5966e8b88bd9f21aa46aadfa8204"}, + {file = "tokenizers-0.9.4-cp35-cp35m-manylinux2014_s390x.whl", hash = "sha256:bce664d24c744387760beab14cc7bd4e405bbef93c333ba3ca4a93347949c3ba"}, + {file = "tokenizers-0.9.4-cp35-cp35m-win32.whl", hash = "sha256:b57fc7f2003f1f7b873dcffd5d0ee7c71f01709c54c36f4d191e4a7911d49565"}, + {file = "tokenizers-0.9.4-cp35-cp35m-win_amd64.whl", hash = "sha256:1313d63ce286c6c9812a51ea39ae84cf1b8f2887c8ce8cc813459fdfbf526c9b"}, + {file = "tokenizers-0.9.4-cp36-cp36m-macosx_10_11_x86_64.whl", hash = "sha256:2dd1156815cf2ca2a0942c8efc72e0725b6cd4640a61e026c72bf5a330f4383a"}, + {file = "tokenizers-0.9.4-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:58e1904c3e75e37be379ee4b29b21b05189d54bfab0260b334cff6e5a44a4f45"}, + {file = "tokenizers-0.9.4-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:4fd1a765af0a7aff7dab58d7fcd63a2e4a860e829b931bdfd59e2c56ba1769b9"}, + {file = "tokenizers-0.9.4-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:3cf5b470b2e06aadee22771740d87a706216385f881308c70cb317476ec40904"}, + {file = "tokenizers-0.9.4-cp36-cp36m-manylinux2014_ppc64le.whl", hash = "sha256:c83f7a26d6f0c765906440c7f2b726cbd18e5c7a63e0364095600c91e2905cc4"}, + {file = "tokenizers-0.9.4-cp36-cp36m-manylinux2014_s390x.whl", hash = "sha256:427257e78b71e9310d0c035df9b054525d1da91cc46efbae95fee2d523b88eb9"}, + {file = "tokenizers-0.9.4-cp36-cp36m-win32.whl", hash = "sha256:4a5ddd6689e18b6c5398b97134e79e948e1bbe7664f6962aa63f50fb05cae091"}, + {file = "tokenizers-0.9.4-cp36-cp36m-win_amd64.whl", hash = "sha256:53395c4423e8309b208f1e973337c08a3cb68af5eb9dee8d8618428fd4579803"}, + {file = "tokenizers-0.9.4-cp37-cp37m-macosx_10_11_x86_64.whl", hash = "sha256:d2824dedd9f26e3757159d99c743b287ebf78775ccf4a36a3e0ec7058ee66303"}, + {file = "tokenizers-0.9.4-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:b49f17c2ac2bf88875a74d63e8070fd5a69e8c3b2874dee47649826b603a3af1"}, + {file = "tokenizers-0.9.4-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:da361a88b21cd141441fb139d1ee05c815103d49d10b49bfb4218a240d0d5a84"}, + {file = "tokenizers-0.9.4-cp37-cp37m-manylinux2014_aarch64.whl", hash = 
"sha256:a03c101d8058c851a7647cc74c68d4db511d7a3db8a73f7ec715e4fe14281ed7"}, + {file = "tokenizers-0.9.4-cp37-cp37m-manylinux2014_ppc64le.whl", hash = "sha256:8d8ca7daa2f2274ec9327961ac828c20fcadd76e88d07f611742f240a6c73abe"}, + {file = "tokenizers-0.9.4-cp37-cp37m-manylinux2014_s390x.whl", hash = "sha256:9de00f951fa8c1cf5c54a5a813447c9bf810759822de6ba6cfa42d7f503ff799"}, + {file = "tokenizers-0.9.4-cp37-cp37m-win32.whl", hash = "sha256:535cf3edfd0df2c1887ea388691dd8f614331f47b41cb40c0901a2ce070ff7e0"}, + {file = "tokenizers-0.9.4-cp37-cp37m-win_amd64.whl", hash = "sha256:f3351eef9187ba7b9ceb04ff74fcda535f26c4146fe40155c6ed6087302944fd"}, + {file = "tokenizers-0.9.4-cp38-cp38-macosx_10_11_x86_64.whl", hash = "sha256:06e1a1c50c7600d8162d8f0eeed460ad9e9234ffee7d5c7bcd1308024d781647"}, + {file = "tokenizers-0.9.4-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:c60b8ba2d8a948bb40c39223a4b2553c7c1df9f732b0077722b91df5d63c5e37"}, + {file = "tokenizers-0.9.4-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:31184c4691aed1e84088d7a18c1000bbc59f7bedeec95774ec4027129ea16272"}, + {file = "tokenizers-0.9.4-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:abdbd169738c33e2e643e7701230f43c2f4e6e03d49283d4250f19159f6a6c71"}, + {file = "tokenizers-0.9.4-cp38-cp38-manylinux2014_ppc64le.whl", hash = "sha256:ac4c0a2f052a83146c6475dc22f9eb740d352b29779ac6036459f00d897025b8"}, + {file = "tokenizers-0.9.4-cp38-cp38-manylinux2014_s390x.whl", hash = "sha256:96879e21be25b63fb99fa7d65b50b05c2a0333f104ca003917df7433d6eb073e"}, + {file = "tokenizers-0.9.4-cp38-cp38-win32.whl", hash = "sha256:1764a705be63fb61abcaa96637399f124528f9a01925c88efb438aefe315b61b"}, + {file = "tokenizers-0.9.4-cp38-cp38-win_amd64.whl", hash = "sha256:a3180c8a1cb77eca8fe9c291e0f197aee202c93ffdea4f96d06ca154f319980c"}, + {file = "tokenizers-0.9.4-cp39-cp39-macosx_10_11_x86_64.whl", hash = "sha256:d518ef8323690cd4d51979ff2f44edbac5862db8c8af125e815e41cf4517c638"}, + {file = "tokenizers-0.9.4-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:807f321731a3466b9e0230cbc8e6d9c5581d5ac6536d96360b5fe1ec457d837f"}, + {file = "tokenizers-0.9.4-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:3ea6d65a32c8b3236553e489573f42855af484d24bf96ab32a5d6d1a2c4b0ed0"}, + {file = "tokenizers-0.9.4-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:15440ba1db7c7b3eb7b5881b276555e25420ce14639926585837b7b60ddb55a8"}, + {file = "tokenizers-0.9.4-cp39-cp39-manylinux2014_ppc64le.whl", hash = "sha256:bd46747f5c7d6e1721234d5ec1c0038bcfe0050c147c92171c3ef5b36d6fb2a9"}, + {file = "tokenizers-0.9.4-cp39-cp39-manylinux2014_s390x.whl", hash = "sha256:9f79b57a4d6a1aa8379a931e8ee54cb155cc3f5f1ba5172bcdea504dbd4cb746"}, + {file = "tokenizers-0.9.4-cp39-cp39-win32.whl", hash = "sha256:c496748853c0300b8b7be916e130f0de8224575ee72e8889405477f120bfe575"}, + {file = "tokenizers-0.9.4-cp39-cp39-win_amd64.whl", hash = "sha256:2479ef9a30fe8a961cb49c8bf6a5c5e2ce8e1b87849374c9756f41cf06189bdf"}, + {file = "tokenizers-0.9.4.tar.gz", hash = "sha256:3ea3038008f1f74c8a1e1e2e73728690eed2d7fa4db0a51bcea391e644672426"}, +] +toml = [ + {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"}, + {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"}, +] +toolz = [ + {file = "toolz-0.11.1-py3-none-any.whl", hash = "sha256:1bc473acbf1a1db4e72a1ce587be347450e8f08324908b8a266b486f408f04d5"}, + {file = "toolz-0.11.1.tar.gz", hash = 
"sha256:c7a47921f07822fe534fb1c01c9931ab335a4390c782bd28c6bcc7c2f71f3fbf"}, +] +torch = [ + {file = "torch-1.7.1-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:422e64e98d0e100c360993819d0307e5d56e9517b26135808ad68984d577d75a"}, + {file = "torch-1.7.1-cp36-cp36m-win_amd64.whl", hash = "sha256:f0aaf657145533824b15f2fd8fde8f8c67fe6c6281088ef588091f03fad90243"}, + {file = "torch-1.7.1-cp36-none-macosx_10_9_x86_64.whl", hash = "sha256:af464a6f4314a875035e0c4c2b07517599704b214634f4ed3ad2e748c5ef291f"}, + {file = "torch-1.7.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:5d76c255a41484c1d41a9ff570b9c9f36cb85df9428aa15a58ae16ac7cfc2ea6"}, + {file = "torch-1.7.1-cp37-cp37m-win_amd64.whl", hash = "sha256:d241c3f1c4d563e4ba86f84769c23e12606db167ee6f674eedff6d02901462e3"}, + {file = "torch-1.7.1-cp37-none-macosx_10_9_x86_64.whl", hash = "sha256:de84b4166e3f7335eb868b51d3bbd909ec33828af27290b4171bce832a55be3c"}, + {file = "torch-1.7.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:dd2fc6880c95e836960d86efbbc7f63d3287f2e1893c51d31f96dbfe02f0d73e"}, + {file = "torch-1.7.1-cp38-cp38-win_amd64.whl", hash = "sha256:e000b94be3aa58ad7f61e7d07cf379ea9366cf6c6874e68bd58ad0bdc537b3a7"}, + {file = "torch-1.7.1-cp38-none-macosx_10_9_x86_64.whl", hash = "sha256:2e49cac969976be63117004ee00d0a3e3dd4ea662ad77383f671b8992825de1a"}, + {file = "torch-1.7.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:a3793dcceb12b1e2281290cca1277c5ce86ddfd5bf044f654285a4d69057aea7"}, + {file = "torch-1.7.1-cp39-cp39-win_amd64.whl", hash = "sha256:6652a767a0572ae0feb74ad128758e507afd3b8396b6e7f147e438ba8d4c6f63"}, + {file = "torch-1.7.1-cp39-none-macosx_10_9_x86_64.whl", hash = "sha256:38d67f4fb189a92a977b2c0a38e4f6dd413e0bf55aa6d40004696df7e40a71ff"}, +] +tornado = [ + {file = "tornado-6.1-cp35-cp35m-macosx_10_9_x86_64.whl", hash = "sha256:d371e811d6b156d82aa5f9a4e08b58debf97c302a35714f6f45e35139c332e32"}, + {file = "tornado-6.1-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:0d321a39c36e5f2c4ff12b4ed58d41390460f798422c4504e09eb5678e09998c"}, + {file = "tornado-6.1-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:9de9e5188a782be6b1ce866e8a51bc76a0fbaa0e16613823fc38e4fc2556ad05"}, + {file = "tornado-6.1-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:61b32d06ae8a036a6607805e6720ef00a3c98207038444ba7fd3d169cd998910"}, + {file = "tornado-6.1-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:3e63498f680547ed24d2c71e6497f24bca791aca2fe116dbc2bd0ac7f191691b"}, + {file = "tornado-6.1-cp35-cp35m-manylinux2014_aarch64.whl", hash = "sha256:6c77c9937962577a6a76917845d06af6ab9197702a42e1346d8ae2e76b5e3675"}, + {file = "tornado-6.1-cp35-cp35m-win32.whl", hash = "sha256:6286efab1ed6e74b7028327365cf7346b1d777d63ab30e21a0f4d5b275fc17d5"}, + {file = "tornado-6.1-cp35-cp35m-win_amd64.whl", hash = "sha256:fa2ba70284fa42c2a5ecb35e322e68823288a4251f9ba9cc77be04ae15eada68"}, + {file = "tornado-6.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:0a00ff4561e2929a2c37ce706cb8233b7907e0cdc22eab98888aca5dd3775feb"}, + {file = "tornado-6.1-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:748290bf9112b581c525e6e6d3820621ff020ed95af6f17fedef416b27ed564c"}, + {file = "tornado-6.1-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:e385b637ac3acaae8022e7e47dfa7b83d3620e432e3ecb9a3f7f58f150e50921"}, + {file = "tornado-6.1-cp36-cp36m-manylinux2010_i686.whl", hash = "sha256:25ad220258349a12ae87ede08a7b04aca51237721f63b1808d39bdb4b2164558"}, + {file = "tornado-6.1-cp36-cp36m-manylinux2010_x86_64.whl", hash = 
"sha256:65d98939f1a2e74b58839f8c4dab3b6b3c1ce84972ae712be02845e65391ac7c"}, + {file = "tornado-6.1-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:e519d64089b0876c7b467274468709dadf11e41d65f63bba207e04217f47c085"}, + {file = "tornado-6.1-cp36-cp36m-win32.whl", hash = "sha256:b87936fd2c317b6ee08a5741ea06b9d11a6074ef4cc42e031bc6403f82a32575"}, + {file = "tornado-6.1-cp36-cp36m-win_amd64.whl", hash = "sha256:cc0ee35043162abbf717b7df924597ade8e5395e7b66d18270116f8745ceb795"}, + {file = "tornado-6.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:7250a3fa399f08ec9cb3f7b1b987955d17e044f1ade821b32e5f435130250d7f"}, + {file = "tornado-6.1-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:ed3ad863b1b40cd1d4bd21e7498329ccaece75db5a5bf58cd3c9f130843e7102"}, + {file = "tornado-6.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:dcef026f608f678c118779cd6591c8af6e9b4155c44e0d1bc0c87c036fb8c8c4"}, + {file = "tornado-6.1-cp37-cp37m-manylinux2010_i686.whl", hash = "sha256:70dec29e8ac485dbf57481baee40781c63e381bebea080991893cd297742b8fd"}, + {file = "tornado-6.1-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:d3f7594930c423fd9f5d1a76bee85a2c36fd8b4b16921cae7e965f22575e9c01"}, + {file = "tornado-6.1-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:3447475585bae2e77ecb832fc0300c3695516a47d46cefa0528181a34c5b9d3d"}, + {file = "tornado-6.1-cp37-cp37m-win32.whl", hash = "sha256:e7229e60ac41a1202444497ddde70a48d33909e484f96eb0da9baf8dc68541df"}, + {file = "tornado-6.1-cp37-cp37m-win_amd64.whl", hash = "sha256:cb5ec8eead331e3bb4ce8066cf06d2dfef1bfb1b2a73082dfe8a161301b76e37"}, + {file = "tornado-6.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:20241b3cb4f425e971cb0a8e4ffc9b0a861530ae3c52f2b0434e6c1b57e9fd95"}, + {file = "tornado-6.1-cp38-cp38-manylinux1_i686.whl", hash = "sha256:c77da1263aa361938476f04c4b6c8916001b90b2c2fdd92d8d535e1af48fba5a"}, + {file = "tornado-6.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:fba85b6cd9c39be262fcd23865652920832b61583de2a2ca907dbd8e8a8c81e5"}, + {file = "tornado-6.1-cp38-cp38-manylinux2010_i686.whl", hash = "sha256:1e8225a1070cd8eec59a996c43229fe8f95689cb16e552d130b9793cb570a288"}, + {file = "tornado-6.1-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:d14d30e7f46a0476efb0deb5b61343b1526f73ebb5ed84f23dc794bdb88f9d9f"}, + {file = "tornado-6.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:8f959b26f2634a091bb42241c3ed8d3cedb506e7c27b8dd5c7b9f745318ddbb6"}, + {file = "tornado-6.1-cp38-cp38-win32.whl", hash = "sha256:34ca2dac9e4d7afb0bed4677512e36a52f09caa6fded70b4e3e1c89dbd92c326"}, + {file = "tornado-6.1-cp38-cp38-win_amd64.whl", hash = "sha256:6196a5c39286cc37c024cd78834fb9345e464525d8991c21e908cc046d1cc02c"}, + {file = "tornado-6.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f0ba29bafd8e7e22920567ce0d232c26d4d47c8b5cf4ed7b562b5db39fa199c5"}, + {file = "tornado-6.1-cp39-cp39-manylinux1_i686.whl", hash = "sha256:33892118b165401f291070100d6d09359ca74addda679b60390b09f8ef325ffe"}, + {file = "tornado-6.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:7da13da6f985aab7f6f28debab00c67ff9cbacd588e8477034c0652ac141feea"}, + {file = "tornado-6.1-cp39-cp39-manylinux2010_i686.whl", hash = "sha256:e0791ac58d91ac58f694d8d2957884df8e4e2f6687cdf367ef7eb7497f79eaa2"}, + {file = "tornado-6.1-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:66324e4e1beede9ac79e60f88de548da58b1f8ab4b2f1354d8375774f997e6c0"}, + {file = "tornado-6.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:a48900ecea1cbb71b8c71c620dee15b62f85f7c14189bdeee54966fbd9a0c5bd"}, + 
{file = "tornado-6.1-cp39-cp39-win32.whl", hash = "sha256:d3d20ea5782ba63ed13bc2b8c291a053c8d807a8fa927d941bd718468f7b950c"}, + {file = "tornado-6.1-cp39-cp39-win_amd64.whl", hash = "sha256:548430be2740e327b3fe0201abe471f314741efcb0067ec4f2d7dcfb4825f3e4"}, + {file = "tornado-6.1.tar.gz", hash = "sha256:33c6e81d7bd55b468d2e793517c909b139960b6c790a60b7991b9b6b76fb9791"}, +] +tqdm = [ + {file = "tqdm-4.49.0-py2.py3-none-any.whl", hash = "sha256:8f3c5815e3b5e20bc40463fa6b42a352178859692a68ffaa469706e6d38342a5"}, + {file = "tqdm-4.49.0.tar.gz", hash = "sha256:faf9c671bd3fad5ebaeee366949d969dca2b2be32c872a7092a1e1a9048d105b"}, +] +traitlets = [ + {file = "traitlets-5.0.5-py3-none-any.whl", hash = "sha256:69ff3f9d5351f31a7ad80443c2674b7099df13cc41fc5fa6e2f6d3b0330b0426"}, + {file = "traitlets-5.0.5.tar.gz", hash = "sha256:178f4ce988f69189f7e523337a3e11d91c786ded9360174a3d9ca83e79bc5396"}, +] +transformers = [ + {file = "transformers-4.0.1-py3-none-any.whl", hash = "sha256:e147e7d245453637b0057c23257784fc4e6475903fdd660c547c236e256e9198"}, + {file = "transformers-4.0.1.tar.gz", hash = "sha256:6f754c4336418d97296b3cc13ebd5169bdc0ed0e6d19c8a2bad084da779caad7"}, +] +typed-ast = [ + {file = "typed_ast-1.4.2-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:7703620125e4fb79b64aa52427ec192822e9f45d37d4b6625ab37ef403e1df70"}, + {file = "typed_ast-1.4.2-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:c9aadc4924d4b5799112837b226160428524a9a45f830e0d0f184b19e4090487"}, + {file = "typed_ast-1.4.2-cp35-cp35m-manylinux2014_aarch64.whl", hash = "sha256:9ec45db0c766f196ae629e509f059ff05fc3148f9ffd28f3cfe75d4afb485412"}, + {file = "typed_ast-1.4.2-cp35-cp35m-win32.whl", hash = "sha256:85f95aa97a35bdb2f2f7d10ec5bbdac0aeb9dafdaf88e17492da0504de2e6400"}, + {file = "typed_ast-1.4.2-cp35-cp35m-win_amd64.whl", hash = "sha256:9044ef2df88d7f33692ae3f18d3be63dec69c4fb1b5a4a9ac950f9b4ba571606"}, + {file = "typed_ast-1.4.2-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:c1c876fd795b36126f773db9cbb393f19808edd2637e00fd6caba0e25f2c7b64"}, + {file = "typed_ast-1.4.2-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:5dcfc2e264bd8a1db8b11a892bd1647154ce03eeba94b461effe68790d8b8e07"}, + {file = "typed_ast-1.4.2-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:8db0e856712f79c45956da0c9a40ca4246abc3485ae0d7ecc86a20f5e4c09abc"}, + {file = "typed_ast-1.4.2-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:d003156bb6a59cda9050e983441b7fa2487f7800d76bdc065566b7d728b4581a"}, + {file = "typed_ast-1.4.2-cp36-cp36m-win32.whl", hash = "sha256:4c790331247081ea7c632a76d5b2a265e6d325ecd3179d06e9cf8d46d90dd151"}, + {file = "typed_ast-1.4.2-cp36-cp36m-win_amd64.whl", hash = "sha256:d175297e9533d8d37437abc14e8a83cbc68af93cc9c1c59c2c292ec59a0697a3"}, + {file = "typed_ast-1.4.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:cf54cfa843f297991b7388c281cb3855d911137223c6b6d2dd82a47ae5125a41"}, + {file = "typed_ast-1.4.2-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:b4fcdcfa302538f70929eb7b392f536a237cbe2ed9cba88e3bf5027b39f5f77f"}, + {file = "typed_ast-1.4.2-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:987f15737aba2ab5f3928c617ccf1ce412e2e321c77ab16ca5a293e7bbffd581"}, + {file = "typed_ast-1.4.2-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:37f48d46d733d57cc70fd5f30572d11ab8ed92da6e6b28e024e4a3edfb456e37"}, + {file = "typed_ast-1.4.2-cp37-cp37m-win32.whl", hash = "sha256:36d829b31ab67d6fcb30e185ec996e1f72b892255a745d3a82138c97d21ed1cd"}, + {file = "typed_ast-1.4.2-cp37-cp37m-win_amd64.whl", hash = 
"sha256:8368f83e93c7156ccd40e49a783a6a6850ca25b556c0fa0240ed0f659d2fe496"}, + {file = "typed_ast-1.4.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:963c80b583b0661918718b095e02303d8078950b26cc00b5e5ea9ababe0de1fc"}, + {file = "typed_ast-1.4.2-cp38-cp38-manylinux1_i686.whl", hash = "sha256:e683e409e5c45d5c9082dc1daf13f6374300806240719f95dc783d1fc942af10"}, + {file = "typed_ast-1.4.2-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:84aa6223d71012c68d577c83f4e7db50d11d6b1399a9c779046d75e24bed74ea"}, + {file = "typed_ast-1.4.2-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:a38878a223bdd37c9709d07cd357bb79f4c760b29210e14ad0fb395294583787"}, + {file = "typed_ast-1.4.2-cp38-cp38-win32.whl", hash = "sha256:a2c927c49f2029291fbabd673d51a2180038f8cd5a5b2f290f78c4516be48be2"}, + {file = "typed_ast-1.4.2-cp38-cp38-win_amd64.whl", hash = "sha256:c0c74e5579af4b977c8b932f40a5464764b2f86681327410aa028a22d2f54937"}, + {file = "typed_ast-1.4.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:07d49388d5bf7e863f7fa2f124b1b1d89d8aa0e2f7812faff0a5658c01c59aa1"}, + {file = "typed_ast-1.4.2-cp39-cp39-manylinux1_i686.whl", hash = "sha256:240296b27397e4e37874abb1df2a608a92df85cf3e2a04d0d4d61055c8305ba6"}, + {file = "typed_ast-1.4.2-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:d746a437cdbca200622385305aedd9aef68e8a645e385cc483bdc5e488f07166"}, + {file = "typed_ast-1.4.2-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:14bf1522cdee369e8f5581238edac09150c765ec1cb33615855889cf33dcb92d"}, + {file = "typed_ast-1.4.2-cp39-cp39-win32.whl", hash = "sha256:cc7b98bf58167b7f2db91a4327da24fb93368838eb84a44c472283778fc2446b"}, + {file = "typed_ast-1.4.2-cp39-cp39-win_amd64.whl", hash = "sha256:7147e2a76c75f0f64c4319886e7639e490fee87c9d25cb1d4faef1d8cf83a440"}, + {file = "typed_ast-1.4.2.tar.gz", hash = "sha256:9fc0b3cb5d1720e7141d103cf4819aea239f7d136acf9ee4a69b047b7986175a"}, +] +typing-extensions = [ + {file = "typing_extensions-3.7.4.3-py2-none-any.whl", hash = "sha256:dafc7639cde7f1b6e1acc0f457842a83e722ccca8eef5270af2d74792619a89f"}, + {file = "typing_extensions-3.7.4.3-py3-none-any.whl", hash = "sha256:7cb407020f00f7bfc3cb3e7881628838e69d8f3fcab2f64742a5e76b2f841918"}, + {file = "typing_extensions-3.7.4.3.tar.gz", hash = "sha256:99d4073b617d30288f569d3f13d2bd7548c3a7e4c8de87db09a9d29bb3a4a60c"}, +] +untokenize = [ + {file = "untokenize-0.1.1.tar.gz", hash = "sha256:3865dbbbb8efb4bb5eaa72f1be7f3e0be00ea8b7f125c69cbd1f5fda926f37a2"}, +] +urllib3 = [ + {file = "urllib3-1.26.2-py2.py3-none-any.whl", hash = "sha256:d8ff90d979214d7b4f8ce956e80f4028fc6860e4431f731ea4a8c08f23f99473"}, + {file = "urllib3-1.26.2.tar.gz", hash = "sha256:19188f96923873c92ccb987120ec4acaa12f0461fa9ce5d3d0772bc965a39e08"}, +] +virtualenv = [ + {file = "virtualenv-20.2.2-py2.py3-none-any.whl", hash = "sha256:54b05fc737ea9c9ee9f8340f579e5da5b09fb64fd010ab5757eb90268616907c"}, + {file = "virtualenv-20.2.2.tar.gz", hash = "sha256:b7a8ec323ee02fb2312f098b6b4c9de99559b462775bc8fe3627a73706603c1b"}, +] +wasabi = [ + {file = "wasabi-0.8.0-py3-none-any.whl", hash = "sha256:98bc9c492c6aa8628303a02961a5cfa7b0c7fa6d2b397abdeb0adc4b39397c49"}, + {file = "wasabi-0.8.0.tar.gz", hash = "sha256:75fec6db6193c8615d7f398ae4aa2c4ad294e6e3e81c6a6dbbbd3864ee2223c3"}, +] +wcwidth = [ + {file = "wcwidth-0.2.5-py2.py3-none-any.whl", hash = "sha256:beb4802a9cebb9144e99086eff703a642a13d6a0052920003a230f3294bbe784"}, + {file = "wcwidth-0.2.5.tar.gz", hash = "sha256:c4d647b99872929fdb7bdcaa4fbe7f01413ed3d98077df798530e5b04f116c83"}, +] +webencodings = [ + {file = 
"webencodings-0.5.1-py2.py3-none-any.whl", hash = "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78"}, + {file = "webencodings-0.5.1.tar.gz", hash = "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923"}, +] +werkzeug = [ + {file = "Werkzeug-1.0.1-py2.py3-none-any.whl", hash = "sha256:2de2a5db0baeae7b2d2664949077c2ac63fbd16d98da0ff71837f7d1dea3fd43"}, + {file = "Werkzeug-1.0.1.tar.gz", hash = "sha256:6c80b1e5ad3665290ea39320b91e1be1e0d5f60652b964a3070216de83d2e47c"}, +] +widgetsnbextension = [ + {file = "widgetsnbextension-3.5.1-py2.py3-none-any.whl", hash = "sha256:bd314f8ceb488571a5ffea6cc5b9fc6cba0adaf88a9d2386b93a489751938bcd"}, + {file = "widgetsnbextension-3.5.1.tar.gz", hash = "sha256:079f87d87270bce047512400efd70238820751a11d2d8cb137a5a5bdbaf255c7"}, +] +word2number = [ + {file = "word2number-1.1.zip", hash = "sha256:70e27a5d387f67b04c71fbb7621c05930b19bfd26efd6851e6e0f9969dcde7d0"}, +] +wrapt = [ + {file = "wrapt-1.12.1.tar.gz", hash = "sha256:b62ffa81fb85f4332a4f609cab4ac40709470da05643a082ec1eb88e6d9b97d7"}, +] +xxhash = [ + {file = "xxhash-2.0.0-cp27-cp27m-macosx_10_6_intel.whl", hash = "sha256:df8d1ebdef86bd5d772d81c91d5d111a5ee8e4b68b8fc6b6edfa5aa825dd2a3d"}, + {file = "xxhash-2.0.0-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:f01c59f5bad2e46bb4235b71b36c56be353f08b6d514a3bd0deb9bf56e4b180a"}, + {file = "xxhash-2.0.0-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:cb4feeb8881eb89b9ddd0fae797deb078ebdaad6b1ae6c185b9993d241ed365a"}, + {file = "xxhash-2.0.0-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:2912d7810bcf7e39b3929fb186fe46ff83b1bd4a3d6b7eba956d57fa1516ac0c"}, + {file = "xxhash-2.0.0-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:48b99c55fc643b32f5efca9c35fcaac6ea553958cf503e202c10eb62718e7a0e"}, + {file = "xxhash-2.0.0-cp27-cp27m-win32.whl", hash = "sha256:3221f1a5bc2ee1f150b84a0c4c7cddc7724aaa01460f3353cf63fd667d89f593"}, + {file = "xxhash-2.0.0-cp27-cp27m-win_amd64.whl", hash = "sha256:cba4b6d174b524623ac8b64bda734601d574f95033f87ddf9c495c69a70135e8"}, + {file = "xxhash-2.0.0-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:b94f13f4f946500f3cc78f11da4ec4b340bd92c5200b5fe4e6aeac96064aa1fd"}, + {file = "xxhash-2.0.0-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:635b1d7fa85d215112f41d089bd113ac139f6a42769fcc49c73e779904160f7f"}, + {file = "xxhash-2.0.0-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:0f5f1b9ae8e2cf2ff606018769f7e46147df70291312f64e1b80d10482ca8c0b"}, + {file = "xxhash-2.0.0-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:8f90deec6567a38e1da29feff36973468691e309b2db8235e64936e61df77c43"}, + {file = "xxhash-2.0.0-cp35-cp35m-macosx_10_6_intel.whl", hash = "sha256:8b7e930a60dfe7380e52466aa27941290dd575a5750c622158c86941797eaa1b"}, + {file = "xxhash-2.0.0-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:44b26872fd63f1eaf1ab527817aebbd455a3fdcbd56ff6df74fd42a6a137cff4"}, + {file = "xxhash-2.0.0-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:3d25b540148f1ebf4852e4115f3f4819b585ecd36f121a1f388e8966d69d3a1c"}, + {file = "xxhash-2.0.0-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:bcd1e9f3ba8df23edefe1d0a886f16b4e27602acbd8575b39540fea26e1aa6d2"}, + {file = "xxhash-2.0.0-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:fc03a399205268815742125b17d967afa9f23b08cdafe185e41368cf7ba9b278"}, + {file = "xxhash-2.0.0-cp35-cp35m-manylinux2014_aarch64.whl", hash = "sha256:bdbc195231c87d63b0503785d9c5264f4275a92da41d9f28fdf08fb321453356"}, + {file = "xxhash-2.0.0-cp35-cp35m-win32.whl", hash = 
"sha256:7291392bdb1d38c44557dfd3fcd4fd04c363a696dbfa7e6592700a31e4ff6657"}, + {file = "xxhash-2.0.0-cp35-cp35m-win_amd64.whl", hash = "sha256:e0fc170c3a00ca008d992c2e6324da3f1467b30044b5835d2feb27870645d38c"}, + {file = "xxhash-2.0.0-cp36-cp36m-macosx_10_6_intel.whl", hash = "sha256:5b3c0c84187556d463626ceed85f0d735a5b8ea1678da3e858d3934f38f23915"}, + {file = "xxhash-2.0.0-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:2f0ca6673fcbae988389576a779c00a62a28718a18ddc7b2e5b32d7fb30c6f98"}, + {file = "xxhash-2.0.0-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:d1859d54837af16ae2a7975477e619793ac698a374d909f533e317c3b384b223"}, + {file = "xxhash-2.0.0-cp36-cp36m-manylinux2010_i686.whl", hash = "sha256:9d0311fcd78dabe04ab3b4034659628b00ac220e77e37648f73aebbf4cb13680"}, + {file = "xxhash-2.0.0-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:0ecea927fd3df8f3f3a1d6e5bc85838eb44a69ea2f4c9263dfd0f68c4e17e483"}, + {file = "xxhash-2.0.0-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:4167f22b037e128820f7642ecc1fbf1b4b4956346093a2e75081bee82b9cfb7e"}, + {file = "xxhash-2.0.0-cp36-cp36m-win32.whl", hash = "sha256:85c5de6c56335b75beef2cba713f95a1b62422be5e27dad30b5083419c6839c4"}, + {file = "xxhash-2.0.0-cp36-cp36m-win_amd64.whl", hash = "sha256:ade1c356acd0b0454a3d3cf42442afe7ad0f46fc944ea1e84720b3858bfdb772"}, + {file = "xxhash-2.0.0-cp37-cp37m-macosx_10_6_intel.whl", hash = "sha256:fca7d0fb6fde33d1ac5f97298f44e711e5fe1b4587832864be8c6545cb072a54"}, + {file = "xxhash-2.0.0-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:e296b0dee072a54c40c04f09ca35bb9902bb74b54f0fffeafabfc937b3ec85f9"}, + {file = "xxhash-2.0.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:02476c5cef803cfd1350662b1e543e47ad64bd5f7f792033d94d590f9674da11"}, + {file = "xxhash-2.0.0-cp37-cp37m-manylinux2010_i686.whl", hash = "sha256:28c1f0bb6dadc11162d1f2e203d7a12d38b511b87fbb5ffa729594fd456f48e6"}, + {file = "xxhash-2.0.0-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:922ae5b1efa1f9a9cc959f7197113a623ad110853622e990433242a9d8d00d5c"}, + {file = "xxhash-2.0.0-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:7709bc8a5e30c74b07203553f33232531e7739458f72204908cedb08a00bd546"}, + {file = "xxhash-2.0.0-cp37-cp37m-win32.whl", hash = "sha256:fb3c9760598009b1d8bbe57785e278aeb956efb7372d8f9b0bb43cd46f420dff"}, + {file = "xxhash-2.0.0-cp37-cp37m-win_amd64.whl", hash = "sha256:3f29f6d455388cc415fe52c0f63f442aaea674cee35a2252d8d4dc8d640938c6"}, + {file = "xxhash-2.0.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:bf360465dc3d24b1501b799c85815c82ddcfc0ffbcba0232968f3a7cd64306fc"}, + {file = "xxhash-2.0.0-cp38-cp38-manylinux1_i686.whl", hash = "sha256:5d2edbb50025a67f061d09d381c54c7d0948c1572f6c9bd15ee238a303d368d9"}, + {file = "xxhash-2.0.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:7943ede91d8aedfcacb7178b2d881b7498145590206ff61c3e84dc66e6a51d6a"}, + {file = "xxhash-2.0.0-cp38-cp38-manylinux2010_i686.whl", hash = "sha256:b5c2edb8b0a2acc5bdac984b3177711f206463b970aa03087221771c2b0d8f1d"}, + {file = "xxhash-2.0.0-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:16e4b7d508bb49b6fc84bf077f2f7f51263b5618cc61f33a64ed43786ec2c6cf"}, + {file = "xxhash-2.0.0-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:80903d4ce7337921bbc8e5ac695b45691b43c0a00b21964c76e19ea21b9108ea"}, + {file = "xxhash-2.0.0-cp38-cp38-win32.whl", hash = "sha256:e37b25182e969212d5aec60a8da7d1e6a960dbffdb9ba4c63e2240de3605c184"}, + {file = "xxhash-2.0.0-cp38-cp38-win_amd64.whl", hash = 
"sha256:fabee25186b6649bbf6ff258f23941339902374786f8317b0422144ddaa505df"}, + {file = "xxhash-2.0.0-pp27-pypy_73-manylinux1_x86_64.whl", hash = "sha256:be93004b832717234a7d2f47dc555428ab1e8712f99cad7d212cebe0e27d3d48"}, + {file = "xxhash-2.0.0-pp27-pypy_73-manylinux2010_x86_64.whl", hash = "sha256:1b86f49b36c25ebdbd1b5539d428a37d9051ad49eb576a3edd964a8770bc8f3a"}, + {file = "xxhash-2.0.0-pp27-pypy_73-win32.whl", hash = "sha256:bde4d39997de901d0a66ebd631b34f9cf106676fec0878f36b7baf630cb3965a"}, + {file = "xxhash-2.0.0-pp36-pypy36_pp73-manylinux1_x86_64.whl", hash = "sha256:99b5412a3eddb1aa9aaf36cdbf93be4eca99ad83ff8c692672fdeedc7fb597de"}, + {file = "xxhash-2.0.0-pp36-pypy36_pp73-manylinux2010_x86_64.whl", hash = "sha256:33c4832e689f429539d70baf69162b41dfbabc7f31ca542b5b772cb8a55e7a79"}, + {file = "xxhash-2.0.0-pp36-pypy36_pp73-win32.whl", hash = "sha256:82034c9ed54db20f051133cba01de959b5208fe2900e67ebb4c9631f1fd523fd"}, + {file = "xxhash-2.0.0.tar.gz", hash = "sha256:58ca818554c1476fa1456f6cd4b87002e2294f09baf0f81e5a2a4968e62c423c"}, +] diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..e35088a8 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,74 @@ +[tool.poetry] +name = "robustnessgym" +version = "0.0.2-alpha.0" +description = "Robustness Gym is an evaluation toolkit for natural language processing." +authors = ["Robustness Gym "] +maintainers = ["Karan Goel "] +license = "Apache-2.0" +readme = "README.md" +homepage = "https://robustnessgym.com" +repository = "https://github.com/robustness-gym/robustness-gym/" +keywords = [ + 'Machine Learning', + 'Natural Language Processing', + 'Evaluation', +] +classifiers = [ + "Topic :: Scientific/Engineering :: Artificial Intelligence", +] +documentation = "https://robustnessgym.readthedocs.io" + +[tool.poetry.urls] +"Issue Tracker" = "https://github.com/robustness-gym/robustness-gym/issues" + +[tool.poetry.dependencies] +python = "^3.8" +numpy = "^1.18.0" +dill = "^0.3.3" +tqdm = "^4.27.0" +datasets = "^1.1.3" +pyahocorasick = "^1.4.0" +nlpaug = "^1.1.1" +Cython = "^0.29.21" +cytoolz = "^0.11.0" +transformers = "^4.0.0" +progressbar = "^2.5" +nltk = "^3.5" +textblob = "^0.15.3" +spacy = "^2.3.5" +multiprocess = "^0.70.11" +fuzzywuzzy = "^0.18.0" +python-Levenshtein = "^0.12.0" +allennlp = "^1.3.0" +allennlp-models = "^1.3.0" +pytorch-lightning = "^1.1.2" +kaleido = "0.1.0" +fastBPE = "^0.1.0" +omegaconf = "^2.0.5" +hydra-core = "^1.0.4" +jupyterlab = "^3.0.0" +textattack = "^0.2.15" +plotly = "^4.14.1" +ipywidgets = "^7.6.2" +stanza = "^1.1.1" +rouge-score = "^0.0.4" +semver = "^2.13.0" +jsonlines = "^1.2.0" +tensorflow = "^2.3.0" + +[tool.poetry.extras] + +[tool.poetry.dev-dependencies] +black = "^20.8b1" +isort = "^5.7.0" +flake8 = "^3.8.4" +docformatter = "^1.4" +pytest-cov = "^2.10.1" +sphinx-rtd-theme = "^0.5.1" +nbsphinx = "^0.8.0" +recommonmark = "^0.7.1" +pre-commit = "^2.9.3" + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" diff --git a/robustnessgym/__init__.py b/robustnessgym/__init__.py new file mode 100644 index 00000000..fcbc4433 --- /dev/null +++ b/robustnessgym/__init__.py @@ -0,0 +1,202 @@ +"""Import common classes.""" +# flake8: noqa +from robustnessgym.cachedops.allen.allen_predictor import AllenPredictor +from robustnessgym.cachedops.allen.constituency_parser import AllenConstituencyParser +from robustnessgym.cachedops.allen.dependency_parser import AllenDependencyParser +from robustnessgym.cachedops.allen.semantic_role_labeler import 
AllenSemanticRoleLabeler +from robustnessgym.cachedops.bootleg import Bootleg +from robustnessgym.cachedops.similarity import ( + RougeMatrix, + RougeScore, + SentenceSimilarityMatrix, +) +from robustnessgym.cachedops.spacy import Spacy +from robustnessgym.cachedops.stanza import Stanza +from robustnessgym.cachedops.strip_text import StripText +from robustnessgym.cachedops.textblob import TextBlob +from robustnessgym.core.cachedops import ( + CachedOperation, + SingleColumnCachedOperation, + stow, +) +from robustnessgym.core.dataset import Dataset +from robustnessgym.core.identifier import Identifier +from robustnessgym.core.slice import Slice +from robustnessgym.core.testbench import TestBench +from robustnessgym.slicebuilders.attacks.textattack import TextAttack +from robustnessgym.slicebuilders.slicebuilder import ( + SliceBuilder, + SliceBuilderCollection, +) +from robustnessgym.slicebuilders.subpopulations.constituency_overlap import ( + ConstituencyOverlapSubpopulation, + ConstituencySubtreeSubpopulation, + FuzzyConstituencySubtreeSubpopulation, +) +from robustnessgym.slicebuilders.subpopulations.entity_frequency import EntityFrequency +from robustnessgym.slicebuilders.subpopulations.hans import ( + HansAdjectives, + HansAdjectivesCompEnt, + HansAdjectivesCompNonEnt, + HansAdverbs, + HansAdvsEntailed, + HansAdvsNonEntailed, + HansAllPhrases, + HansCalledObjects, + HansConjs, + HansConstAdv, + HansConstQuotEntailed, + HansEntComplementNouns, + HansFoodWords, + HansIntransitiveVerbs, + HansLocationNounsA, + HansLocationNounsB, + HansNonEntComplementNouns, + HansNonEntQuotVerbs, + HansNPSVerbs, + HansNPZVerbs, + HansPassiveVerbs, + HansPastParticiples, + HansPluralNouns, + HansPluralNPZVerbs, + HansPrepositions, + HansQuestionEmbeddingVerbs, + HansQuestions, + HansReadWroteObjects, + HansRelations, + HansSingularNouns, + HansToldObjects, + HansTransitiveVerbs, + HansUnderstoodArgumentVerbs, + HansWonObjects, +) +from robustnessgym.slicebuilders.subpopulations.length import LengthSubpopulation +from robustnessgym.slicebuilders.subpopulations.lexical_overlap import ( + LexicalOverlapSubpopulation, +) +from robustnessgym.slicebuilders.subpopulations.phrase import ( + AhoCorasick, + HasAllPhrases, + HasAnyPhrase, + HasComparison, + HasDefiniteArticle, + HasIndefiniteArticle, + HasNegation, + HasPhrase, + HasPosessivePreposition, + HasQuantifier, + HasTemporalPreposition, +) +from robustnessgym.slicebuilders.subpopulations.score import ScoreSubpopulation +from robustnessgym.slicebuilders.subpopulations.similarity import ( + Abstractiveness, + Dispersion, + Distillation, + Ordering, + Position, + RougeMatrixScoreSubpopulation, + RougeScoreSubpopulation, +) +from robustnessgym.slicebuilders.transformations.eda import EasyDataAugmentation +from robustnessgym.slicebuilders.transformations.fairseq import FairseqBacktranslation +from robustnessgym.slicebuilders.transformations.nlpaug import NlpAugTransformation +from robustnessgym.slicebuilders.transformations.similarity import ( + RougeMatrixSentenceTransformation, +) +from robustnessgym.tasks.task import ( + BinaryNaturalLanguageInference, + BinarySentiment, + ExtractiveQuestionAnswering, + NaturalLanguageInference, + QuestionAnswering, + Sentiment, + Summarization, + Task, + TernaryNaturalLanguageInference, +) + +from .slicebuilders.attack import Attack +from .slicebuilders.curator import Curator +from .slicebuilders.subpopulation import Subpopulation, SubpopulationCollection + +# from .attacks import * +# from .augmentations import * +# from 
.cache import * +# from .cache import ( +# CachedOperation, +# stow +# ) +# from .dataset import Dataset +# from .identifier import Identifier +# from .model import Model +# from .report import Report +# from .slice import Slice +# from .slicebuilders import * +# from .slicebuilders.attacks.textattack.textattack import TextAttack +# from .slicebuilders.slicebuilder import ( +# SliceBuilder, +# ) +# from .slicebuilders.subpopulations.constituency_overlap.constituency_overlap import ( +# HasConstituencyOverlap, +# HasConstituencySubtree, +# HasFuzzyConstituencySubtree, +# ) +# from .slicebuilders.subpopulations.length.length import HasLength +# from .slicebuilders.subpopulations.ner.entity_frequency import EntityFrequency +# from .slicebuilders.subpopulations.phrase.hans import ( +# HansAllPhrases, +# HansSingularNouns, +# HansPluralNouns, +# HansTransitiveVerbs, +# HansPassiveVerbs, +# HansIntransitiveVerbs, +# HansNPSVerbs, +# HansNPZVerbs, +# HansPluralNPZVerbs, +# HansPrepositions, +# HansConjs, +# HansPastParticiples, +# HansUnderstoodArgumentVerbs, +# HansNonEntQuotVerbs, +# HansQuestionEmbeddingVerbs, +# HansCalledObjects, +# HansToldObjects, +# HansFoodWords, +# HansLocationNounsA, +# HansLocationNounsB, +# HansWonObjects, +# HansReadWroteObjects, +# HansAdjectives, +# HansAdjectivesCompNonEnt, +# HansAdjectivesCompEnt, +# HansAdverbs, +# HansConstAdv, +# HansConstQuotEntailed, +# HansRelations, +# HansQuestions, +# HansNonEntComplementNouns, +# HansEntComplementNouns, +# HansAdvsNonEntailed, +# HansAdvsEntailed, +# ) +# from .slicebuilders.subpopulations.phrase.phrase import ( +# AhoCorasick, +# HasPhrase, +# HasAnyPhrase, +# HasAllPhrases, +# ) +# from .slicebuilders.subpopulations.phrase.wordlists import ( +# HasCategoryPhrase +# ) +# from .storage import PicklerMixin +# from .task import ( +# Task, +# NaturalLanguageInference, +# BinaryNaturalLanguageInference, +# TernaryNaturalLanguageInference, +# ) +# from .testbench.testbench import TestBench +# from .tools import ( +# recmerge, +# persistent_hash, +# ) diff --git a/robustnessgym/cachedops/__init__.py b/robustnessgym/cachedops/__init__.py new file mode 100644 index 00000000..b46edf19 --- /dev/null +++ b/robustnessgym/cachedops/__init__.py @@ -0,0 +1,16 @@ +# from .allen.allen_predictor import AllenPredictor +# from .allen.constituency_parser.constituency_parser import AllenConstituencyParser +# from .allen.dependency_parser.dependency_parser import AllenDependencyParser +# from .allen.semantic_role_labeler.semantic_role_labeler import +# AllenSemanticRoleLabeler +# from .bootleg.bootleg import Bootleg +# from .cached_ops import ( +# CachedOperation, +# stow +# ) +# from .custom.strip_text import StripText +# from .spacy.spacy import Spacy +# from .textblob.textblob import TextBlob +from .spacy import Spacy # noqa +from .stanza import Stanza # noqa +from .textblob import TextBlob # noqa diff --git a/robustnessgym/cachedops/allen/__init__.py b/robustnessgym/cachedops/allen/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/robustnessgym/cachedops/allen/allen_predictor.py b/robustnessgym/cachedops/allen/allen_predictor.py new file mode 100644 index 00000000..3b426164 --- /dev/null +++ b/robustnessgym/cachedops/allen/allen_predictor.py @@ -0,0 +1,35 @@ +from typing import List + +import torch +from allennlp.predictors import Predictor + +from robustnessgym.core.cachedops import SingleColumnCachedOperation + + +class AllenPredictor(SingleColumnCachedOperation): + def __init__(self, path: str, device: str, 
*args, **kwargs): + super(AllenPredictor, self).__init__(*args, **kwargs) + + # If no device is passed in, automatically use GPU if available + if not device: + device = "cuda" if torch.cuda.is_available() else "cpu" + + # Resolve the device + cuda_device = -1 + if device.startswith("cuda"): + cuda_device = 0 if ":" not in device else int(device.split(":")[-1]) + + # Set up Allen's predictor + self._predictor = Predictor.from_path( + archive_path=path, cuda_device=cuda_device + ) + + @property + def predictor(self): + return self._predictor + + def single_column_apply(self, column_batch: List, *args, **kwargs) -> List: + # Apply the predictor + return self.predictor.predict_batch_json( + [{"sentence": text} for text in column_batch] + ) diff --git a/robustnessgym/cachedops/allen/constituency_parser.py b/robustnessgym/cachedops/allen/constituency_parser.py new file mode 100644 index 00000000..c34bd142 --- /dev/null +++ b/robustnessgym/cachedops/allen/constituency_parser.py @@ -0,0 +1,17 @@ +from robustnessgym.cachedops.allen.allen_predictor import AllenPredictor + + +class AllenConstituencyParser(AllenPredictor): + def __init__(self, device: str = None, *args, **kwargs): + super(AllenConstituencyParser, self).__init__( + path="https://storage.googleapis.com/allennlp-public-models/elmo" + "-constituency-parser-2020.02.10.tar.gz", + device=device, + *args, + **kwargs, + ) + + @classmethod + def encode(cls, prediction) -> str: + # Extract the tree from the output of the constituency parser + return super().encode(obj=prediction["trees"]) diff --git a/robustnessgym/cachedops/allen/dependency_parser.py b/robustnessgym/cachedops/allen/dependency_parser.py new file mode 100644 index 00000000..3bba74bb --- /dev/null +++ b/robustnessgym/cachedops/allen/dependency_parser.py @@ -0,0 +1,10 @@ +from robustnessgym.cachedops.allen.allen_predictor import AllenPredictor + + +class AllenDependencyParser(AllenPredictor): + def __init__(self, device: str = None): + super(AllenDependencyParser, self).__init__( + path="https://storage.googleapis.com/allennlp-public-models/" + "biaffine-dependency-parser-ptb-2020.04.06.tar.gz", + device=device, + ) diff --git a/robustnessgym/cachedops/allen/semantic_role_labeler.py b/robustnessgym/cachedops/allen/semantic_role_labeler.py new file mode 100644 index 00000000..9b85d195 --- /dev/null +++ b/robustnessgym/cachedops/allen/semantic_role_labeler.py @@ -0,0 +1,10 @@ +from robustnessgym.cachedops.allen.allen_predictor import AllenPredictor + + +class AllenSemanticRoleLabeler(AllenPredictor): + def __init__(self, device: str = None): + super(AllenSemanticRoleLabeler, self).__init__( + path="https://storage.googleapis.com/allennlp-public-models/bert-base-srl" + "-2020.03.24.tar.gz", + device=device, + ) diff --git a/robustnessgym/cachedops/bootleg.py b/robustnessgym/cachedops/bootleg.py new file mode 100644 index 00000000..130a63e0 --- /dev/null +++ b/robustnessgym/cachedops/bootleg.py @@ -0,0 +1,122 @@ +import tarfile +import urllib.request +from typing import Dict, List + +from torch import cuda + +from robustnessgym.cachedops.textblob import TextBlob +from robustnessgym.core.cachedops import SingleColumnCachedOperation +from robustnessgym.core.decorators import singlecolumn +from robustnessgym.core.tools import DownloadProgressBar + +try: + from bootleg.annotator import Annotator + from bootleg.utils.parser_utils import get_full_config +except ImportError: + _bootleg_available = False +else: + _bootleg_available = True + + +class Bootleg(SingleColumnCachedOperation): + def 
__init__(self, threshold: float = 0.3, device: str = None, *args, **kwargs): + + if not _bootleg_available: + # TODO(karan): add instructions to install bootleg + raise ImportError( + "Bootleg not available for import. Please install Bootleg." + ) + + super(Bootleg, self).__init__(threshold=threshold, *args, **kwargs) + + # Set the device + if not device: + device = "cuda" if cuda.is_available() else "cpu" + + # Fetch sources for Bootleg + self._fetch_sources() + + # Create the annotator + self.annotator = Annotator( + config_args=self._create_config(), + device=device, + cand_map=self.logdir / "entity_db/entity_mappings/alias2qids_wiki.json", + ) + self.annotator.set_threshold(threshold) + + @classmethod + def _fetch_sources(cls): + if not (cls.logdir / "bootleg_wiki").exists(): + print("bootleg_wiki not found. Downloading..") + urllib.request.urlretrieve( + "https://bootleg-emb.s3.amazonaws.com/models/2020_08_25/bootleg_wiki" + ".tar.gz", + filename=str(cls.logdir / "bootleg_wiki.tar.gz"), + reporthook=DownloadProgressBar(), + ) + + tar = tarfile.open(str(cls.logdir / "bootleg_wiki.tar.gz"), "r:gz") + tar.extractall() + tar.close() + + if not (cls.logdir / "emb_data").exists(): + print("emb_data not found. Downloading..") + urllib.request.urlretrieve( + "https://bootleg-emb.s3.amazonaws.com/emb_data.tar.gz", + filename=str(cls.logdir / "emb_data.tar.gz"), + reporthook=DownloadProgressBar(), + ) + + tar = tarfile.open(str(cls.logdir / "emb_data.tar.gz"), "r:gz") + tar.extractall() + tar.close() + + if not (cls.logdir / "entity_db").exists(): + print("entity_db not found. Downloading..") + urllib.request.urlretrieve( + "https://bootleg-emb.s3.amazonaws.com/entity_db.tar.gz", + filename=str(cls.logdir / "entity_db.tar.gz"), + reporthook=DownloadProgressBar(), + ) + + tar = tarfile.open(str(cls.logdir / "entity_db.tar.gz"), "r:gz") + tar.extractall() + tar.close() + + @classmethod + def _create_config(cls): + # load a config for Bootleg + config_args = get_full_config(cls.logdir / "bootleg_wiki/bootleg_config.json") + + # set the model checkpoint path + config_args.run_config.init_checkpoint = ( + cls.logdir / "bootleg_wiki/bootleg_model.pt" + ) + + # set the path for the entity db and candidate map + config_args.data_config.entity_dir = cls.logdir / "entity_db" + config_args.data_config.alias_cand_map = "alias2qids_wiki.json" + + # set the embedding paths + config_args.data_config.emb_dir = cls.logdir / "emb_data" + config_args.data_config.word_embedding.cache_dir = cls.logdir / "emb_data" + + return config_args + + @singlecolumn + def apply(self, batch: Dict[str, List], columns: List[str], **kwargs) -> List: + + # Use TextBlob to split the column into sentences + blobs = TextBlob.retrieve(batch=batch, columns=columns)[columns[0]] + + # Annotate each example + return [ + [ + ( + self.annotator.extract_mentions(str(text)), + self.annotator.label_mentions(str(text)), + ) + for text in blob.sentences + ] + for blob in blobs + ] diff --git a/robustnessgym/cachedops/similarity.py b/robustnessgym/cachedops/similarity.py new file mode 100644 index 00000000..8642ccd9 --- /dev/null +++ b/robustnessgym/cachedops/similarity.py @@ -0,0 +1,128 @@ +from typing import List, Sequence + +import cytoolz as tz +import numpy as np +from datasets import load_metric + +from robustnessgym.cachedops.spacy import Spacy +from robustnessgym.core.cachedops import CachedOperation +from robustnessgym.core.dataset import Batch, transpose_batch + + +class SentenceSimilarityMatrix(CachedOperation): + def __init__(self): + 
super(SentenceSimilarityMatrix, self).__init__() + + def similarity( + self, batch_sentences_1: List[List[str]], batch_sentences_2: List[List[str]] + ) -> List: + raise NotImplementedError("Must implement a similarity computation.") + + def apply(self, batch: Batch, columns: List[str], **kwargs): + assert len(columns) == 2, "Must specify exactly two columns." + + # Retrieve the sentences in the given columns + sentences = Spacy.retrieve( + batch=batch, + columns=[[col] for col in columns], + proc_fns=Spacy.sentences, + ) + + return self.similarity(*[sentences[col] for col in columns]) + + +class DocumentSimilarityScore(CachedOperation): + def __init__(self): + super(DocumentSimilarityScore, self).__init__() + self.metric = load_metric("rouge") + + def similarity(self, batch_doc_1: List[str], batch_doc_2: List[str]): + raise NotImplementedError("Must implement a similarity computation.") + + def apply(self, batch, columns, **kwargs): + assert len(columns) == 2 + return self.similarity(*[batch[col] for col in columns]) + + +class RougeScore(DocumentSimilarityScore): + def __init__(self): + super(RougeScore, self).__init__() + self.metric = load_metric("rouge") + + def similarity(self, batch_doc_1: List[str], batch_doc_2: List[str]): + # Compute the scores between every pair of documents + scores = self.metric.compute( + predictions=batch_doc_1, references=batch_doc_2, use_agregator=False + ) + + # Transpose the batch of scores + scores = [ + tz.valmap( + lambda v: { + m: getattr(v, m) for m in ["precision", "recall", "fmeasure"] + }, + example, + ) + for example in transpose_batch(scores) + ] + + return scores + + @classmethod + def select( + cls, decoded_batch: List, metric: Sequence[str] = ("rouge1", "fmeasure") + ): + if len(metric) == 1: + return [scores[metric[0]] for scores in decoded_batch] + elif len(metric) == 2: + return [scores[metric[0]][metric[1]] for scores in decoded_batch] + else: + raise ValueError(f"metric {metric} must be a sequence of length <= 2.") + + +class RougeMatrix(SentenceSimilarityMatrix): + def __init__(self): + super(RougeMatrix, self).__init__() + self.metric = load_metric("rouge") + + def similarity( + self, batch_sentences_1: List[List[str]], batch_sentences_2: List[List[str]] + ): + batch_similarity = [] + for sents_1, sents_2 in zip(batch_sentences_1, batch_sentences_2): + # Compute the scores between every pair of sentences + scores = self.metric.compute( + predictions=np.repeat(sents_1, len(sents_2)), + references=sents_2 * len(sents_1), + use_agregator=False, + ) + + # Organize all the scores into a similarity matrix for each metric + similarity_mat = { + k: { + m: np.array([getattr(e, m) for e in v]) + .reshape(len(sents_1), len(sents_2)) + .tolist() + for m in ["precision", "recall", "fmeasure"] + } + for k, v in scores.items() + } + + batch_similarity.append(similarity_mat) + + return batch_similarity + + @classmethod + def select( + cls, decoded_batch: List, metric: Sequence[str] = ("rouge1", "fmeasure") + ): + if len(metric) == 1: + return [ + tz.valmap(np.array, matrices[metric[0]]) for matrices in decoded_batch + ] + elif len(metric) == 2: + return [ + np.array(matrices[metric[0]][metric[1]]) for matrices in decoded_batch + ] + else: + raise ValueError(f"metric {metric} must be a sequence of length <= 2.") diff --git a/robustnessgym/cachedops/spacy.py b/robustnessgym/cachedops/spacy.py new file mode 100644 index 00000000..3e679709 --- /dev/null +++ b/robustnessgym/cachedops/spacy.py @@ -0,0 +1,215 @@ +"""Cachedop with Spacy.""" +import json +from 
typing import List + +import cytoolz as tz +import spacy +import torch +from spacy.tokens import Doc + +from robustnessgym.core.cachedops import SingleColumnCachedOperation +from robustnessgym.core.dataset import BatchOrDataset + + +class Spacy(SingleColumnCachedOperation): + """Class for running the Spacy pipeline using a CachedOperation.""" + + def __init__( + self, + lang: str = "en_core_web_sm", + nlp: spacy.language.Language = None, + neuralcoref: bool = False, + device: str = None, + *args, + **kwargs + ): + + # Set all the parameters + self.lang = lang + self.neuralcoref = neuralcoref + self._prebuilt = True + + # Set the device + self._on_gpu = False + if device and (device == "gpu" or device.startswith("cuda")): + spacy.prefer_gpu( + gpu_id=0 if ":" not in device else int(device.split(":")[1]) + ) + # Spacy sets the default torch float Tensor to torch.cuda.FloatTensor, + # which causes other GPU cachedops to crash. + torch.set_default_tensor_type("torch.FloatTensor") + self._on_gpu = True + + # Load up the Spacy module + self._nlp = nlp + if not nlp: + self._nlp = self._load_spacy(lang=lang) + self._prebuilt = False + + # Add neuralcoref + self._add_neuralcoref() + + if not nlp: + super(Spacy, self).__init__( + lang=lang, + neuralcoref=neuralcoref, + *args, + **kwargs, + ) + else: + super(Spacy, self).__init__( + lang=nlp.lang, + # No need to pass in neuralcoref separately, it's already in the + # pipeline if neuralcoref=True + pipeline=nlp.pipe_names, + *args, + **kwargs, + ) + print( + "Warning: Spacy.encode does not support arbitrary nlp pipelines so " + "information stored in the Doc object may be lost in encoding." + ) + + @staticmethod + def _load_spacy(lang: str = "en_core_web_sm"): + """Load the Spacy nlp pipeline.""" + return spacy.load(lang) + + def _add_neuralcoref(self): + """Add the neuralcoref pipeline to Spacy.""" + if self.neuralcoref: + try: + import neuralcoref as nc + + nc.add_to_pipe(self.nlp) + except ImportError: + print( + "Can't import neuralcoref. Please install neuralcoref using:\n" + "git clone https://github.com/huggingface/neuralcoref.git\n" + "cd neuralcoref\n" + "pip install -r requirements.txt\n" + "pip install -e ." + ) + + def __call__( + self, + batch_or_dataset: BatchOrDataset, + columns: List[str], + batch_size: int = 8192, + ) -> BatchOrDataset: + return super().__call__(batch_or_dataset, columns, batch_size) + + @property + def nlp(self): + """Return the nlp pipeline.""" + return self._nlp + + @classmethod + def encode(cls, obj: Doc) -> str: + """Encode the Doc object. 
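For orientation, a minimal usage sketch of this cached operation (the batch contents are illustrative, the en_core_web_sm model is assumed to be installed, and the retrieve/tokens helpers are the ones defined further down in this class and its base class):

from robustnessgym.cachedops.spacy import Spacy

batch = {"index": ["0", "1"], "text": ["The cat sat.", "Dogs bark loudly."]}
batch = Spacy()(batch, columns=["text"])  # runs nlp.pipe over the column and stores encoded Docs in batch["cache"]
tokens = Spacy.retrieve(batch=batch, columns=["text"], proc_fns=Spacy.tokens)
# maps the "text" column to one token list per example, e.g. ["The", "cat", "sat", "."]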
+ + Args: + obj: + + Returns: + """ + # JSON dump the Doc + doc_json = obj.to_json() + + if obj._.has("huggingface_neuralcoref"): + # Create a helper function that turns a Span into a dictionary + span_to_dict = lambda span: { + "start": span.start, + "end": span.end, + "text": span.text, + } + + # Create a helper function that converts a Cluster (output of + # neuralcoref) into a dictionary + cluster_to_dict = lambda cluster: { + "i": cluster.i, + "main": span_to_dict(cluster.main), + "mentions": [span_to_dict(span) for span in cluster.mentions], + } + + # Apply the helper functions to construct a dictionary for the + # neuralcoref information + neuralcoref_dict = { + "neuralcoref": [ + cluster_to_dict(cluster) for cluster in obj._.coref_clusters + ] + } + + # Combine the neuralcoref dictionary with the doc_json + doc_json = tz.merge(doc_json, neuralcoref_dict) + + # Convert the Spacy Doc to json before caching + return json.dumps(doc_json) + + def single_column_apply(self, column_batch: List, *args, **kwargs) -> List: + """Apply to a single column. + + Args: + column_batch: + *args: + **kwargs: + + Returns: + """ + if self._on_gpu: + # Adjust the default Tensor type: this is instantaneous + torch.set_default_tensor_type("torch.cuda.FloatTensor") + + # Apply Spacy's pipe method to process the examples + docs = list(self.nlp.pipe(column_batch)) + + if self._on_gpu: + # Reset the default Tensor type: this is instantaneous + torch.set_default_tensor_type("torch.FloatTensor") + + return docs + + @classmethod + def tokens(cls, decoded_batch: List) -> List[List[str]]: + """For each example, returns the list of tokens extracted by Spacy for + each key. + + Spacy stores the span of each token under the "tokens" key. This + function extracts the tokens from the text using the span of + each token. + """ + + token_batch = [] + # Iterate over each decoded Doc dictionary + for doc_dict in decoded_batch: + tokens = [] + for token_dict in doc_dict["tokens"]: + tokens.append(doc_dict["text"][token_dict["start"] : token_dict["end"]]) + + token_batch.append(tokens) + + return token_batch + + @classmethod + def entities(cls, decoded_batch: List) -> List[List[dict]]: + """For each example, returns the list of entity extracted by Spacy for + each column.""" + return [doc_dict["ents"] for doc_dict in decoded_batch] + + @classmethod + def sentences(cls, decoded_batch: List) -> List[List[str]]: + """For each example, returns the list of sentences extracted by Spacy + for each column.""" + return [ + [ + doc_dict["text"][sent["start"] : sent["end"]] + for sent in doc_dict["sents"] + ] + for doc_dict in decoded_batch + ] + + @classmethod + def num_tokens(cls, decoded_batch: List) -> List[int]: + """For each example, returns the length or the number of tokens + extracted by Spacy for each column.""" + return [len(doc_dict["tokens"]) for doc_dict in decoded_batch] diff --git a/robustnessgym/cachedops/stanza.py b/robustnessgym/cachedops/stanza.py new file mode 100644 index 00000000..3776e5a4 --- /dev/null +++ b/robustnessgym/cachedops/stanza.py @@ -0,0 +1,102 @@ +from typing import List + +from robustnessgym.core.cachedops import SingleColumnCachedOperation + +try: + import stanza +except ImportError: + _stanza_available = False +else: + _stanza_available = True + + +class Stanza(SingleColumnCachedOperation): + """Class for running the Stanza pipeline using a CachedOperation. 
+ + URL: https://stanfordnlp.github.io/stanza/ + """ + + def __init__(self): + if not _stanza_available: + raise ImportError( + "Stanza not available for import. Install using " "\npip install stanza" + ) + super(Stanza, self).__init__() + + self._download() + self.nlp = stanza.Pipeline() + + def _download(self): + stanza.download() + + @classmethod + def encode(cls, obj: stanza.Document) -> str: + # Dump the Stanza Document to a string + return obj.to_serialized() + + @classmethod + def decode(cls, s: str): + # Load the Stanza Document from the string + return stanza.Document.from_serialized(s) + + def single_column_apply(self, column_batch: List, *args, **kwargs) -> List: + # Create a doc for each example + return [self.nlp(text) for text in column_batch] + + @classmethod + def _get_attribute( + cls, decoded_batch: List[stanza.Document], attribute: str + ) -> List: + """Get an arbitrary attribute using doc.get(attribute) from a list of + Stanza Documents.""" + return [doc.get(attribute) for doc in decoded_batch] + + @classmethod + def lemma(cls, decoded_batch: List[stanza.Document]) -> List[List[str]]: + """For each example, returns the list of lemmatized words.""" + return cls._get_attribute(decoded_batch, "lemma") + + @classmethod + def text(cls, decoded_batch: List[stanza.Document]) -> List[List[str]]: + """For each example, returns the list of tokens.""" + return cls._get_attribute(decoded_batch, "text") + + @classmethod + def upos(cls, decoded_batch: List[stanza.Document]) -> List[List[str]]: + """For each example, returns the list of upos.""" + return cls._get_attribute(decoded_batch, "upos") + + @classmethod + def xpos(cls, decoded_batch: List[stanza.Document]) -> List[List[str]]: + """For each example, returns the list of xpos.""" + return cls._get_attribute(decoded_batch, "xpos") + + @classmethod + def feats(cls, decoded_batch: List[stanza.Document]) -> List[List[str]]: + """For each example, returns the list of feats.""" + return cls._get_attribute(decoded_batch, "feats") + + @classmethod + def head(cls, decoded_batch: List[stanza.Document]) -> List[List[str]]: + """For each example, returns the list of head.""" + return cls._get_attribute(decoded_batch, "head") + + @classmethod + def deprel(cls, decoded_batch: List[stanza.Document]) -> List[List[str]]: + """For each example, returns the list of deprel.""" + return cls._get_attribute(decoded_batch, "deprel") + + @classmethod + def misc(cls, decoded_batch: List[stanza.Document]) -> List[List[str]]: + """For each example, returns the list of misc.""" + return cls._get_attribute(decoded_batch, "misc") + + @classmethod + def entities(cls, decoded_batch: List[stanza.Document]) -> List[List[str]]: + """For each example, returns the list of entities.""" + return [doc.entities for doc in decoded_batch] + + @classmethod + def id(cls, decoded_batch: List[stanza.Document]) -> List[List[str]]: + """For each example, returns the list of ids.""" + return cls._get_attribute(decoded_batch, "id") diff --git a/robustnessgym/cachedops/strip_text.py b/robustnessgym/cachedops/strip_text.py new file mode 100644 index 00000000..836519d9 --- /dev/null +++ b/robustnessgym/cachedops/strip_text.py @@ -0,0 +1,21 @@ +from typing import List + +from robustnessgym.core.cachedops import SingleColumnCachedOperation + + +class StripText(SingleColumnCachedOperation): + def __init__(self): + super(StripText, self).__init__() + + def single_column_apply(self, column_batch: List, *args, **kwargs) -> List: + # Clean up each text with a simple function and return the 
stripped text + return list( + map( + lambda text: text.lower() + .replace(".", "") + .replace("?", "") + .replace("!", "") + .replace(",", ""), + column_batch, + ) + ) diff --git a/robustnessgym/cachedops/textblob.py b/robustnessgym/cachedops/textblob.py new file mode 100644 index 00000000..3a80a05a --- /dev/null +++ b/robustnessgym/cachedops/textblob.py @@ -0,0 +1,45 @@ +from typing import Dict, List, Optional, Union + +from robustnessgym.core.cachedops import SingleColumnCachedOperation +from robustnessgym.core.identifier import Identifier + +try: + import textblob +except ImportError: + _textblob_available = False +else: + _textblob_available = True + + +class TextBlob(SingleColumnCachedOperation): + def __init__(self): + if not _textblob_available: + raise ImportError( + "TextBlob not available for import. Install using " + "\npip install textblob\npython -m textblob.download_corpora" + ) + # TODO(karan): requires running `python -m textblob.download_corpora` + super(TextBlob, self).__init__() + + @classmethod + def encode(cls, obj: textblob.TextBlob) -> str: + # Dump the TextBlob object to JSON + # This loses a lot of information + # Unfortunately, TextBlob provides no way to serialize/deserialize objects + return obj.to_json() + + @classmethod + def retrieve( + cls, + batch: Dict[str, List], + columns: List[str], + identifier: Union[str, Identifier] = None, + reapply: bool = False, + **kwargs + ) -> Optional[Dict[str, List]]: + # Default to reapplying the TextBlob op when retrieving + return super().retrieve(batch, columns, identifier, reapply=True, **kwargs) + + def single_column_apply(self, column_batch: List, *args, **kwargs) -> List: + # Create a TextBlob for each example + return [textblob.TextBlob(text) for text in column_batch] diff --git a/robustnessgym/core/__init__.py b/robustnessgym/core/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/robustnessgym/core/cachedops.py b/robustnessgym/core/cachedops.py new file mode 100644 index 00000000..b29b64f0 --- /dev/null +++ b/robustnessgym/core/cachedops.py @@ -0,0 +1,587 @@ +import pathlib +from functools import partial +from typing import Callable, Dict, List, Optional, Union + +from robustnessgym.core.constants import CACHEDOPS +from robustnessgym.core.dataset import Batch, BatchOrDataset, Dataset +from robustnessgym.core.decorators import singlecolumn +from robustnessgym.core.identifier import Identifier +from robustnessgym.core.operation import Operation +from robustnessgym.core.tools import ( + class_or_instancemethod, + persistent_hash, + recmerge, + strings_as_json, +) + + +class CachedOperation(Operation): + """Class to create CachedOperations.""" + + # Path to a log directory + logdir: pathlib.Path = pathlib.Path.home() / "robustnessgym/cachedops/" + + # Create a directory + logdir.mkdir(parents=True, exist_ok=True) + + def __init__( + self, apply_fn: Callable = None, identifier: Identifier = None, *args, **kwargs + ): + + super(CachedOperation, self).__init__( + apply_fn=apply_fn, + identifiers=[identifier] if identifier else None, + num_outputs=1, + *args, + **kwargs, + ) + + def __repr__(self): + """Representation of a cached operation object. + + Returns: string representation + """ + return str(self.identifier) + + @property + def identifier(self): + return self.identifiers[0] + + @staticmethod + def store(batch: Batch, updates: List[Dict]) -> Batch: + """Updates the cache of preprocessed information stored with each + example in a batch. 
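Concretely, each example carries its own cache dictionary, keyed first by the operation's identifier and then by the JSON encoding of the column list; `op` and `columns` below are placeholders (see construct_updates further down in this file):

# after `op` has been applied over `columns`, for example i:
batch["cache"][i][str(op.identifier)][strings_as_json(columns)]  # == op.encode(output_for_example_i)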
+ + Args: + batch: a batch of data + updates: a list of dictionaries, one per example + + Returns: updated batch + """ + if "cache" not in batch: + batch["cache"] = [{} for _ in range(len(batch["index"]))] + + # For each example, recursively merge the example's original cache dictionary + # with the update dictionary + batch["cache"] = [ + recmerge(cache_dict, update_dict) + for cache_dict, update_dict in zip(batch["cache"], updates) + ] + + return batch + + @class_or_instancemethod + def retrieve( + self_or_cls, + batch: Batch, + columns: Union[List[str], List[List[str]]], + proc_fns: Union[str, Callable, List[Union[str, Callable]]] = None, + identifier: Union[str, Identifier] = None, + reapply: bool = False, + **kwargs, + ) -> Optional[Union[Batch, List[Batch]]]: + """Retrieve information from the cache. + + Args: + batch: a batch of data + columns: list of columns to retrieve cached information for + proc_fns: list of processing functions to be executed left to right on + the cached data + identifier: name of the identifier to retrieve + reapply: whether to recompute the cached operation at retrieval + + Returns: dict mapping a column to a list of length len(batch) + """ + if not reapply: + # Nothing to return if there's no cache + if "cache" not in batch: + raise ValueError( + "`cache` key missing: nothing has been cached yet. " + "Are you sure you ran a CachedOperation?" + ) + + # Infer the most relevant key to retrieve if an identifier is not specified + if not identifier: + if isinstance(self_or_cls, type): + # cls + target_ident_key = self_or_cls.__name__ + else: + # self + target_ident_key = str(self_or_cls.identifier) + + # TODO(karan): iterate over all keys and pick the best match, + # rather than breaking + for ident_key in batch["cache"][0].keys(): + # Pick the first key that matches the cls name or instance + # identifier + if ident_key.startswith(target_ident_key): + identifier = ident_key + break + + # Still no identifier + if not identifier: + raise ValueError( + f"Retrieval failed: couldn't find a key called " + f"{target_ident_key} in cache." + ) + + try: + if isinstance(columns[0], str): + retrieval = { + strings_as_json(columns): [ + self_or_cls.decode( + cache[str(identifier)][strings_as_json(columns)] + ) + for cache in batch["cache"] + ] + } + else: + retrieval = { + strings_as_json(cols_): [ + self_or_cls.decode( + cache[str(identifier)][strings_as_json(cols_)] + ) + for cache in batch["cache"] + ] + for cols_ in columns + } + + except KeyError: + raise KeyError( + "Could not retrieve information for all columns. " + "If you're trying to retrieve information for multiple columns, " + "use columns=[[col_1], [col_2], ..] " + "instead of columns=[col_1, col_2, ..]." 
+ ) + + # Check if the retrieved information needs to be processed + if not proc_fns: + return retrieval + + # Resolve the str proc_fns to callable(s) + if isinstance(proc_fns, str): + proc_fns = getattr(self_or_cls, proc_fns) + elif isinstance(proc_fns, List): + proc_fns = [ + proc_fn + if isinstance(proc_fn, Callable) + else getattr(self_or_cls, proc_fn) + for proc_fn in proc_fns + ] + + # Process and return the retrieved information + if isinstance(proc_fns, Callable): + return {k: proc_fns(v) for k, v in retrieval.items()} + + return [ + {k: proc_fn(v) for k, v in retrieval.items()} for proc_fn in proc_fns + ] + + else: + if proc_fns: + print("Warning: proc_fns has no effect when reapply=True.") + + # Run the operation on the fly + # TODO(karan): does this work for ops that require process_dataset + if isinstance(columns[0], str): + return { + strings_as_json(columns): self_or_cls(**kwargs).apply( + batch=batch, columns=columns + ) + if isinstance(self_or_cls, type) + else self_or_cls.apply(batch=batch, columns=columns) + } + return { + strings_as_json(cols_): self_or_cls(**kwargs).apply( + batch=batch, columns=cols_ + ) + if isinstance(self_or_cls, type) + else self_or_cls.apply(batch=batch, columns=cols_) + for cols_ in columns + } + + def get_cache_hash(self, columns: Optional[List[str]] = None): + """Construct a hash that will be used to identify the application of a + cached operation to the columns of a dataset.""" + + val = hash(self) + if columns: + for key in columns: + val ^= persistent_hash(key) + return val + + def get_cache_file_name(self, columns=None): + """Construct a file name for caching.""" + return "cache-" + str(abs(self.get_cache_hash(columns=columns))) + ".arrow" + + def prepare_batch(self, batch: Batch, columns: List[str]) -> Batch: + """Preparation that is applied before the CachedOperation. + + This is provided as a convenience function that can be called by + prepare_dataset. + + Args: + batch: batch of examples + columns: list of columns + + Returns: updated batch + """ + return batch + + def prepare_dataset( + self, dataset: Dataset, columns: List[str], batch_size: int = 32 + ) -> Dataset: + """Preparation that is applied before the CachedOperation. + + Many CachedOperations require a full pass over the dataset to precompute some + variables before the core operation can actually be applied e.g. to create a + Bag-of-Words representation, constructing a dataset vocabulary to keep only + tokens that are frequently seen across the dataset. + + Args: + dataset: Dataset + columns: list of columns + batch_size: batch size for .map(..) 
+ + Returns: updated Dataset + """ + + # Apply preparation to the dataset + # TODO(karan): this is similar to the try except block for slicebuilders, + # refactor + try: + return dataset.map( + partial(self.prepare_batch, columns=columns), + batched=True, + batch_size=batch_size, + # The cache file name is a XOR of the interaction history and the + # current operation + # FIXME(karan): this is repeated + cache_file_name=str( + dataset.logdir + / ( + "cache-" + + str( + abs( + persistent_hash(str(dataset.identifier)) + ^ dataset.hash_interactions() + ^ persistent_hash( + str(self.identifier) + str(strings_as_json(columns)) + ) + ) + ) + + "-prep.arrow" + ) + ), + ) + except: # TypeError or PicklingError or AttributeError: # noqa + # Batch the dataset, and process each batch + all_batches = [ + self.prepare_batch( + batch=batch, + columns=columns, + ) + for batch in dataset.batch(batch_size) + ] + + return dataset.map( + lambda examples, indices: all_batches[indices[0] // batch_size], + batched=True, + batch_size=batch_size, + with_indices=True, + load_from_cache_file=False, + # The cache file name is a XOR of the interaction history and the + # current operation + # FIXME(karan): this is repeated + cache_file_name=str( + dataset.logdir + / ( + "cache-" + + str( + abs( + persistent_hash(str(dataset.identifier)) + ^ dataset.hash_interactions() + ^ persistent_hash( + str(self.identifier) + str(strings_as_json(columns)) + ) + ) + ) + + "-prep.arrow" + ) + ), + ) + + def apply(self, batch: Batch, columns: List[str], *args, **kwargs) -> List: + """Implements the core functionality of the cached operation.""" + pass + + def process_batch(self, batch: Batch, columns: List[str]) -> Batch: + """Apply the cached operation to a batch.""" + assert ( + len(set(columns) - set(batch.keys())) == 0 + ), "Any column in 'columns' must be present in 'batch'." 
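As a sketch of how a user-defined operation plugs into this machinery (the class below is hypothetical and not part of the library; SingleColumnCachedOperation is defined at the end of this file), prepare_batch can accumulate corpus-level state during the preparation pass over a Dataset, and single_column_apply then produces one output per example:

from collections import Counter

class FrequentTokens(SingleColumnCachedOperation):  # hypothetical example
    def __init__(self, min_count: int = 5, *args, **kwargs):
        super().__init__(min_count=min_count, *args, **kwargs)
        self.min_count = min_count
        self.counts = Counter()

    def prepare_batch(self, batch, columns):
        # first pass: build a corpus-level token frequency table
        for text in batch[columns[0]]:
            self.counts.update(text.split())
        return batch

    def single_column_apply(self, column_batch, **kwargs):
        # second pass: keep only tokens seen at least `min_count` times
        return [
            [t for t in text.split() if self.counts[t] >= self.min_count]
            for text in column_batch
        ]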
+ + # Run the cached operation, and encode outputs (defaults to json.dumps) + encoded_outputs = [ + self.encode(example_output) + for example_output in self.apply(batch=batch, columns=columns) + ] + + # Construct updates + updates = self.construct_updates( + encoded_outputs=encoded_outputs, columns=columns + ) + + # Update the cache and return the updated batch + return self.store(batch=batch, updates=updates) + + def process_dataset( + self, dataset: Dataset, columns: List[str], batch_size: int = 32 + ) -> Dataset: + """Apply the cached operation to a dataset.""" + + # Prepare to apply the CachedOperation to the dataset + dataset = self.prepare_dataset( + dataset=dataset, + columns=columns, + batch_size=batch_size, + ) + + try: + return dataset.map( + partial(self.process_batch, columns=columns), + batched=True, + batch_size=batch_size, + # The cache file name is a XOR of the interaction history and the + # current operation + cache_file_name=str( + dataset.logdir + / ( + "cache-" + + str( + abs( + persistent_hash(str(dataset.identifier)) + ^ dataset.hash_interactions() + ^ persistent_hash( + str(self.identifier) + str(strings_as_json(columns)) + ) + ) + ) + + ".arrow" + ) + ), + # self.get_cache_file_name(columns=columns), + ) + except: # noqa + # Batch the dataset, and process each batch + all_batches = [ + self.process_batch( + batch=batch, + columns=columns, + ) + for batch in dataset.batch(batch_size) + ] + + return dataset.map( + lambda examples, indices: all_batches[indices[0] // batch_size], + batched=True, + batch_size=batch_size, + with_indices=True, + load_from_cache_file=False, + # The cache file name is a XOR of the interaction history and the + # current operation + cache_file_name=str( + dataset.logdir + / ( + "cache-" + + str( + abs( + persistent_hash(str(dataset.identifier)) + ^ dataset.hash_interactions() + ^ persistent_hash( + str(self.identifier) + str(strings_as_json(columns)) + ) + ) + ) + + ".arrow" + ) + ), + ) + + def construct_updates(self, encoded_outputs: List[str], columns: List[str]): + return [ + {str(self.identifier): {strings_as_json(columns): val}} + for val in encoded_outputs + ] + + @classmethod + def available(cls, batch: Batch): + # Check if the cached operation is available to retrieve in the batch + if "cache" not in batch: + return False + return any([key.startswith(cls.__name__) for key in batch["cache"][0].keys()]) + + def __call__( + self, batch_or_dataset: BatchOrDataset, columns: List[str], batch_size: int = 32 + ) -> BatchOrDataset: + + if isinstance(batch_or_dataset, Dataset): + + # Check the InteractionTape to see if the CachedOperation was applied + if batch_or_dataset.check_tape( + path=[CACHEDOPS], + identifiers=self.identifier, + columns=columns, + ): + return batch_or_dataset + + # Apply the CachedOperation to the dataset + dataset = self.process_dataset( + dataset=batch_or_dataset, + columns=columns, + batch_size=batch_size, + ) + + # Update the InteractionTape with the applied CachedOperation + dataset.update_tape( + path=[CACHEDOPS], + identifiers=self.identifier, + columns=columns, + ) + + return dataset + + elif isinstance(batch_or_dataset, Dict): + + # Apply the CachedOperation + return self.process_batch(batch=batch_or_dataset, columns=columns) + else: + raise NotImplementedError + + +class SingleColumnCachedOperation(CachedOperation): + def __call__( + self, batch_or_dataset: BatchOrDataset, columns: List[str], batch_size: int = 32 + ) -> BatchOrDataset: + """Apply independently to each column. 
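For instance (toy values; StripText is the operation defined earlier in this package), passing several columns caches one entry per column rather than one joint entry:

from robustnessgym.cachedops.strip_text import StripText

batch = {"index": ["0"], "premise": ["A man walks."], "hypothesis": ["A person moves."]}
batch = StripText()(batch, columns=["premise", "hypothesis"])
# equivalent to two single-column applications: one cache entry for "premise", one for "hypothesis"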
+ + Args: + batch_or_dataset: + columns: + + Returns: + """ + # Iterate over the columns and apply + for column in columns: + batch_or_dataset = super(SingleColumnCachedOperation, self).__call__( + batch_or_dataset=batch_or_dataset, + columns=[column], + batch_size=batch_size, + ) + + return batch_or_dataset + + @singlecolumn + def apply(self, batch: Batch, columns: List[str], *args, **kwargs) -> List: + return self.single_column_apply(batch[columns[0]]) + + def single_column_apply(self, column_batch: List, **kwargs) -> List: + raise NotImplementedError("Must implement single_column_apply.") + + +class ScoreOperation(CachedOperation): + def apply( + self, batch: Batch, columns: List[str], *args, **kwargs + ) -> List[Union[int, float]]: + return super().apply(batch, columns, *args, **kwargs) + + +def stow( + dataset: Dataset, + cached_ops: Dict[CachedOperation, List[List[str]]], + batch_size: int = 32, + load_from_cache_file: bool = True, +): + """Apply a list of cached operations in sequence.""" + + # Check the InteractionTape to remove CachedOperations that have already been stowed + for cached_op, list_of_columns in list(cached_ops.items()): + indices_to_remove = [] + for i, columns in enumerate(list(list_of_columns)): + if dataset.check_tape( + path=[CACHEDOPS], + identifiers=cached_op.identifier, + columns=columns, + ): + # Remove the columns at index i + indices_to_remove.append(i) + + # Remove the columns that are already cached + for index in sorted(indices_to_remove, reverse=True): + columns = cached_ops[cached_op].pop(index) + print(f"skipped: {cached_op.identifier} -> {columns}", flush=True) + + # Check if list_of_columns is now empty + if not cached_ops[cached_op]: + # Remove the op entirely + cached_ops.pop(cached_op) + + for cached_op, list_of_columns in cached_ops.items(): + for columns in list_of_columns: + dataset = cached_op(dataset, columns=columns, batch_size=batch_size) + + # def _map_fn(batch: Batch): + # """ + # Consolidate the application of the CachedOperations passed to stow into a + # single mappable function. 
+ # """ + # for cached_op, list_of_columns in cached_ops.items(): + # for columns in list_of_columns: + # batch = cached_op(batch, columns=columns) + # + # return batch + # + # # Compute the hash value + # val = 0 + # for cached_op, list_of_columns in cached_ops.items(): + # for columns in list_of_columns: + # val ^= cached_op.get_cache_hash(columns=columns) + # + # # Combine with the hash for the dataset on which the cached ops are applied + # val ^= persistent_hash( + # # TODO(karan): move this to Dataset + # "-".join( + # "-".join(str(k) + "-" + str(v) for k, v in f.items()) for f in + # dataset._data_files + # ) + # ) + # + # # Map the cached operations over the dataset + # try: + # dataset = dataset.map( + # _map_fn, + # batched=True, + # batch_size=32, + # cache_file_name='cache-' + str(abs(val)) + '.arrow', + # load_from_cache_file=load_from_cache_file + # ) + # except TypeError: + # # Batch the dataset, and process each batch + # all_batches = [_map_fn(batch=batch) for batch in dataset.batch(batch_size)] + # + # # Update the dataset efficiently by reusing all_batches + # dataset = dataset.map( + # lambda examples, indices: all_batches[indices[0] // batch_size], + # batched=True, + # batch_size=batch_size, + # with_indices=True, + # ) + + # Update the Dataset history + for cached_op, list_of_columns in cached_ops.items(): + for columns in list_of_columns: + dataset.update_tape( + path=[CACHEDOPS], identifiers=cached_op.identifier, columns=columns + ) + + return dataset diff --git a/robustnessgym/core/constants.py b/robustnessgym/core/constants.py new file mode 100644 index 00000000..0fd0b31d --- /dev/null +++ b/robustnessgym/core/constants.py @@ -0,0 +1,9 @@ +CACHEDOPS = "cachedoperations" +SLICEBUILDERS = "slicebuilders" + +GENERIC = "slicebuilder" +SUBPOPULATION = "subpopulation" +ATTACK = "attack" +AUGMENTATION = "augmentation" +TRANSFORMATION = "transformation" +CURATION = "curated" diff --git a/robustnessgym/core/dataset.py b/robustnessgym/core/dataset.py new file mode 100644 index 00000000..1e78c1bf --- /dev/null +++ b/robustnessgym/core/dataset.py @@ -0,0 +1,617 @@ +from __future__ import annotations + +import json +import os +import pathlib +import pickle +from copy import deepcopy +from typing import Callable, Dict, List, Optional, Sequence, Union + +import cytoolz as tz +import datasets +from datasets import Features +from datasets.arrow_writer import ArrowWriter +from pyarrow import json as jsonarrow +from pyarrow import table +from tqdm import tqdm + +from robustnessgym.core.constants import ( + ATTACK, + CACHEDOPS, + SLICEBUILDERS, + SUBPOPULATION, + TRANSFORMATION, +) +from robustnessgym.core.identifier import Identifier +from robustnessgym.core.tools import persistent_hash, strings_as_json + + +class InteractionTape: + def __init__(self): + # Keep track of the history + self.history = {} + + def __repr__(self): + return f"{self.__class__.__name__}(interactions={len(self.history)})" + + def __hash__(self): + val = 0 + for (identifier, json_columns) in self.history: + val ^= persistent_hash(str(identifier) + str(json_columns)) + return val + + def dumps(self): + return json.dumps( + { + json.dumps((identifier.dumps(), json_columns)): idx + for (identifier, json_columns), idx in self.history.items() + } + ) + + @classmethod + def loads(cls, s: str): + tape = InteractionTape() + history = json.loads(s) + history = { + tuple(json.loads(json_tuple)): idx for json_tuple, idx in history.items() + } + tape.history = { + (Identifier.loads(identifier), json_columns): idx + for 
(identifier, json_columns), idx in history.items() + } + + return tape + + def update(self, identifier: Union[str, Identifier], columns: List[str]) -> None: + """Update the interaction tape with information about an interaction. + + Args: + identifier: Identifier for the interaction used. + columns: list of columns on which the interaction was applied. + + Returns: True if the interaction was added to the tape, False if it was + already applied before. + """ + if isinstance(identifier, str): + identifier = Identifier(_name=identifier) + elif isinstance(identifier, Identifier): + pass + else: + raise ValueError( + f"Parameter `identifier` should be an instance of class Identifier " + f"or str, " + f"not {type(identifier)}." + ) + + # Dump the column names to JSON + json_columns = strings_as_json(strings=columns) + + # Check if the entry is not in the history + if (identifier, json_columns) not in self.history: + # Give it the next index + self.history[(identifier, json_columns)] = len(self.history) + + def check(self, identifier: Union[str, Identifier], columns: List[str]) -> bool: + """ + + Args: + identifier: + columns: + + Returns: + + """ + if not (isinstance(identifier, str) or isinstance(identifier, Identifier)): + raise ValueError( + f"Parameter `identifier` should be an instance of class Identifier " + f"or str, " + f"not {type(identifier)}." + ) + + # Dump the column names to JSON + json_columns = strings_as_json(strings=columns) + + # Check if the entry is already in the history + if (identifier, json_columns) in self.history: + return True + return False + + +class InteractionTapeHierarchyMixin: + def __init__(self): + self.interactions = { + CACHEDOPS: InteractionTape(), + SLICEBUILDERS: { + SUBPOPULATION: InteractionTape(), + TRANSFORMATION: InteractionTape(), + ATTACK: InteractionTape(), + }, + } + + def hash_interactions(self): + v = 0 + for path in [ + [CACHEDOPS], + [SLICEBUILDERS, SUBPOPULATION], + [SLICEBUILDERS, TRANSFORMATION], + [SLICEBUILDERS, ATTACK], + ]: + v ^= self.fetch_tape(path=path).__hash__() + return v + + def dumps_interactions(self): + return json.dumps( + { + CACHEDOPS: self.interactions[CACHEDOPS].dumps(), + SLICEBUILDERS: { + SUBPOPULATION: self.interactions[SLICEBUILDERS][ + SUBPOPULATION + ].dumps(), + TRANSFORMATION: self.interactions[SLICEBUILDERS][ + TRANSFORMATION + ].dumps(), + ATTACK: self.interactions[SLICEBUILDERS][ATTACK].dumps(), + }, + } + ) + + @classmethod + def loads_interactions(cls, s: str) -> InteractionTapeHierarchyMixin: + tape_hierarchy = InteractionTapeHierarchyMixin() + interactions = json.loads(s) + tape_hierarchy.interactions = { + CACHEDOPS: InteractionTape.loads(interactions[CACHEDOPS]), + SLICEBUILDERS: { + SUBPOPULATION: InteractionTape.loads( + interactions[SLICEBUILDERS][SUBPOPULATION] + ), + TRANSFORMATION: InteractionTape.loads( + interactions[SLICEBUILDERS][TRANSFORMATION] + ), + ATTACK: InteractionTape.loads(interactions[SLICEBUILDERS][ATTACK]), + }, + } + return tape_hierarchy + + def update_tape( + self, + path: List[str], + identifiers: Union[Identifier, List[Identifier]], + columns: List[str], + ): + """Update the tape. + + Args: + path: Location of the InteractionTape in the hierarchy. 
+ identifiers: + columns: + + Returns: + """ + # Fetch the tape + tape = self.fetch_tape(path=path) + + # Update it + if isinstance(identifiers, Identifier) or isinstance(identifiers, str): + return tape.update(identifier=identifiers, columns=columns) + else: + return [ + tape.update(identifier=identifier, columns=columns) + for identifier in identifiers + ] + + def check_tape( + self, + path: List[str], + identifiers: Union[Identifier, List[Identifier]], + columns: List[str], + ): + """Check the tape. + + Args: + + path: + identifiers: + columns: + + Returns: + """ + # Fetch the tape + tape = self.fetch_tape(path=path) + + # Check it + if isinstance(identifiers, Identifier) or isinstance(identifiers, str): + return tape.check(identifier=identifiers, columns=columns) + else: + return [ + tape.check(identifier=identifier, columns=columns) + for identifier in identifiers + ] + + def fetch_tape(self, path: List[str]) -> InteractionTape: + """Fetch an InteractionTape. + + Args: + path: + + Returns: + """ + return tz.get_in(path, self.interactions) + + +Batch = Dict[str, List] +BatchOrDataset = Union[Batch, "Dataset"] + + +class Dataset(datasets.Dataset, InteractionTapeHierarchyMixin): + # Path to a log directory + logdir: pathlib.Path = pathlib.Path.home() / "robustnessgym/datasets/" + + # Create a directory + logdir.mkdir(parents=True, exist_ok=True) + + def __init__(self, *args, identifier: Identifier = None, **kwargs): + + if len(args) == 1 and isinstance(args[0], datasets.Dataset): + # Create a Dataset directly from an datasets.Dataset object + self.__dict__ = args[0].__dict__.copy() + else: + super(Dataset, self).__init__(*args, **kwargs) + + # Call the superclass constructor + InteractionTapeHierarchyMixin.__init__(self) + + self.identifier = ( + Identifier( + _name=self.info.builder_name, + split=str(self.split), + version=self.version, + ) + if not identifier + else identifier + ) + + # Keep track of the original dataset keys + self.original_columns = list(self.features.keys()) + + # Add an index to the dataset + dataset = self.map(self.add_index, with_indices=True) + self.__dict__.update(dataset.__dict__) + + # TODO(karan): fix the identifier settings for Dataset + if self.identifier is not None and not str(self.identifier).startswith("None"): + self.logdir /= str(self.identifier) + self.logdir.mkdir(parents=True, exist_ok=True) + + @staticmethod + def add_index(example, index): + if "index" not in example: + example["index"] = str(index) + return example + + def __repr__(self): + return ( + f"RobustnessGym{self.__class__.__name__}(num_rows: {self.num_rows}, " + f"interactions: {self.interactions})" + ) + + @classmethod + def uncached_batch(cls, batch: Batch, copy=True) -> Batch: + """Return batch with the "cache" and "slices" columns removed.""" + return tz.keyfilter( + lambda k: k not in ["cache", "slices"], deepcopy(batch) if copy else batch + ) + + @classmethod + def uncached_example(cls, example: Dict, copy=True) -> Dict: + """Return example with the "cache" and "slices" columns removed.""" + return tz.keyfilter( + lambda k: k not in ["cache", "slices"], + deepcopy(example) if copy else example, + ) + + @classmethod + def from_huggingface(cls, dataset: datasets.Dataset): + """Create a Dataset from a Huggingface datasets.Dataset.""" + return cls(dataset.info.builder_name, dataset) + + @classmethod + def list_datasets(cls) -> List[str]: + """List datasets on Huggingface. 
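As a small sketch of the tape bookkeeping above (the identifier is illustrative and `dataset` stands for any Dataset instance; this mirrors what CachedOperation.__call__ does internally when applied to a Dataset):

from robustnessgym.core.constants import CACHEDOPS
from robustnessgym.core.identifier import Identifier

dataset.update_tape(path=[CACHEDOPS], identifiers=Identifier(_name="Spacy"), columns=["text"])
dataset.check_tape(path=[CACHEDOPS], identifiers=Identifier(_name="Spacy"), columns=["text"])  # True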
+ + Returns: list of datasets + """ + return datasets.list_datasets() + + @classmethod + def load_dataset(cls, *args, **kwargs): + """Create a Dataset from any Huggingface nlp dataset source. + + Use this instead of datasets.load_dataset, so that + + dict_of_datasets = datasets.load_dataset('boolq') + + becomes + + dict_of_datasets = Dataset.load_dataset('boolq') + """ + # Load the dataset + dataset = datasets.load_dataset(*args, **kwargs) + + if isinstance(dataset, dict): + return dict( + map( + lambda t: ( + t[0], + cls( + t[1], + identifier=Identifier( + _name=t[1].info.builder_name, + split=str(t[1].split), + version=t[1].version, + ), + ), + ), + dataset.items(), + ) + ) + else: + return cls( + dataset, + identifier=Identifier( + _name=dataset.info.builder_name, + split=str(dataset.split), + version=dataset.version, + ), + ) + + @classmethod + def from_json(cls, json_path: str, identifier: Identifier) -> Dataset: + """Load a dataset from a JSON file on disk, where each line of the json + file consists of a single example.""" + return cls( + jsonarrow.read_json(json_path), + identifier=identifier, + ) + + @classmethod + def from_slice(cls): + pass + + @classmethod + def from_batch(cls, batch: Batch, identifier: Identifier) -> Dataset: + """Convert a batch to a Dataset. + + TODO(karan): disable preprocessing in this case + """ + return cls(table(batch), identifier=identifier) + + @classmethod + def from_batches( + cls, batches: Sequence[Batch], identifier: Identifier = None + ) -> Dataset: + """Convert a list of batches to a dataset.""" + return cls.from_batch( + tz.merge_with(tz.concat, *batches), + identifier=identifier, + ) + + def batch(self, batch_size: int = 32): + """Batch the dataset. + + Args: + batch_size: integer batch size + + Returns: + """ + for i in range(0, len(self), batch_size): + yield self[i : i + batch_size] + + def map( + self, + function: Optional[Callable] = None, + with_indices: bool = False, + input_columns: Optional[Union[str, List[str]]] = None, + batched: bool = False, + batch_size: Optional[int] = 1000, + drop_last_batch: bool = False, + remove_columns: Optional[List[str]] = None, + keep_in_memory: bool = False, + load_from_cache_file: bool = True, + cache_file_name: Optional[str] = None, + writer_batch_size: Optional[int] = 1000, + features: Optional[Features] = None, + disable_nullable: bool = False, + fn_kwargs: Optional[dict] = None, + num_proc: Optional[int] = None, + suffix_template: str = "_{rank:05d}_of_{num_proc:05d}", + new_fingerprint: Optional[str] = None, + **kwargs, + ) -> Dataset: + """Wrap map.""" + + # Compute the map using datasets.Dataset's .map() + output = datasets.Dataset.map( + self, + function, + with_indices, + input_columns, + batched, + batch_size, + drop_last_batch, + remove_columns, + keep_in_memory, + load_from_cache_file, + cache_file_name, + writer_batch_size, + features, + disable_nullable, + fn_kwargs, + num_proc, + suffix_template, + new_fingerprint, + ) + + if isinstance(output, datasets.Dataset): + dataset = deepcopy(self) + dataset.__dict__ = tz.merge(dataset.__dict__, output.__dict__) + return dataset + else: + return output + + @classmethod + def load(cls, path: str) -> Optional[Dataset]: + try: + with open(os.path.join(path, "split.p"), "rb") as f: + return cls.from_file( + filename=os.path.join(path, "data.arrow"), + info=datasets.DatasetInfo.from_directory(path), + split=pickle.load(f), + ) + except: # noqa + return None + + # def save_to_disk(self, dataset_path: str): + # return super(Dataset, 
self).save_to_disk(dataset_path) + + def save(self, path: str) -> None: + # Make all the directories to the path + os.makedirs(path, exist_ok=True) + + # Taken from Huggingface datasets.Dataset + # Prepare output buffer and batched writer in memory or on file if we update + # the table + writer = ArrowWriter( + features=self.features, + path=os.path.join(path, "data.arrow"), + writer_batch_size=1000, + ) + + # Loop over single examples or batches and write to buffer/file if examples + # are to be updated + for i, example in tqdm(enumerate(self)): + writer.write(example) + + writer.finalize() + + # Write DatasetInfo + self.info.write_to_directory(path) + + # Write split to file + with open(os.path.join(path, "split.p"), "wb") as f: + pickle.dump(self.split, f) + + @classmethod + def from_tfds(cls): + # TODO(karan): v1 of robustness gym. Use it for image-based tasks, like clevr. + pass + + @classmethod + def interleave(cls, datasets: List[Dataset], identifier: Identifier) -> Dataset: + """Interleave a list of datasets.""" + return cls.from_batch( + tz.merge_with(tz.interleave, *[dataset[:] for dataset in datasets]), + identifier=identifier, + ) + + @classmethod + def chain(cls, datasets: List[Dataset], identifier: Identifier) -> Dataset: + """Chain a list of datasets.""" + return cls.from_batch( + tz.merge_with(tz.concat, *[dataset[:] for dataset in datasets]), + identifier=identifier, + ) + + def __getstate__(self): + state = super(Dataset, self).__getstate__() + if "interactions" in state and not isinstance(state["interactions"], str): + state["interactions"] = self.dumps_interactions() + if "identifier" in state and isinstance(state["identifier"], Identifier): + state["identifier"] = state["identifier"].dumps() + if "_identifier" in state and isinstance(state["_identifier"], Identifier): + state["_identifier"] = state["_identifier"].dumps() + if "lineage" in state: + state["lineage"] = [ + tuple(t[:1]) + (t[1].dumps(),) + (tuple(t[2:]) if len(t) > 2 else ()) + for t in state["lineage"] + ] + if "logdir" in state: + state["logdir"] = "" + return state + + def __setstate__(self, state): + state = dict(state) + if "interactions" in state and isinstance(state["interactions"], str): + state["interactions"] = self.loads_interactions( + state["interactions"] + ).interactions + if "identifier" in state and isinstance(state["identifier"], str): + state["identifier"] = Identifier.loads(state["identifier"]) + if "_identifier" in state: + try: + state["_identifier"] = Identifier.loads(state["_identifier"]) + except: # noqa + pass + if "lineage" in state: + try: + state["lineage"] = [ + tuple(t[:1]) + + (Identifier.loads(t[1]),) + + (tuple(t[2:]) if len(t) > 2 else ()) + for t in state["lineage"] + ] + except: # noqa + pass + if "logdir" in state: + try: + state["logdir"] = ( + pathlib.Path.home() + / f"robustnessgym/datasets/{str(state['identifier'])}" + ) + except: # noqa + state["logdir"] = ( + pathlib.Path.home() + / f"robustnessgym/datasets/{str(state['_identifier'])}" + ) + super(Dataset, self).__setstate__(state) + + @classmethod + def load_from_disk(cls, dataset_path: str) -> Dataset: + """Load the dataset from a dataset directory. 
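A round-trip sketch of the save/load pair defined above (the path is illustrative and `dataset` stands for any Dataset instance):

dataset.save("/tmp/boolq_train")        # writes data.arrow, the DatasetInfo, and the pickled split
reloaded = Dataset.load("/tmp/boolq_train")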
+ + Args: + dataset_path (``str``): path of the dataset directory where the dataset + will be loaded from + """ + with open(os.path.join(dataset_path, "state.json"), "r") as state_file: + state = json.load(state_file) + with open( + os.path.join(dataset_path, "dataset_info.json"), "r" + ) as dataset_info_file: + dataset_info = json.load(dataset_info_file) + state["_info"] = json.dumps(dataset_info) + dataset = cls.from_dict({}) + state = { + k: state[k] for k in dataset.__dict__.keys() + } # in case we add new fields + # Change path to absolute path + for data_file in state.get("_data_files", []) + state.get( + "_indices_data_files", [] + ): + data_file["filename"] = os.path.join(dataset_path, data_file["filename"]) + dataset.__setstate__(state) + dataset.logdir = ( + pathlib.Path.home() / f"robustnessgym/datasets/{str(dataset.identifier)}" + ) + return dataset + + +def transpose_batch(batch: Batch): + """Transpose a batch of data from a dict of lists to a list of dicts. + + Args: + batch: batch of data which is a dictionary mapping columns to lists + + Returns: list of dicts, each dict corresponding to a single example + """ + return [dict(zip(batch, t)) for t in zip(*batch.values())] diff --git a/robustnessgym/core/dataset_to_task.py b/robustnessgym/core/dataset_to_task.py new file mode 100644 index 00000000..423ab74e --- /dev/null +++ b/robustnessgym/core/dataset_to_task.py @@ -0,0 +1,9 @@ +from robustnessgym.tasks.task import QuestionAnswering, TernaryNaturalLanguageInference + +dataset_to_task = { + # Natural Language Inference + # Ternary + "snli": TernaryNaturalLanguageInference, + # Question Answering + "squad": QuestionAnswering, +} diff --git a/robustnessgym/core/decorators.py b/robustnessgym/core/decorators.py new file mode 100644 index 00000000..a0157cda --- /dev/null +++ b/robustnessgym/core/decorators.py @@ -0,0 +1,78 @@ +from copy import deepcopy +from typing import Callable, Dict, List + + +def singlecolumn(func: Callable): + """Assert that func is called with a single column. + + Mainly used with .apply(..) methods for CachedOperation and SliceBuilder. + + Args: + func: function to wrap + + Returns: decorated function + """ + + def _singlecolumn( + self, batch: Dict[str, List], columns: List[str], *args, **kwargs + ): + assert len(columns) == 1, "Must pass in a single column." + return func(self, batch, columns, *args, **kwargs) + + return _singlecolumn + + +def prerequisites(*args): + """ + Decorator to add a prerequisites attribute to any class. + Args: + *args: list of prerequisites + + Returns: a decorator + """ + + def _decorator(cls): + _old_init = deepcopy(cls.__init__) + + def _new_init(self, *_args, **kwargs): + _old_init(self, *_args, **kwargs) + self.prerequisites = set(args) + + cls.__init__ = _new_init + + return cls + + return _decorator + + +def function_register(): + registry = {} + + def registrar(func): + # Register the function + registry[func.__name__] = func + + # Mark the fact that the function is decorated + func.decorator = registrar + + return func + + registrar.all = registry + return registrar + + +# Create processors that keep track of batch and dataset operations +batch_processing = function_register() +dataset_processing = function_register() + + +def methods_with_decorator(cls, decorator): + """Returns all methods in cls with decorator as the outermost decorator. 
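A quick sketch of the prerequisites decorator defined earlier in this file (the decorated class is hypothetical):

from robustnessgym.cachedops.spacy import Spacy
from robustnessgym.core.decorators import prerequisites

@prerequisites(Spacy)
class NeedsSpacy:  # hypothetical consumer, e.g. a SliceBuilder subclass
    def __init__(self):
        pass

NeedsSpacy().prerequisites  # {Spacy}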
+ + Credit: https://stackoverflow.com/questions/5910703/how-to-get-all-methods-of-a + -python-class-with-given-decorator + """ + for maybe_decorated in cls.__dict__.values(): + if hasattr(maybe_decorated, "decorator"): + if maybe_decorated.decorator == decorator: + yield maybe_decorated diff --git a/robustnessgym/core/identifier.py b/robustnessgym/core/identifier.py new file mode 100644 index 00000000..9be3f702 --- /dev/null +++ b/robustnessgym/core/identifier.py @@ -0,0 +1,69 @@ +"""Identifiers for objects in Robustness Gym.""" +from __future__ import annotations + +import json +from typing import Callable, List, Union + +from robustnessgym.core.tools import persistent_hash + + +class Identifier: + """Class for creating identifiers for objects in Robustness Gym.""" + + def __init__(self, _name: str, _index: Union[str, int] = None, **kwargs): + + self._name = _name + self._index = str(_index) if _index is not None else None + self._parameters = kwargs + + for param, value in self.parameters.items(): + if isinstance(value, Callable): + self.parameters[param] = ".".join( + [str(value.__module__), str(value.__name__)] + ) + else: + self.parameters[param] = str(value) + + @property + def name(self): + return self._name + + @property + def index(self): + return self._index + + @property + def parameters(self): + return self._parameters + + @classmethod + def range(cls, n: int, _name: str, **kwargs) -> List[Identifier]: + + if n > 1: + return [cls(_name=_name, _index=i, **kwargs) for i in range(1, n + 1)] + return [cls(_name=_name, **kwargs)] + + def __repr__(self): + params = ", ".join([f"{k}={v}" for k, v in self.parameters.items()]) + if self.index is not None: + return ( + f"{self.name}-{self.index}({params})" + if len(params) > 0 + else f"{self.name}-{self.index}" + ) + return f"{self.name}({params})" if len(params) > 0 else f"{self.name}" + + def __hash__(self): + return persistent_hash(str(self)) + + def __eq__(self, other): + return str(self) == str(other) + + def dumps(self): + return json.dumps(self.__dict__) + + @classmethod + def loads(cls, s: str): + identifier = Identifier(_name="") + identifier.__dict__ = json.loads(s) + return identifier diff --git a/robustnessgym/core/metrics.py b/robustnessgym/core/metrics.py new file mode 100644 index 00000000..b82c4ef5 --- /dev/null +++ b/robustnessgym/core/metrics.py @@ -0,0 +1,99 @@ +import re +import statistics +from typing import Sequence, Union + +import nltk +import pytorch_lightning.metrics.functional as lightning_metrics +import torch +from rouge_score import rouge_scorer + + +# TODO Refactor into separate class for each metric +# TODO change signature of compute_metric +def compute_metric( + metric: str, + predictions: Union[Sequence, torch.Tensor], + labels: Union[Sequence, torch.Tensor], + num_classes: int, +): + """Compute metric given predictions and target labels + Args: + metric: name of metric + predictions: A sequence of predictions (rouge metrics) or a torch Tensor + (other metrics) containing predictions + labels: A sequence of labels (rouge metrics) or a torch Tensor (other metrics) + containing target labels + num_classes: number of classes + """ + + if metric == "accuracy": + # Calculate the accuracy + if not isinstance(predictions, torch.Tensor): + predictions = torch.Tensor(predictions) + if not isinstance(labels, torch.Tensor): + labels = torch.Tensor(labels) + score = lightning_metrics.accuracy( + pred=predictions, + target=labels, + num_classes=num_classes, + ).item() + elif metric == "f1": + # Calculate the f1 + if 
not isinstance(predictions, torch.Tensor): + predictions = torch.Tensor(predictions) + if not isinstance(labels, torch.Tensor): + labels = torch.Tensor(labels) + score = lightning_metrics.f1_score( + pred=predictions, + target=labels, + num_classes=num_classes, + ).item() + elif metric in ("Rouge-1", "Rouge-2", "Rouge-L"): + # Calculate rouge scores + if metric == "Rouge-1": + metric_id = "rouge1" + elif metric == "Rouge-2": + metric_id = "rouge2" + else: + metric_id = "rougeLsum" + scorer = rouge_scorer.RougeScorer([metric_id], use_stemmer=True) + # TODO Remove summarizaton-specific 'format_summary' call + # TODO Don't call scorer.score separately for each metric + score = statistics.mean( + scorer.score(format_summary(reference), format_summary(pred))[ + metric + ].fmeasure + for reference, pred in zip(labels, predictions) + ) + + elif metric == "class_dist": + # Calculate class distribution + if not isinstance(labels, torch.Tensor): + labels = torch.Tensor(labels) + score = ( + lightning_metrics.to_onehot(tensor=labels, num_classes=num_classes) + .double() + .mean(dim=0) + .tolist() + ) + + elif metric == "pred_dist": + # Calculate predicted class distribution + if not isinstance(predictions, torch.Tensor): + predictions = torch.Tensor(predictions) + score = ( + lightning_metrics.to_onehot(tensor=predictions, num_classes=num_classes) + .double() + .mean(dim=0) + .tolist() + ) + else: + raise NotImplementedError + + return score + + +def format_summary(x: str) -> str: + """Format summary text for computing rouge.""" + re.sub("", "", x) # remove pegasus newline char + return "\n".join(nltk.sent_tokenize(x)) diff --git a/robustnessgym/core/model.py b/robustnessgym/core/model.py new file mode 100644 index 00000000..8867c303 --- /dev/null +++ b/robustnessgym/core/model.py @@ -0,0 +1,369 @@ +import itertools +import re +from typing import Callable, Collection, Dict, List, Optional + +import cytoolz as tz +import nltk +import torch +from transformers import ( + AutoModel, + AutoModelForSeq2SeqLM, + AutoModelForSequenceClassification, + AutoTokenizer, +) + +from robustnessgym.core.dataset import Dataset +from robustnessgym.core.metrics import compute_metric +from robustnessgym.tasks.task import Task + + +class Model: + def __init__( + self, + identifier: str, + task: Task, + model=None, + evaluation_fn=None, + device: str = None, + is_classifier: bool = None, + ): + + # TODO(karan): improve this wrapper around models + # TODO(karan): add some human-readble identifier to this as optional + self.identifier = identifier + self.task = task + self.model = model + + if evaluation_fn is not None: + self.evaluate = evaluation_fn + + if self.task is None: + if is_classifier is None: + raise ValueError("'is_classifier' required when task not passed") + else: + is_classifier = self.task.classification() + + if is_classifier: + self.outputs = { + "probs", + "logits", + "pred", + # 'embeddings', + # TODO(karan): other information from the model e.g. embeddings which + # aren't task related? + } + else: + self.outputs = { + "pred", + # 'embeddings', + # TODO(karan): other information from the model e.g. embeddings which + # aren't task related? 
+ } + + if not device: + self.device = "cpu" + if torch.cuda.is_available(): + self.device = "cuda:0" + + def to(self, device: str): + self.device = device + return self.model.to(device) + + def __call__( + self, + dataset: Dataset, + input_columns: List[str], + output_columns: List[str], + batch_size: int = 32, + coerce_fn: Callable = None, + *args, + **kwargs + ): + + return self.evaluate( + dataset, + input_columns, + output_columns, + batch_size, + coerce_fn, + *args, + **kwargs + ) + + @classmethod + def huggingface( + cls, + identifier: str, + task: Task = None, + model: Optional[AutoModel] = None, + tokenizer: Optional[AutoTokenizer] = None, + is_classifier=None, + ): + """ + + Args: + identifier: + task: + model: + tokenizer: + + Returns: + + Examples: + >>> Model.huggingface(identifier='', task=TernaryNaturalLanguageInference()) + >>> Model.huggingface(identifier='', \ + model=AutoModelForSequenceClassification.from_pretrained(''), + tokenizer=AutoTokenizer.from_pretrained('')) + + """ + + return HuggingfaceModel( + identifier=identifier, + task=task, + model=model, + tokenizer=tokenizer, + is_classifier=is_classifier, + ) + + def forward(self, input_batch: Dict) -> Dict: + raise NotImplementedError + + def evaluate( + self, + dataset: Dataset, + input_columns: List[str], + output_columns: List[str], + batch_size: int = 32, + coerce_fn: Callable = None, + ): + raise NotImplementedError + + @staticmethod + def remap_labels(output_dict: Dict, label_map: List[int]) -> Dict: + """Map the output labels of the model. + + Example: 3-way classificaiton, with label_map = [1, 2, 0] + => (model label 0 -> dataset label 1, model label 1 -> dataset label 2, ...). + """ + + # Check the number of classes + num_classes = len(label_map) + + # Remap the columns of all outputs that have # columns = num_classes + for key in output_dict: + if output_dict[key].shape[-1] == num_classes: + output_dict[key] = output_dict[key][..., label_map] + + # Remap the pred key + inverse_label_map = [ + t[1] for t in sorted([(label, i) for i, label in enumerate(label_map)]) + ] + output_dict["pred"] = torch.tensor(inverse_label_map)[output_dict["pred"]] + + return output_dict + + +class HuggingfaceModel(Model): + def __init__( + self, + identifier: str, + task: Task = None, + model: Optional[AutoModel] = None, + tokenizer: Optional[AutoTokenizer] = None, + device: str = None, + is_classifier=None, + ): + + super(HuggingfaceModel, self).__init__( + identifier=identifier, task=task, device=device, is_classifier=is_classifier + ) + + self.tokenizer = tokenizer + if tokenizer is None: + # Load the tokenizer + self.tokenizer = AutoTokenizer.from_pretrained(self.identifier) + + self.model = model + if model is None: + # Load the model + if self.task is None: + if is_classifier is None: + raise ValueError("'is_classifier' required when task not specified") + else: + is_classifier = self.task.classification() + if is_classifier: + self.model = AutoModelForSequenceClassification.from_pretrained( + self.identifier + ) + else: + self.model = AutoModelForSeq2SeqLM.from_pretrained(self.identifier) + + self.task = task + + # Move the model to device + self.to(self.device) + + def forward(self, input_batch: Dict) -> Dict: + # Create the required outputs + output_dict = {k: None for k in self.outputs} + + if self.task.classification(): + # Run the model on the input_batch + # TODO(karan): allow outputs to generically contain side information ( + # embeddings, attention, etc.) 
+ with torch.no_grad(): + outputs = self.model(**input_batch) + + # The logits are at the 0th index + logits = outputs[0] + + # TODO(karan): these are still on GPU, do metric computation on GPU then + # move to CPU + # TODO(karan): incrementally compute metrics? + if "logits" in self.outputs: + output_dict["logits"] = logits.to("cpu") + + if "probs" in self.outputs: + output_dict["probs"] = torch.nn.functional.softmax(logits, dim=-1).to( + "cpu" + ) + + if "pred" in self.outputs: + output_dict["pred"] = logits.argmax(dim=-1).to("cpu") + else: + with torch.no_grad(): + summary_token_ids = self.model.generate(**input_batch) + summaries = [ + self.tokenizer.decode( + token_id_list, + skip_special_tokens=True, + clean_up_tokenization_spaces=False, + ) + for token_id_list in summary_token_ids + ] + output_dict["pred"] = summaries + + return output_dict + + def encode_batch(self, batch: Dict[str, List], columns: Collection[str], **kwargs): + # TODO(karan): Automatically writing this encoder for a variety of tasks + return self.tokenizer( + *[batch[key] for key in columns], truncation=True, padding=True, **kwargs + ) + + def predict_batch(self, batch: Dict[str, List], input_columns: Collection[str]): + + # Tokenize the batch + input_batch = self.encode_batch(batch=batch, columns=input_columns) + + # Convert the batch to torch.Tensor + input_batch = tz.valmap( + lambda v: torch.tensor(v).to(device=self.device), input_batch + ) + + # Apply the model to the batch + return self.forward(input_batch) + + def evaluate( + self, + dataset: Dataset, + input_columns: List[str], + output_columns: List[str], + batch_size: int = 32, + metrics: List[str] = None, + coerce_fn: Callable = None, + ): + + # TODO(karan): generalize to TF2 + + # Reset the dataset format + dataset.reset_format() + dataset.set_format(columns=input_columns + output_columns) + + # TODO(karan): check that the Dataset conforms to the task definition + # TODO(karan): figure out how the output_columns will be used by the metrics + pass + + predictions = [] + targets = [] + + # Loop and apply the prediction function + # TODO(karan): not using .map() here in order to get more fine-grained + # control over devices + for idx in range(0, len(dataset), batch_size): + # Create the batch + batch = dataset[idx : idx + batch_size] + + # Predict on the batch + prediction_dict = self.predict_batch( + batch=batch, input_columns=input_columns + ) + + # Coerce the predictions + if coerce_fn: + prediction_dict = coerce_fn(prediction_dict) + + # Grab the raw target key/values + target_dict = tz.keyfilter(lambda k: k in output_columns, batch) + + # TODO(karan): general version for non-classification problems + # TODO(karan): move this to the right device + if self.task.classification(): + target_dict = tz.valmap(lambda v: torch.tensor(v), target_dict) + + # TODO(karan): incremental metric computation here + # Append the predictions and targets + predictions.append(prediction_dict) + targets.append(target_dict) + + # Consolidate the predictions and targets + if self.task.classification(): + # TODO(karan): Need to store predictions and outputs from the model + predictions = tz.merge_with(lambda v: torch.cat(v).to("cpu"), *predictions) + targets = tz.merge_with(lambda v: torch.cat(v).to("cpu"), *targets) + else: + predictions = tz.merge_with( + lambda x: list(itertools.chain.from_iterable(x)), *predictions + ) + targets = tz.merge_with( + lambda x: list(itertools.chain.from_iterable(x)), *targets + ) + + # Compute the metrics + # TODO(karan): generalize this code 
to support metric computation for any task + + # Assumes classification, so the output_columns contains a single key for the + # label + if self.task.classification(): + assert len(output_columns) == 1 # , "Only supports classification." + num_classes = self.task.output_schema.features[ + list(self.task.output_schema.keys())[0] + ].num_classes + + labels = targets[list(targets.keys())[0]] + + if metrics is None: + if self.task is None: + raise ValueError( + "Must specify metrics if model not associated with task" + ) + metrics = self.task.metrics + + pred = predictions["pred"].to(self.device) + target = labels.to(self.device) + + evaluation_dict = { + metric: compute_metric(metric, pred, target, num_classes) + for metric in metrics + } + + # Reset the data format + dataset.reset_format() + + return evaluation_dict + + +def format_summary(x: str) -> str: + """Format summary text for computing rouge.""" + re.sub("", "", x) # remove pegasus newline char + return "\n".join(nltk.sent_tokenize(x)) diff --git a/robustnessgym/core/operation.py b/robustnessgym/core/operation.py new file mode 100644 index 00000000..990d794a --- /dev/null +++ b/robustnessgym/core/operation.py @@ -0,0 +1,264 @@ +"""Implementation of the Operation abstract base class.""" +import json +from abc import ABC, abstractmethod +from typing import Callable, List + +from robustnessgym.core.identifier import Identifier +from robustnessgym.core.tools import persistent_hash + + +class Operation(ABC): + """Abstract base class for operations in Robustness Gym.""" + + def __init__( + self, + apply_fn: Callable = None, + identifiers: List[Identifier] = None, + num_outputs: int = None, + *args, + **kwargs + ): + + if not identifiers: + assert ( + num_outputs + ), "Must pass in num_outputs if no identifiers are specified." + + # Set the identifiers for the outputs of the Operation + self._identifiers = ( + Identifier.range(n=num_outputs, _name=self.__class__.__name__, **kwargs) + if not identifiers + else identifiers + ) + + # Assign the apply_fn + if apply_fn: + self.apply = apply_fn + + # # Find the batch and dataset processors + # self._batch_processors = {method.__name__ for method in + # methods_with_decorator(self.__class__, + # batch_processing)} + # self._dataset_processors = {method.__name__ for method in + # methods_with_decorator(self.__class__, + # dataset_processing)} + + @property + def identifiers(self): + return self._identifiers + + # @property + # @abstractmethod + # def processors(self): + # raise NotImplementedError("Must specify the order in which processors are + # applied.") + # + # @property + # def batch_processors(self): + # return self._batch_processors + # + # @property + # def dataset_processors(self): + # return self._dataset_processors + + def __hash__(self): + """Compute a hash value for the cached operation object.""" + val = 0 + for identifier in self.identifiers: + val ^= persistent_hash(str(identifier)) + return val + + # def get_cache_hash(self, + # columns: List[str], + # processor: str = None): + # """ + # Construct a hash that will be used to identify the application of a + # Operation to the columns of a dataset. 
+ # """ + # + # # Hash the Operation + # val = hash(self) + # + # # Combine with the hash for each column + # for column in columns: + # val ^= persistent_hash(column) + # + # # Combine with the hash for the processor + # if processor: + # val ^= persistent_hash(processor) + # + # return val + # + # def get_cache_file_name(self, + # columns: List[str], + # processor: str = None) -> str: + # """ + # Construct a file name for caching. + # """ + # return 'cache-' + str(abs(self.get_cache_hash(columns=columns, + # processor=processor))) + '.arrow' + + # # FIXME: temporary + # def __call__(self, + # batch_or_dataset: BatchOrDataset, + # columns: List[str], + # mask: List[int] = None, + # *args, + # **kwargs) -> BatchOrDataset: + # + # if isinstance(batch_or_dataset, Dataset): + # # Check the Dataset's InteractionTape to see if the Operation was + # previously applied + # if not mask: + # # This infers a mask that specifies which outputs of the Operation + # are not required + # mask = batch_or_dataset.check_tape( + # path=[self.__class__.__name__], + # identifiers=self.identifiers, + # columns=columns + # ) + # + # # If all outputs of the Operation were previously present in the + # Dataset, simply return + # if all(mask): + # return batch_or_dataset + # + # # Apply the CachedOperation to the dataset + # dataset = self.process_dataset( + # dataset=batch_or_dataset, + # columns=columns, + # ) + # + # # Update the InteractionTape with the applied CachedOperation + # dataset.update_tape( + # path=[CACHED_OPS], + # identifiers=self.identifiers, + # columns=columns, + # ) + # + # return dataset + # + # elif isinstance(batch_or_dataset, Dict): + # + # assert len(self.dataset_processors) == 0, \ + # f"Cannot apply {self.__class__.__name__} to a batch, " \ + # f"since it has dataset processors: {self.dataset_processors}. " \ + # f"Use Dataset.from_batch(batch) before calling { + # self.__class__.__name__}." + # + # # Apply the Operation + # return self.process_batch( + # batch=batch_or_dataset, + # columns=columns + # ) + # else: + # raise NotImplementedError + # + # def wrap_batch_processor(self, + # batch_processor: Callable) -> Callable: + # + # def _wrap_batch_processor(batch: Batch, + # columns: List[str], + # **kwargs): + # + # return batch_processor(batch=batch, columns=columns, **kwargs) + # + # return _wrap_batch_processor + # + # def process_dataset(self, + # dataset: Dataset, + # columns: List[str], + # batch_size: int = 32) -> Dataset: + # """ + # Apply the Operation to a dataset. + # """ + # + # # Apply them in order + # for method in self.processors: + # + # # Apply batch processors by .map(..) over the dataset + # if method.__name__ in self.batch_processors: + # dataset = dataset.map( + # partial(method, columns=columns), + # batched=True, + # batch_size=batch_size, + # cache_file_name=self.get_cache_file_name(columns=columns, + # processor=method) + # ) + # # Apply dataset processors directly + # elif method.__name__ in self.dataset_processors: + # dataset = method( + # dataset=dataset, + # columns=columns, + # ) + # else: + # raise RuntimeError(f"{method} is not a processor. " + # f"Please remove {method} from the `processors` + # property or decorate it.") + # + # return dataset + # + # def process_batch(self, + # batch: Batch, + # columns: List[str]) -> Batch: + # """ + # Apply the cached operation to a batch. + # """ + # assert len(set(columns) - set(batch.keys())) == 0, "Any column in 'columns' + # must be present in 'batch'." 
+ # + # # Run the cached operation, and encode outputs (defaults to json.dumps) + # encoded_outputs = [ + # self.encode(example_output) + # for example_output in self.apply(batch=batch, columns=columns) + # ] + # + # # Construct updates + # updates = self.construct_updates( + # encoded_outputs=encoded_outputs, + # columns=columns + # ) + # + # # Update the cache and return the updated batch + # return self.store(batch=batch, updates=updates) + + @classmethod + def identify(cls, **kwargs): + return Identifier(_name=cls.__name__, **kwargs) + + @classmethod + def encode(cls, obj) -> str: + """ + + Args: + obj: + + Returns: + + """ + return json.dumps(obj) + + @classmethod + def decode(cls, s: str): + """ + + Args: + s: + + Returns: + + """ + return json.loads(s) + + @abstractmethod + def apply(self, *args, **kwargs): + """ + + Args: + *args: + **kwargs: + + Returns: + + """ + pass diff --git a/robustnessgym/core/report.py b/robustnessgym/core/report.py new file mode 100644 index 00000000..aafe43a0 --- /dev/null +++ b/robustnessgym/core/report.py @@ -0,0 +1,486 @@ +from __future__ import annotations + +import itertools +from functools import partial +from typing import Dict, List + +import dill +import numpy as np +import pandas as pd +import plotly.figure_factory as ff +import plotly.graph_objects as go +from plotly.graph_objs import Figure +from plotly.subplots import make_subplots + + +class ReportColumn: + """A single column in the Robustness Report.""" + + def __init__(self, title: str): + self.title = title + + def set_title(self, title: str): + self.title = title + + +class ScoreColumn(ReportColumn): + """A column for numeric scores in the Robustness Report, displayed as a bar + chart.""" + + def __init__( + self, title: str, min_val: float, max_val: float, is_0_to_1: bool = False + ): + super(ScoreColumn, self).__init__(title) + self.min_val = min_val + self.max_val = max_val + self.is_0_to_1 = is_0_to_1 + + def set_min(self, min_val: float): + self.min_val = min_val + + def set_max(self, max_val: float): + self.max_val = max_val + + +class ClassDistributionColumn(ReportColumn): + """A column for discrete class distributions in the Robustness Report, + displayed as a heatmap.""" + + def __init__(self, title: str, class_codes: List[str]): + super(ClassDistributionColumn, self).__init__(title) + self.class_codes = class_codes + + def set_class_codes(self, class_codes: List[str]): + self.class_codes = class_codes + + +class NumericColumn(ReportColumn): + """A column for numeric data in the Robustness Report, displayed as the raw + value.""" + + def __init__(self, title: str): + super(NumericColumn, self).__init__(title) + + +class Report: + """Class for Robustness Gym Report. 
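+
+    A Report wraps a pandas DataFrame of per-slice results and renders it with
+    plotly: ScoreColumn values are drawn as bar charts, ClassDistributionColumn
+    values as annotated heatmaps, and NumericColumn values as plain text.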
+ Args: + data: Pandas dataframe in the following format: + column 1: category name + column 2: slice name + columns 3-N: data corresponding to passed columns parameter + columns: ReportColumn objects specifying format of columns 3-N in data + model_name (optional): model name to show in report + dataset_name (optional): dataset name to show in report + **kwargs (optional): any additional config paramters + """ + + def __init__( + self, + data: pd.DataFrame, + columns: List[ReportColumn], + model_name: str = None, + dataset_name: str = None, + **kwargs, + ): + + # Make a copy of data since may be modified by methods below + self.data = data.copy() + + self.columns = columns + self.model_name = model_name + self.dataset_name = dataset_name + + self.config = { + "color_scheme": ["#ec7734", "#3499ec", "#ec34c1", "#9cec34"], + "score_color_complement": "#F3F4F7", + "text_fill_color": "#F3F4F7", + "text_border_color": "#BEC4CE", + "distribution_color_scale": [[0.0, "#FBF5F2"], [1.0, "#EC7734"]], + "col_spacing": 0.035, + "row_height": 24, + "category_padding": 24, + "header_padding": 80, + "score_col_width": 0.6, + "class_dist_col_width": 0.35, + "numeric_col_width": 0.25, + "layout_width": 960, + "font_size_dist": 12, + "font_size_data": 13, + "font_size_heading": 14, + "font_size_category": 14, + } + + self.update_config(**kwargs) + + def sort( + self, category_order: Dict[str, int] = None, slice_order: Dict[str, int] = None + ): + """Sort rows in report by category / slice alphabetically, or using + specified order. + + Args: + category_order (optional): map from category name to sorting rank. If None, + sort categories alphabetically. + slice_order (optional): map from slice name to sorting rank. If None, sort + slices alphabetically (within a category). 
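+
+        Example (illustrative; the category names here are hypothetical):
+
+            >>> report.sort(category_order={"Subpopulation": 0, "Attack": 1})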
+ """ + + if category_order is None: + category_order = {} + + if slice_order is None: + slice_order = {} + + for col_name in ["sort-order-category", "sort-order-slice"]: + if col_name in self.data: + raise ValueError(f"Column name '{col_name}' is reserved") + + self.data["sort-order-category"] = self.data[0].map( + lambda x: (category_order.get(x, 2 ** 10000), x) + ) + self.data["sort-order-slice"] = self.data[1].map( + lambda x: (slice_order.get(x, 2 ** 10000), x) + ) + + self.data = self.data.sort_values( + by=["sort-order-category", "sort-order-slice"] + ).drop(["sort-order-category", "sort-order-slice"], axis="columns") + + self.data.reset_index(inplace=True, drop=True) + + def filter(self, categories: List[str] = None, slices: List[str] = None): + """Filter report to specific categories AND slices + Args: + categories (optional): list of category names to filter by + slices (optional):list of slice names to filter by + """ + if categories is not None: + # self.data = self.data.loc(self.data[0].isin(categories)) + self.data = self.data[self.data[0].isin(categories)] + if slices is not None: + self.data = self.data[self.data[1].isin(slices)] + self.data.reset_index(inplace=True, drop=True) + + def rename(self, category_map: Dict[str, str], slice_map: Dict[str, str]): + """Rename categories, slices + Args: + category_map (optional): map from old to new category name + slice_map (optional): map from old to new slice name + """ + if category_map is not None: + self.data[0] = self.data[0].map(lambda x: category_map.get(x, x)) + if slice_map is not None: + self.data[1] = self.data[1].map(lambda x: slice_map.get(x, x)) + + def set_class_codes(self, class_cds: List[str]): + """Set single-letter class codes used for class distribution + columns.""" + for col in self.columns: + if isinstance(col, ClassDistributionColumn): + col.set_class_codes(class_cds) + + def set_model_name(self, model_name): + """Set model name displayed on report.""" + self.model_name = model_name + + def set_dataset_name(self, dataset_name): + """Set dataset name displayed on report.""" + self.dataset_name = dataset_name + + def set_range(self, col_title: str, min_val: float = None, max_val: float = None): + """Set min and max values for score columns + Args: + col_title: title of column to update + min_val: minimum value + max_val: maximum value + """ + for col in self.columns: + if isinstance(col, ScoreColumn) and col.title == col_title: + if min_val is not None: + col.min_val = min_val + if max_val is not None: + col.max_val = max_val + + def update_config(self, **kwargs): + for k, v in kwargs.items(): + if k not in self.config: + raise ValueError(f"Invalid config param: '{k}'") + self.config[k] = v + + def round(self): + # Round everything + self.data = self.data.round(3) + self.data.class_dist = self.data.class_dist.apply(partial(np.round, decimals=3)) + self.data.pred_dist = self.data.pred_dist.apply(partial(np.round, decimals=3)) + + @classmethod + def load(cls, path: str) -> Report: + obj = dill.load(open(path, "rb")) + assert isinstance(obj, Report), ( + f"dill loaded an instance of {type(obj)}, " f"must load {cls.__name__}." 
+ ) + return obj + + def save(self, path: str): + return dill.dump(self, open(path, "wb")) + + def figure(self, show_title=False) -> Figure: + + # Verify that rows are grouped by category + row_categories = self.data[0].tolist() + save_cat_groups = set() # Previous category groupings already encountered + prev_cat = None + # Loop through each row and see if a category is encountered outside of first + # identified group for that category + for cat in row_categories: + if cat != prev_cat: # category changes + if cat in save_cat_groups: # if new category previously encountered + raise ValueError("Rows must be grouped by category.") + prev_cat = cat + save_cat_groups.add(cat) + + categories = [] + category_sizes = [] # Num rows in each category + for category, group in itertools.groupby(self.data[0]): # column 0 is category + categories.append(category) + category_sizes.append(len(list(group))) + n_rows = sum(category_sizes) + height = ( + n_rows * self.config["row_height"] + + len(categories) * self.config["category_padding"] + + self.config["header_padding"] + ) + col_widths = [] + for col in self.columns: + if isinstance(col, ScoreColumn): + col_width = self.config["score_col_width"] + elif isinstance(col, ClassDistributionColumn): + col_width = self.config["class_dist_col_width"] + else: + col_width = self.config["numeric_col_width"] + col_widths.append(col_width) + + fig = make_subplots( + rows=len(categories), + row_titles=categories, + cols=len(self.columns), + shared_yaxes=True, + subplot_titles=[col.title for col in self.columns], + horizontal_spacing=self.config["col_spacing"], + vertical_spacing=self.config["category_padding"] / height, + row_width=list(reversed(category_sizes)), + column_width=col_widths, + ) + + hms = [] + coords = [] + category_ndx = 1 + # Group data by category + for category, category_data in self.data.groupby(0, sort=False): + score_col_ndx = 0 + slice_names = category_data[1] + slice_names = [s + " " * 3 for s in slice_names] + for col_ndx, col in enumerate(self.columns): + df_col_ndx = col_ndx + 2 + # Dataframe has two leading columns with category, slice + fig_col_ndx = col_ndx + 1 # figure columns are 1-indexed + x = category_data[df_col_ndx].tolist() + if isinstance(col, ScoreColumn): + if col.is_0_to_1: + x = [100 * x_i for x_i in x] + col_max = col.max_val + if col.is_0_to_1: + col_max = 100 * col.max_val + fig.add_trace( + go.Bar( + x=x, + y=slice_names, + orientation="h", + marker=dict(color=self.get_color(score_col_ndx)), + showlegend=False, + text=[f"{x_i:.1f}" for x_i in x], + textposition="inside", + width=0.95, + textfont=dict(color="white"), + ), + row=category_ndx, + col=fig_col_ndx, + ) + # Add marker for gray fill + fig.add_trace( + go.Bar( + x=[col_max - x_i for x_i in x], + y=slice_names, + orientation="h", + marker=dict(color=self.config["score_color_complement"]), + showlegend=False, + width=0.9, + ), + row=category_ndx, + col=fig_col_ndx, + ) + score_col_ndx += 1 + elif isinstance(col, ClassDistributionColumn): + annotation_text = [ + [f"{int(round(z * 100)):d}" for z in rw] for rw in x + ] + hm = ff.create_annotated_heatmap( + x, + x=col.class_codes, + xgap=1, + ygap=1, + annotation_text=annotation_text, + colorscale=self.config["distribution_color_scale"], + zmin=0, + zmax=1, + ) + hms.append(hm) + # Save annotation data for special code related to heatmaps at end + coords.append(len(self.columns) * (category_ndx - 1) + fig_col_ndx) + fig.add_trace( + hm.data[0], + row=category_ndx, + col=fig_col_ndx, + ) + elif isinstance(col, 
NumericColumn): + # Repurpose bar chart as text field. + fig.add_trace( + go.Bar( + x=[1] * len(x), + y=slice_names, + orientation="h", + marker=dict( + color=self.config["text_fill_color"], + line=dict( + width=0, color=self.config["text_border_color"] + ), + ), + showlegend=False, + text=[human_format(x_i) for x_i in x], + textposition="inside", + insidetextanchor="middle", + width=0.9, + ), + row=category_ndx, + col=fig_col_ndx, + ) + else: + raise ValueError("Invalid col type") + category_ndx += 1 + + for category_ndx in range(1, len(categories) + 1): + if category_ndx == len(categories): + show_x_axis = True + else: + show_x_axis = False + for col_ndx, col in enumerate(self.columns): + fig_col_ndx = col_ndx + 1 # plotly cols are 1-indexed + fig.update_yaxes(autorange="reversed", automargin=True) + if isinstance(col, ScoreColumn): + if col.is_0_to_1: + col_min, col_max = 100 * col.min_val, 100 * col.max_val + else: + col_min, col_max = col.min_val, col.max_val + + fig.update_xaxes( + range=[col_min, col_max], + row=category_ndx, + col=fig_col_ndx, + tickvals=[col_min, col_max], + showticklabels=show_x_axis, + ) + elif isinstance(col, ClassDistributionColumn): + fig.update_xaxes( + row=category_ndx, col=fig_col_ndx, showticklabels=show_x_axis + ) + elif isinstance(col, NumericColumn): + fig.update_xaxes( + range=[0, 1], + row=category_ndx, + col=fig_col_ndx, + showticklabels=False, + ) + + fig.update_layout( + height=height, + width=self.config["layout_width"], + barmode="stack", + plot_bgcolor="rgba(0, 0, 0, 0)", + paper_bgcolor="rgba(0, 0, 0, 0)", + font=dict(size=self.config["font_size_data"]), + yaxis={"autorange": "reversed"}, + margin=go.layout.Margin( + r=0, b=0, t=20 # right margin # bottom margin # top margin + ), + ) + + # Use low-level plotly interface to update padding / font size + for a in fig["layout"]["annotations"]: + # If label for group + if a["text"] in categories: + a["x"] = 0.99 # Add padding + a["font"] = dict(size=self.config["font_size_category"]) + else: + a["font"] = dict( + size=self.config["font_size_heading"] + ) # Adjust font size for non-category labels + + # Due to a quirk in plotly, need to do some special low-level coding + # Code from https://community.plotly.com/t/how-to-create-annotated-heatmaps + # -in-subplots/36686/25 + newfont = [ + go.layout.Annotation(font_size=self.config["font_size_heading"]) + ] * len(fig.layout.annotations) + fig_annots = [newfont] + [hm.layout.annotations for hm in hms] + for col_ndx in range(1, len(fig_annots)): + for k in range(len(fig_annots[col_ndx])): + coord = coords[col_ndx - 1] + fig_annots[col_ndx][k]["xref"] = f"x{coord}" + fig_annots[col_ndx][k]["yref"] = f"y{coord}" + fig_annots[col_ndx][k]["font_size"] = self.config["font_size_dist"] + + def recursive_extend(mylist, nr): + # mylist is a list of lists + result = [] + if nr == 1: + result.extend(mylist[nr - 1]) + else: + result.extend(mylist[nr - 1]) + result.extend(recursive_extend(mylist, nr - 1)) + return result + + new_annotations = recursive_extend(fig_annots[::-1], len(fig_annots)) + fig.update_layout(annotations=new_annotations) + + if show_title: + title = { + "text": f"{self.dataset_name or ''} {self.model_name or ''} " + f"Robustness Report", + "x": 0.5, + "xanchor": "center", + } + else: + title = None + fig.update_layout( + title=title, + margin=go.layout.Margin( + r=0, b=0, t=80 # right margin # bottom margin # top margin + ), + ) + + return fig + + def get_color(self, col_ndx): + return self.config["color_scheme"][col_ndx % 
len(self.config["color_scheme"])] + + +def human_format(num): + num = float("{:.3g}".format(num)) + magnitude = 0 + while abs(num) >= 1000: + magnitude += 1 + num /= 1000.0 + return "{}{}".format( + "{:f}".format(num).rstrip("0").rstrip("."), ["", "K", "M", "B", "T"][magnitude] + ) diff --git a/robustnessgym/core/slice.py b/robustnessgym/core/slice.py new file mode 100644 index 00000000..d9fd143c --- /dev/null +++ b/robustnessgym/core/slice.py @@ -0,0 +1,64 @@ +from __future__ import annotations + +import json +from json import JSONDecodeError + +from robustnessgym.core.constants import CURATION +from robustnessgym.core.dataset import Dataset +from robustnessgym.core.identifier import Identifier + + +class Slice(Dataset): + def __init__( + self, *args, identifier: str = None, dataset: Dataset = None, **kwargs + ): + + if dataset is not None: + # Create a Slice directly from the Dataset object + self.__dict__ = dataset.__dict__.copy() + self._identifier = identifier or dataset.identifier + self.lineage = [(str(Dataset.__name__), dataset.identifier)] + else: + super(Slice, self).__init__(*args, **kwargs) + + # Set the identifier + self._identifier = identifier + + # A slice has a lineage + self.lineage = [] + + # Set the category of the slice: defaults to 'curated' + self.category = CURATION + + def __repr__(self): + return ( + f"{self.__class__.__name__}[category: {self.category}, " + f"num_rows: {self.num_rows}]({self.identifier})" + ) + + @property + def identifier(self): + if self._identifier: + return self._identifier + if self.lineage: + short_lineage = [] + for entry in self.lineage: + if len(entry) == 3: + try: + columns = json.loads(entry[2]) + except JSONDecodeError: + columns = entry[2] + short_lineage.append(str(entry[1]) + " @ " + str(columns)) + else: + short_lineage.append(str(entry[1])) + self._identifier = Identifier(_name=" -> ".join(short_lineage)) + return self._identifier + return None + + @identifier.setter + def identifier(self, value): + self._identifier = value + + @classmethod + def from_dataset(cls, dataset: Dataset): + return cls(dataset=dataset) diff --git a/robustnessgym/core/storage.py b/robustnessgym/core/storage.py new file mode 100644 index 00000000..991b26e2 --- /dev/null +++ b/robustnessgym/core/storage.py @@ -0,0 +1,15 @@ +import dill as pickle + + +class StorageMixin: + def __init__(self, *args, **kwargs): + super(StorageMixin, self).__init__(*args, **kwargs) + + def save(self, path: str) -> None: + """Save the object.""" + pickle.dump(self, open(path, "wb")) + + @classmethod + def load(cls, path: str): + """Load the object from the path.""" + return pickle.load(open(path, "rb")) diff --git a/robustnessgym/core/testbench.py b/robustnessgym/core/testbench.py new file mode 100644 index 00000000..b55dcacb --- /dev/null +++ b/robustnessgym/core/testbench.py @@ -0,0 +1,611 @@ +from __future__ import annotations + +import json +import pathlib +from typing import Callable, Collection, Dict, List, Optional, Sequence, Union + +import dill +import pandas as pd +import torch +from fuzzywuzzy import process +from tqdm import tqdm + +from robustnessgym.core.constants import ( + ATTACK, + AUGMENTATION, + CURATION, + GENERIC, + SUBPOPULATION, +) +from robustnessgym.core.metrics import compute_metric +from robustnessgym.core.model import Model +from robustnessgym.core.report import ( + ClassDistributionColumn, + NumericColumn, + Report, + ScoreColumn, +) +from robustnessgym.core.slice import Slice +from robustnessgym.core.tools import persistent_hash +from 
robustnessgym.core.version import SemanticVersionerMixin +from robustnessgym.tasks.task import Task + + +# TODO(karan): make the TestBench hashable +class TestBench(SemanticVersionerMixin): + def __init__( + self, + identifier: str, + task: Task = None, + slices: Collection[Slice] = None, + version: str = "0.0.1", + dataset_id: str = None, + class_names: Collection[str] = None, + ): + + # Call the superclass + super(TestBench, self).__init__(version=version) + + # An identifier for the TestBench + self.identifier = identifier + + # Set the task + self.task = task + + # Create the collection of slices + self.slices = set() + self.slice_identifiers = set() + self._slice_table = {} + + # Add slices if any + if slices: + self.add_slices(slices) + + # The testbench internally tracks metrics + self.metrics = {} + + # The schema tells the testbench which columns to extract from the slices for + # evaluation + self.schema_type = "default" + + self.dataset_id = dataset_id + + self.class_names = class_names + + def digest(self) -> str: + return json.dumps([str(sl) for sl in self.slices]) + + @classmethod + def for_dataset( + cls, dataset: str, task: Optional[Union[str, Task]] = None, version: str = None + ): + """Create a test bench for a dataset. + + Args: + dataset: + task: + version: + + Returns: + """ + + inferred_task = None + if task is not None: + # Infer the task from the dataset + inferred_task = Task.lookup(dataset=dataset)() + # Check that the inferred task matches the task argument + if task is not None and task != inferred_task: + raise AssertionError( + f"Dataset {dataset} is only compatible with {inferred_task}, " + f"not {task}." + ) + + return TestBench( + identifier=f"{dataset}-{task}-{version}", + task=inferred_task, + slices=[], + ) + + @classmethod + def for_task( + cls, + task: Union[str, Task], + version: str = None, + ): + return TestBench( + identifier=f"{task}-{version}", + task=task, + slices=[], + ) + + def _human_readable_identifiers(self): + # Temporary function to generate human readable names + groups = {} + for ident in self.slice_identifiers: + if "->" in str(ident): + builder_ident = str(ident).split(" -> ")[-1] + builder_ident, cols = builder_ident.split(" @ ") + name = builder_ident.split("(")[0] + if name not in groups: + groups[name] = set() + groups[name].add((builder_ident, cols)) + + group_info = {} + for key, group in groups.items(): + if len(group) == 1: + group_info[key] = "name" + else: + only_single_column = len(set([t[1] for t in group])) == 1 + if only_single_column: + group_info[key] = "builder_ident" + else: + group_info[key] = "full" + + ident_mapping = {} + for ident in self.slice_identifiers: + if "->" in str(ident): + builder_ident = str(ident).split(" -> ")[-1] + builder_ident, cols = builder_ident.split(" @ ") + name = builder_ident.split("(")[0] + + if group_info[name] == "name": + new_ident = name + elif group_info[name] == "builder_ident": + new_ident = builder_ident + elif group_info[name] == "full": + new_ident = str(ident).split(" -> ")[-1] + + if new_ident.startswith("NlpAugTransformation"): + new_ident = new_ident.split("NlpAugTransformation(pipeline=[")[ + 1 + ].split("])")[0] + + else: + new_ident = str(ident).split("(")[0] + + ident_mapping[ident] = new_ident + + self.ident_mapping = ident_mapping + + def add_slices(self, slices: Collection[Slice]): + """Add slices to the testbench. 
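+
+        Slices that are empty or whose identifier is already registered are
+        skipped.
+
+        Example (illustrative; assumes sl is an existing non-empty Slice):
+
+            >>> testbench.add_slices([sl])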
+ + Args: + slices: collection of Slice objects + + Returns: + """ + if isinstance(slices, Slice): + slices = [slices] + + # Only add slices that aren't already present in the testbench and have + # non-zero length + for sl in slices: + if sl.identifier not in self.slice_identifiers and len(sl) > 0: + self.slices.add(sl) + self.slice_identifiers.add(sl.identifier) + self._slice_table[sl.identifier] = sl + + def evaluate( + self, + model: Model, + batch_size: int = 32, + coerce_fn: Callable = None, + input_columns: List[str] = None, + output_columns: List[str] = None, + ) -> Dict: + """Evaluate a model using the test bench and cache results. + + Args: + model: model to evaluate + batch_size: batch size for inference + coerce_fn: function to coerce the model's outputs. Useful if the model's + outputs cannot directly be compared to the targets. + input_columns: columns for input schema. Required if task is None. + output_columns: columns for output schema. Required if task is None. + + Returns: dict mapping slice identifiers to evaluation metrics. + """ + + if self.task is None: + if input_columns is None or output_columns is None: + raise ValueError( + "Input and output columns required when no task specified." + ) + else: + # Set the schema using the task + # TODO Is the remapping required when not using a task + self.set_schema("task") + input_columns = self.task.input_schema.keys() + output_columns = self.task.output_schema.keys() + + # TODO(karan): Uncomment and fix this assert on the type of outputs that + # model(..) returns + # # Grab 2 examples from the first slice, run it through the model and check + # that the output is a dictionary + # output = model(dataset=Dataset.from_batch(self.slices[0][:2]), + # input_keys=self.task.input_schema.keys(), + # output_keys=self.task.output_schema.keys(), + # batch_size=2, + # coerce_fn=coerce_fn) + # print(output) + # assert isinstance(output, Sequence) and isinstance(output[0], Mapping), \ + # "model(..) must return a list of dictionaries. Each dictionary should + # map metric names to values." + + # Store the model_metrics + if model.identifier not in self.metrics: + self.metrics[model.identifier] = {} + + # Run the model on all the slices + # TODO(karan): For slices that are subpopulations, the same example can be in + # multiple slices + # and will be run through the model multiple times. Create a UnionSlice? + for sl in tqdm(self.slices): + if sl.identifier not in self.metrics[model.identifier]: + # Evaluate on the slice + # TODO Why not update existing results? + self.metrics[model.identifier][sl.identifier] = model.evaluate( + dataset=sl, + input_columns=input_columns, + output_columns=output_columns, + batch_size=batch_size, + coerce_fn=coerce_fn, + ) + + return self.metrics[model.identifier] + + def add_predictions( + self, + model: Union[Model, str], + predictions: Dict[str, Union[Sequence, torch.Tensor]], + output_columns: List[str] = None, + num_classes=None, + metrics: List[str] = None, + ) -> Dict: + """Compute and cache metrics for pre-computed model predictions + Args: + model: Model or model id + predictions: Map from slice id to sequence or torch Tensor of predictions + metric (optional): list of metrics. If None, use the metrics specified in + the task. + output_columns (optional): names of output columns. Required if testbench + does not have associated task. + num_classes (optional): number of classes. Required if testbench does not + have associated task. 
+ Returns: computed metrics + """ + + if self.task is None: + if output_columns is None: + raise ValueError( + "'output_columns' is required if testbench does not have " + "associated task." + ) + if num_classes is None: + raise ValueError( + "'num_classes' is required if testbench does not have associated " + "task." + ) + if metrics is None: + raise ValueError( + "'metrics' is required if testbench does not have associated task." + ) + else: + output_columns = self.task.output_schema.keys() + num_classes = self.task.output_schema.features[ + list(self.task.output_schema.keys())[0] + ].num_classes + if self.task.classification(): + assert len(output_columns) == 1 # , "Only supports classification." + if metrics is None: + metrics = self.task.metrics + + if len(output_columns) > 1: + raise NotImplementedError("Only single output column supported") + + if isinstance(model, Model): + model = model.identifier + if model not in self.metrics: + self.metrics[model] = {} + for sl in tqdm(self.slices): + if sl.identifier not in self.metrics[model]: + # Evaluate on the slice + # TODO Why not update existing results? + # slice_predictions = predictions[sl.identifier] + evaluation_dict = {} + # Temporarily expose prediction columns + # sl.set_format(columns=output_columns() + # slice_predictions = predictions[sl.identifier] + # TODO Optimize + # labels = list(zip(*[sl[col] for col in output_columns])) + labels = sl[output_columns[0]] + for metric in metrics: + evaluation_dict[metric] = compute_metric( + metric=metric, + predictions=predictions[sl.identifier], + labels=labels, + num_classes=num_classes, + ) + # sl.reset_format() + self.metrics[model][sl.identifier] = evaluation_dict + + return evaluation_dict + + def add_metrics(self, model: Union[Model, str], metrics: Dict[str, float]): + """Cache pre-computed metrics for model + Args: + model: Model or model id. + metrics: map from metric name to value + """ + if isinstance(model, Model): + model = model.identifier + self.metrics[model] = metrics + + def create_report( + self, + model: Union[Model, str], + metric_ids: List[str] = None, + ) -> Report: + """Generate report from cached metrics for a model + Args: + model: Model or model id. Metrics must have already been computed for + this model. + metric_ids (optional): list of metric ids to include in desired order. + If None, take metrics from sample slice. + Returns: + report + """ + + if len(self.slices) == 0: + raise ValueError("Cannot create report for empty testbench") + + if isinstance(model, Model): + model = model.identifier + if model not in self.metrics: + raise ValueError( + f"Metrics for model {model} have not been computed yet." + f" You must first execute one of " + "the following methods for this model: 'evaluate', " + "'add_predictions', 'add_metrics'" + ) + + # TODO(Jesse): Need a category for test set + + model_metrics = self.metrics[model] + + # TODO(Jesse): where to put this? 
Should only need to be called once + self._human_readable_identifiers() + + if metric_ids is None: + sample_slice = list(self.slices)[0].identifier + metric_ids = list(model_metrics[sample_slice].keys()) + sorted_metric_ids = sorted( + [ + metric_id + for metric_id in metric_ids + if metric_id not in ("class_dist", "pred_dist") + ] + ) + if "class_dist" in metric_ids: + sorted_metric_ids.append("class_dist") + if "pred_dist" in metric_ids: + sorted_metric_ids.append("pred_dist") + metric_ids = sorted_metric_ids + + # Populate columns + columns = [] + for metric_id in metric_ids: + if metric_id in ("class_dist", "pred_dist"): + if self.task is None: + class_cds = None + else: + class_names = self.task.output_schema.features[ + list(self.task.output_schema.keys())[0] + ].names + class_cds = [name[0].upper() for name in class_names] + columns.append(ClassDistributionColumn(metric_id, class_cds)) + else: + columns.append( + ScoreColumn(metric_id, min_val=0, max_val=1, is_0_to_1=True) + ) + columns.append(NumericColumn("Size")) + + category_names = { + GENERIC: "Slice", + SUBPOPULATION: "SubPop", + ATTACK: "Attack", + AUGMENTATION: "Augment", + CURATION: "Eval", + } + + # Populate data + data = [] + for sl in self.slices: + slice_name = self.ident_mapping[sl.identifier] + slice_size = len(sl) + slice_category = category_names.get(sl.category, sl.category.capitalize()) + row = [] + row.append(slice_category) + row.append(slice_name) + if sl.identifier not in model_metrics: + raise ValueError( + f"Metrics for model {model} and slice {sl.identifier}" + f"have not yet been computed." + ) + slice_metrics = model_metrics[sl.identifier] + for metric_id in metric_ids: + row.append(slice_metrics[metric_id]) + row.append(slice_size) + data.append(row) + + # TODO(karan): generalize aggregation + # slice_metrics = tz.merge_with(np.mean, slice_metrics) + # Task-dependent model predictions + # TODO(karan): e.g. average class distribution predicted, figure out how to + # put this in + # Task-dependent sl information + # TODO(karan): e.g. class distribution + + df = pd.DataFrame(data) + + report = Report( + data=df, columns=columns, model_name=model, dataset_name=self.dataset_id + ) + report.sort( + category_order=dict( + (cat, i) + for i, cat in enumerate( + [SUBPOPULATION, AUGMENTATION, CURATION, ATTACK, GENERIC] + ) + ) + ) + return report + + def set_schema(self, schema_type: str): + assert schema_type in {"default", "task"} + + if self.schema_type == schema_type: + return + + if schema_type == "task": + self.slices = {self.task.remap_schema(slice) for slice in self.slices} + self.schema_type = schema_type + elif schema_type == "default": + # TODO(karan): undo the schema standardization + raise NotImplementedError + + def search(self, keyword: str, limit: int = 3): + return [ + self._slice_table[t[0]] + for t in process.extract(keyword, self.slice_identifiers, limit=limit) + ] + + def save(self, path: str) -> None: + """Save the current testbench to disk. This will save all slices in the + testbench to disk, as well as metrics and other metadata associated + with this testbench. 
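+
+        The target directory gets an "<identifier>" subdirectory containing a
+        "slices" folder (one saved dataset per slice, keyed by a hash of the
+        slice identifier), plus metrics.dill, metadata.dill and version.dill.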
+ + Args: + path: string path to the save directory + + Returns: None + + >>> testbench = TestBench(identifier='my-testbench', + task=TernaryNaturalLanguageInference()) + # Save to the current directory + >>> testbench.save('.') + # Load back the testbench + >>> testbench = TestBench.load('my-testbench') + """ + + # Path to the save directory + savedir = pathlib.Path(path) / f"{self.identifier}" + + # Create a directory inside savedir for the slices + (savedir / "slices").mkdir(parents=True, exist_ok=True) + + # Save all the slices + pbar = tqdm(self.slices) + for sl in pbar: + pbar.set_description(f"Saving slice {str(sl.identifier)[:100]}...") + sl.save_to_disk( + str(savedir / "slices" / str(persistent_hash(str(sl.identifier)))) + ) + + # Save metrics + dill.dump(self.metrics, open(str(savedir / "metrics.dill"), "wb")) + + # Save metadata + dill.dump( + { + "task": self.task, + "identifier": self.identifier, + "dataset_id": self.dataset_id, + }, + open(str(savedir / "metadata.dill"), "wb"), + ) + + # Save version info + with open(str(savedir / "version.dill"), "wb") as f: + f.write(self._dumps_version()) + + @classmethod + def available(cls, path: str) -> List[str]: + """Check the list of available testbenches in a directory. + + Args: + path: string path to a directory. The testbenches available inside this + directory will be returned. + + Returns: list of available testbenches + """ + + # Path to the save directory + savedir = pathlib.Path(path) + + # Loop over the folders + testbench_identifiers = [] + for maybe_testbench in savedir.glob("*"): + if ( + maybe_testbench.is_dir() + and (maybe_testbench / "metadata.dill").exists() + ): + testbench_identifiers.append(maybe_testbench.name) + + return testbench_identifiers + + @classmethod + def load(cls, path: str) -> TestBench: + """Load a testbench from disk. + + Args: + path: string path to the testbench directory + + Returns: + """ + + # Path to the save directory + savedir = pathlib.Path(path) + + # Load all the slices + slices = [] + for sl_path in tqdm(list((savedir / "slices").glob("*"))): + try: + slices.append(Slice.load_from_disk(str(sl_path))) + except FileNotFoundError: + continue + + # Load metrics + metrics = dill.load(open(str(savedir / "metrics.dill"), "rb")) + + # Load metadata + metadata = dill.load(open(str(savedir / "metadata.dill"), "rb")) + + # Create the testbench + testbench = cls( + identifier=metadata["identifier"], + task=metadata["task"], + slices=slices, + ) + + # Set previously stored metrics + testbench.metrics = metrics + + # Load version info + with open(str(savedir / "version.dill"), "rb") as f: + testbench._loads_version(f.read()) + + return testbench + + def make(self, identifier: str): + # Resolve the location of the TestBench + + # Pull the TestBench + return self.pull(identifier) + + def pull(self, identifier: str): + pass + + def publish(self): + pass diff --git a/robustnessgym/core/tools.py b/robustnessgym/core/tools.py new file mode 100644 index 00000000..931958fa --- /dev/null +++ b/robustnessgym/core/tools.py @@ -0,0 +1,190 @@ +import hashlib +import inspect +import json +from functools import partial +from typing import List, Mapping, Sequence + +import cytoolz as tz +import progressbar +import yaml + + +def recmerge(*objs, merge_sequences=False): + """Recursively merge an arbitrary number of collections. For conflicting + values, later collections to the right are given priority. By default + (merge_sequences=False), sequences are treated as a normal value and not + merged. 
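+
+    Example (illustrative):
+
+        >>> recmerge({"a": 1, "b": {"c": 2}}, {"b": {"d": 3}, "a": 0})
+        {'a': 0, 'b': {'c': 2, 'd': 3}}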
+ + Args: + *objs: collections to merge + merge_sequences: whether to merge values that are sequences + + Returns: merged collection + """ + if isinstance(objs, tuple) and len(objs) == 1: + # A squeeze operation since merge_with generates tuple(list_of_objs,) + objs = objs[0] + if all([isinstance(obj, Mapping) for obj in objs]): + # Merges all the collections, recursively applies merging to the combined values + return tz.merge_with(partial(recmerge, merge_sequences=merge_sequences), *objs) + elif all([isinstance(obj, Sequence) for obj in objs]) and merge_sequences: + # Merges sequence values by concatenation + return list(tz.concat(objs)) + else: + # If colls does not contain mappings, simply pick the last one + return tz.last(objs) + + +def persistent_hash(s: str): + """Compute a hash that persists across multiple Python sessions for a + string.""" + return int(hashlib.sha224(s.encode()).hexdigest(), 16) + + +def strings_as_json(strings: List[str]): + """Convert a list of strings to JSON. + + Args: + strings: A list of str. + + Returns: JSON dump of the strings. + """ + return json.dumps(strings) if len(strings) > 1 else strings[0] + + +def get_default_args(func) -> dict: + """Inspect a function to get arguments that have default values. + + Args: + func: a Python function + + Returns: dictionary where keys correspond to arguments, and values correspond to + their defaults. + """ + signature = inspect.signature(func) + return { + k: v.default + for k, v in signature.parameters.items() + if v.default is not inspect.Parameter.empty + } + + +class DownloadProgressBar: + def __init__(self): + self.pbar = None + + def __call__(self, block_num, block_size, total_size): + if not self.pbar: + self.pbar = progressbar.ProgressBar( + maxval=total_size if total_size > 0 else 1e-2 + ) + self.pbar.start() + + downloaded = block_num * block_size + if downloaded < total_size: + self.pbar.update(downloaded) + else: + self.pbar.finish() + + +def prettyprint(s: str) -> None: + """Prettyprint with YAML. + + Args: + s: string + """ + if hasattr(s, "__dict__"): + print(yaml.dump(s.__dict__)) + elif isinstance(s, dict): + print(yaml.dump(s)) + else: + print(s) + + +def get_all_leaf_paths(coll): + """Returns a list of paths to all leaf nodes in a nested dict. + + Paths can travel through lists and the index is inserted into the + path. + """ + if isinstance(coll, Mapping): + return list( + tz.concat( + map( + lambda t: list(map(lambda p: [t[0]] + p, get_all_leaf_paths(t[1]))), + coll.items(), + ) + ) + ) + + elif isinstance(coll, list): + return list( + tz.concat( + map( + lambda t: list(map(lambda p: [t[0]] + p, get_all_leaf_paths(t[1]))), + enumerate(coll), + ) + ) + ) + else: + return [[]] + + +def get_all_paths(coll, prefix_path=(), stop_at=None, stop_below=None): + """Given a collection, by default returns paths to all the leaf nodes. + + Use stop_at to truncate paths at the given key. Use stop_below to + truncate paths one level below the given key. + """ + assert ( + stop_at is None or stop_below is None + ), "Only one of stop_at or stop_below can be used." 
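+    # stop_below truncates the path one level below a matching key, so the check
+    # inspects the second-to-last entry of prefix_path (the parent of this node).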
+    if stop_below is not None and stop_below in str(
+        tz.last(tz.take(len(prefix_path) - 1, prefix_path))
+    ):
+        return [[]]
+    if stop_at is not None and stop_at in str(tz.last(prefix_path)):
+        return [[]]
+    if isinstance(coll, Mapping) or isinstance(coll, list):
+        if isinstance(coll, Mapping):
+            items = coll.items()
+        else:
+            items = enumerate(coll)
+
+        return list(
+            tz.concat(
+                map(
+                    lambda t: list(
+                        map(
+                            lambda p: [t[0]] + p,
+                            get_all_paths(
+                                t[1],
+                                prefix_path=list(prefix_path) + [t[0]],
+                                stop_at=stop_at,
+                                stop_below=stop_below,
+                            ),
+                        )
+                    ),
+                    items,
+                )
+            )
+        )
+    else:
+        return [[]]
+
+
+def get_only_paths(coll, pred, prefix_path=(), stop_at=None, stop_below=None):
+    """Get all paths that satisfy the predicate fn pred.
+
+    First gets all paths and then filters them based on pred.
+    """
+    all_paths = get_all_paths(
+        coll, prefix_path=prefix_path, stop_at=stop_at, stop_below=stop_below
+    )
+    return list(filter(pred, all_paths))
+
+
+class class_or_instancemethod(classmethod):
+    def __get__(self, instance, type_):
+        descr_get = super().__get__ if instance is None else self.__func__.__get__
+        return descr_get(instance, type_)
diff --git a/robustnessgym/core/version.py b/robustnessgym/core/version.py
new file mode 100644
index 00000000..2731e09d
--- /dev/null
+++ b/robustnessgym/core/version.py
@@ -0,0 +1,86 @@
+from types import SimpleNamespace
+
+import dill as pickle
+from semver import VersionInfo as Version
+
+
+class SemanticVersionerMixin:
+    """Simple mixin that adds semantic versioning to any class."""
+
+    def __init__(self, version: str = "0.0.1", *args, **kwargs):
+        super(SemanticVersionerMixin, self).__init__(*args, **kwargs)
+        self._version = Version.parse(version)
+        self._version_history = {}
+        self._last_digest = None
+
+    # TODO(karan): implement more features for commit-then-bump, add diffing
+
+    @property
+    def version(self):
+        return str(self._version)
+
+    @property
+    def version_history(self):
+        return self._version_history
+
+    @property
+    def major(self):
+        return self._version.major
+
+    @property
+    def minor(self):
+        return self._version.minor
+
+    @property
+    def patch(self):
+        return self._version.patch
+
+    def bump_major(self):
+        """Commit the current version and bump the major version."""
+        self.commit()
+        self._version = self._version.bump_major()
+        self._last_digest = self.digest()
+
+    def bump_minor(self):
+        """Commit the current version and bump the minor version."""
+        self.commit()
+        self._version = self._version.bump_minor()
+        self._last_digest = self.digest()
+
+    def bump_patch(self):
+        """Commit the current version and bump the patch version."""
+        self.commit()
+        self._version = self._version.bump_patch()
+        self._last_digest = self.digest()
+
+    def commit(self):
+        """Commit the current version to history.
+
+        Multiple commits on the same version overwrite each other.
+        """
+        self._version_history[self.version] = self.digest()
+
+    def digest(self) -> str:
+        """Compute a digest for the object."""
+        raise NotImplementedError(
+            "Must implement a digest for the object that is being versioned."
+ ) + + def diff(self, digest: str, otherdigest: str) -> bool: + """Check if digests have changed.""" + return digest != otherdigest + + def _dumps_version(self) -> str: + return pickle.dumps( + SimpleNamespace( + version=self.version, + history=self._version_history, + last_digest=self._last_digest, + ) + ) + + def _loads_version(self, s: str): + namespace = pickle.loads(s) + self._version = namespace.version + self._version_history = namespace.history + self._last_digest = namespace.last_digest diff --git a/robustnessgym/slicebuilders/__init__.py b/robustnessgym/slicebuilders/__init__.py new file mode 100644 index 00000000..ac986069 --- /dev/null +++ b/robustnessgym/slicebuilders/__init__.py @@ -0,0 +1,4 @@ +from .attack import Attack # noqa +from .slicebuilder import SliceBuilder # noqa +from .subpopulation import Subpopulation, SubpopulationCollection # noqa +from .transformation import Transformation # noqa diff --git a/robustnessgym/slicebuilders/attack.py b/robustnessgym/slicebuilders/attack.py new file mode 100644 index 00000000..11fae7e6 --- /dev/null +++ b/robustnessgym/slicebuilders/attack.py @@ -0,0 +1,21 @@ +"""Generic wrapper for adversarial attacks.""" +from typing import List + +from robustnessgym.core.constants import ATTACK +from robustnessgym.core.identifier import Identifier +from robustnessgym.slicebuilders.transformation import Transformation + + +class Attack(Transformation): + """Class for adversarial attacks.""" + + def __init__( + self, + identifiers: List[Identifier], + apply_fn=None, + ): + super(Attack, self).__init__( + category=ATTACK, + identifiers=identifiers, + apply_fn=apply_fn, + ) diff --git a/robustnessgym/slicebuilders/attacks/__init__.py b/robustnessgym/slicebuilders/attacks/__init__.py new file mode 100644 index 00000000..ba5e739b --- /dev/null +++ b/robustnessgym/slicebuilders/attacks/__init__.py @@ -0,0 +1,2 @@ +from robustnessgym.slicebuilders.attacks.morpheus import Morpheus # noqa +from robustnessgym.slicebuilders.attacks.textattack import TextAttack # noqa diff --git a/robustnessgym/slicebuilders/attacks/morpheus.py b/robustnessgym/slicebuilders/attacks/morpheus.py new file mode 100644 index 00000000..c5ac3de0 --- /dev/null +++ b/robustnessgym/slicebuilders/attacks/morpheus.py @@ -0,0 +1,118 @@ +from typing import Dict, List, Tuple + +import numpy as np + +from robustnessgym.core.dataset import transpose_batch +from robustnessgym.core.identifier import Identifier +from robustnessgym.slicebuilders.attack import Attack + +try: + from morpheus import MorpheusHuggingfaceNLI, MorpheusHuggingfaceQA +except ImportError: + _morpheus_available = False +else: + _morpheus_available = True + + +class Morpheus(Attack): + def __init__(self, dataset: str, model: str, constrain_pos: bool = True, **kwargs): + + if not _morpheus_available: + raise ImportError("Please install morpheus.") + + super().__init__( + identifiers=[ + Identifier( + self.__class__.__name__, + dataset=dataset, + model=model, + ) + ], + ) + + self.constrain_pos = constrain_pos + + self.dataset = dataset.lower() + if self.dataset == "mnli": + self.attack = MorpheusHuggingfaceNLI(model) + elif "squad" in self.dataset: + is_squad2 = "2" in self.dataset + self.attack = MorpheusHuggingfaceQA(model, squad2=is_squad2) + elif self.dataset == "cnn_dailymail" or self.dataset == "xsum": + rouge_type = kwargs.get("rouge_type", "rougeL") + max_input_tokens = kwargs.get("max_input_tokens", 1024) + self.attack = MorpheusHuggingfaceQA( + model, rouge_type=rouge_type, max_input_tokens=max_input_tokens + ) 
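+        # Only mnli, squad variants, cnn_dailymail and xsum are handled above;
+        # any other dataset falls through to NotImplementedError below.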
+ else: + raise NotImplementedError + + def apply( + self, + skeleton_batches: List[Dict[str, List]], + slice_membership: np.ndarray, + batch: Dict[str, List], + columns: List[str], + *args, + **kwargs + ) -> Tuple[List[Dict[str, List]], np.ndarray]: + + for i, example in enumerate(transpose_batch(batch)): + if self.dataset == "mnli": + # Assume column order is [premise, hypothesis, label] + prem_col, hypo_col, label_col = columns + text_label = self.get_NLI_text_label(example[label_col]) + + new_prem, new_hypo, predicted_label, _ = self.attack.morph( + example[prem_col], + example[hypo_col], + example[label_col], + constrain_pos=self.constrain_pos, + ) + if predicted_label != text_label: + skeleton_batches[0][prem_col][i] = new_prem + skeleton_batches[0][hypo_col][i] = new_hypo + else: + slice_membership[i, 0] = 0 + elif "squad" in self.dataset: + question_col = columns[0] + # NOTE: assume first element in columns is question_col + # Ignoring the rest since example['answers'] is another Dict + question_dict = self.prepare_question_dict(example, question_col) + new_question, predicted_answer = self.attack.morph( + question_dict, example["context"], constrain_pos=self.constrain_pos + ) + if predicted_answer not in example["answers"]["text"]: + skeleton_batches[0][question_col][i] = new_question + else: + slice_membership[i, 0] = 0 + elif self.dataset == "cnn_dailymail" or self.dataset == "xsum": + # Assume column order is [article_col, summary_col] + article_col, summary_col = columns + new_article, predicted_summary, _ = self.attack.morph( + example[article_col], + example[summary_col], + constrain_pos=self.constrain_pos, + ) + if predicted_summary != example[summary_col]: + skeleton_batches[0][article_col][i] = new_article + else: + slice_membership[i, 0] = 0 + else: + raise NotImplementedError + return skeleton_batches, slice_membership + + # No type hint since the values can be ints or strings: Dict[str,] + @classmethod + def prepare_question_dict(cls, example, question_col): + question_dict = {"question": example[question_col]} + question_dict["answers"] = [ + {"answer_start": i[0], "text": i[1]} + for i in zip(example["answers"]["answer_start"], example["answers"]["text"]) + ] + question_dict["is_impossible"] = len(example["answers"]["text"]) == 0 + return question_dict + + def get_NLI_text_label(self, label: int) -> str: + hf_labels = ["entailment", "neutral", "contradiction"] + return hf_labels[label] diff --git a/robustnessgym/slicebuilders/attacks/textattack.py b/robustnessgym/slicebuilders/attacks/textattack.py new file mode 100644 index 00000000..c5e8e625 --- /dev/null +++ b/robustnessgym/slicebuilders/attacks/textattack.py @@ -0,0 +1,93 @@ +from collections import OrderedDict +from typing import Dict, List, Tuple + +import cytoolz as tz +import numpy as np +import textattack.attack_recipes as attack_recipes +from textattack.attack_recipes import AttackRecipe +from textattack.models.wrappers import HuggingFaceModelWrapper, ModelWrapper + +from robustnessgym.core.identifier import Identifier +from robustnessgym.core.model import Model +from robustnessgym.slicebuilders.attack import Attack + + +class TextAttack(Attack): + def __init__( + self, + attack: AttackRecipe, + ): + super(TextAttack, self).__init__( + identifiers=[ + Identifier( + self.__class__.__name__, + attack=attack, + ) + ], + ) + + self.attack = attack + + @classmethod + def recipes(cls): + recipes = [] + for possible_recipe_name in dir(attack_recipes): + possible_recipe = getattr(attack_recipes, 
possible_recipe_name) + if hasattr(possible_recipe, "mro"): + for _cls in possible_recipe.mro(): + if _cls == AttackRecipe and possible_recipe != AttackRecipe: + recipes.append(possible_recipe_name) + return recipes + + def apply( + self, + skeleton_batches: List[Dict[str, List]], + slice_membership: np.ndarray, + batch: Dict[str, List], + columns: List[str], + *args, + **kwargs + ) -> Tuple[List[Dict[str, List]], np.ndarray]: + + # Group the batch into inputs and output + batch_inputs = tz.keyfilter(lambda k: k in columns[:-1], batch) + batch_inputs = [ + OrderedDict(zip(batch_inputs, t)) for t in zip(*batch_inputs.values()) + ] + + batch_output = [int(e) for e in batch[columns[-1]]] + + # Create a fake dataset for textattack + fake_dataset = list(zip(batch_inputs, batch_output)) + + # Attack the dataset + outputs = list(self.attack.attack_dataset(fake_dataset)) + + for i, output in enumerate(outputs): + # Check if the goal succeeded + if output.perturbed_result.goal_status == 0: + # If success, fill out the skeleton batch + for ( + key, + val, + ) in output.perturbed_result.attacked_text._text_input.items(): + # TODO(karan): support num_attacked_texts > 1 + skeleton_batches[0][key][i] = val + + # # Fill the perturbed output: *this was incorrect, removing this + # statement* + # # TODO(karan): delete this snippet + # skeleton_batches[0][columns[-1]][i] = output.perturbed_result.output + else: + # Unable to attack the example: set its slice membership to zero + slice_membership[i, 0] = 0 + + return skeleton_batches, slice_membership + + @classmethod + def from_recipe(cls, recipe: str, model: ModelWrapper): + return cls(attack=getattr(attack_recipes, recipe).build(model=model)) + + @classmethod + def wrap_huggingface_model(cls, model: Model) -> ModelWrapper: + return HuggingFaceModelWrapper(model=model.model, tokenizer=model.tokenizer) diff --git a/robustnessgym/slicebuilders/curator.py b/robustnessgym/slicebuilders/curator.py new file mode 100644 index 00000000..42be40e2 --- /dev/null +++ b/robustnessgym/slicebuilders/curator.py @@ -0,0 +1,23 @@ +from typing import List + +from robustnessgym import Identifier, SliceBuilder +from robustnessgym.core.constants import CURATION + + +class Curator(SliceBuilder): + def __init__( + self, + identifiers: List[Identifier], + apply_fn, + ): + super(Curator, self).__init__( + category=CURATION, + identifiers=identifiers, + apply_fn=apply_fn, + ) + + def __call__(self, *args, **kwargs): + pass + + def apply(self, *args, **kwargs): + pass diff --git a/robustnessgym/slicebuilders/slicebuilder.py b/robustnessgym/slicebuilders/slicebuilder.py new file mode 100644 index 00000000..70f9d9cc --- /dev/null +++ b/robustnessgym/slicebuilders/slicebuilder.py @@ -0,0 +1,694 @@ +from __future__ import annotations + +import pathlib +from functools import partial +from itertools import compress +from typing import Callable, Dict, List, Optional, Sequence, Tuple, Union + +import cytoolz as tz +import numpy as np +import tqdm +from multiprocess.pool import Pool + +from robustnessgym.core.constants import ( + ATTACK, + CURATION, + GENERIC, + SLICEBUILDERS, + SUBPOPULATION, + TRANSFORMATION, +) +from robustnessgym.core.dataset import Batch, BatchOrDataset, Dataset +from robustnessgym.core.identifier import Identifier +from robustnessgym.core.slice import Slice +from robustnessgym.core.storage import StorageMixin +from robustnessgym.core.tools import persistent_hash, recmerge, strings_as_json + + +class SliceBuilder(StorageMixin): + """Base class for builders that output 
slices.""" + + # Path to a log directory + logdir: pathlib.Path = pathlib.Path.home() / "robustnessgym" / SLICEBUILDERS + + # Create the log directory + logdir.mkdir(parents=True, exist_ok=True) + + CATEGORIES = [ + GENERIC, + SUBPOPULATION, + ATTACK, + TRANSFORMATION, + CURATION, + ] + + def __init__( + self, + category: str, + identifiers: List[Identifier], + apply_fn: Callable = None, + *args, + **kwargs, + ): + + super(SliceBuilder, self).__init__(*args, **kwargs) + + # The SliceMaker belongs to a category + assert ( + category in self.CATEGORIES + ), f"argument category must be one of {self.CATEGORIES}" + self.category = category + + # Each identifier corresponds to a single output Slice generated by this + # SliceBuilder + self.identifiers = identifiers + + # Keep track of the CachedOperation dependencies + self.prerequisites = ( + set() if "prerequisites" not in kwargs else kwargs["prerequisites"] + ) + # TODO(karan): remove comment + # for base in self.__class__.__bases__: + # for cls in base.__mro__: + # if str(CachedOperation.__name__) in str(cls): + # self.prerequisites.add(base) + + if apply_fn: + # Assign to the method + self.apply = apply_fn + + def __call__( + self, + batch_or_dataset: BatchOrDataset, + columns: List[str], + mask: List[int] = None, + store_compressed: bool = None, + store: bool = None, + num_proc: int = None, + *args, + **kwargs, + ): + + # Check that prerequisites are satisfied + self.prerequisites_handler(batch_or_dataset) + + if isinstance(batch_or_dataset, Dataset): + + # Slice a dataset + dataset, slices, slice_membership = self.process_dataset( + dataset=batch_or_dataset, + columns=columns, + # Automatically infer the mask from the Dataset if it's not specified + mask=batch_or_dataset.check_tape( + path=[SLICEBUILDERS, self.category], + identifiers=self.identifiers, + columns=columns, + ) + if not mask + else mask, + store_compressed=True if store_compressed is None else store_compressed, + store=True if store is None else store, + num_proc=num_proc, + *args, + **kwargs, + ) + + # Update the Dataset's history + # TODO(karan): use mask to figure out what is actually applied + dataset.update_tape( + path=[SLICEBUILDERS, self.category], + identifiers=self.identifiers, + columns=columns, + ) + + return dataset, slices, slice_membership + + elif isinstance(batch_or_dataset, Dict): + if store_compressed is True: + print( + "Compressed storage cannot be used on a batch. " + "Please use Dataset.from_batch(batch) before " + "applying the SliceBuilder." 
+ ) + # Slice a batch + return self.process_batch( + batch=batch_or_dataset, + columns=columns, + mask=mask, + # Don't allow compressed storage for __call__ on a batch + store_compressed=False, + # Don't store by default + store=False if store is None else store, + *args, + **kwargs, + ) + else: + raise NotImplementedError + + def __repr__(self): + return ( + f"{self.category}[{self.__class__.__name__}(num_slices={self.num_slices})]" + ) + + @property + def num_slices(self): + return len(self.identifiers) + + def __getitem__(self, item: int): + return self.identifiers[item] + + def __iter__(self): + yield from self.identifiers + + def prerequisites_handler(self, batch_or_dataset: BatchOrDataset): + if isinstance(batch_or_dataset, Dataset): + batch = batch_or_dataset[:2] + else: + batch = batch_or_dataset + + # Check if pre-requisites are satisfied + # TODO(karan): move to a method + if "cache" not in batch: + pending = self.prerequisites + else: + pending = { + prerequisite + for prerequisite in self.prerequisites + if not prerequisite.available(batch) + } + + # TODO(karan): Automatically run the pending pre-requisites + if pending: + raise RuntimeError( + f"Cannot run SliceBuilder, prerequisites {pending} not satisfied." + ) + + @staticmethod + def store(batch: Dict[str, List], updates: List[Dict]) -> Dict[str, List]: + """Update a batch of examples with slice information.""" + if "slices" not in batch: + batch["slices"] = [{} for _ in range(len(batch["index"]))] + + # For each example, recursively merge the example's original cache dictionary + # with the update dictionary + batch["slices"] = [ + recmerge(example_dict, update_dict, merge_sequences=True) + for example_dict, update_dict in zip(batch["slices"], updates) + ] + + return batch + + def prepare_batch( + self, + batch: Batch, + columns: List[str], + mask: List[int] = None, + store_compressed: bool = True, + store: bool = True, + *args, + **kwargs, + ) -> Batch: + return batch + + def prepare_dataset( + self, + dataset: Dataset, + columns: List[str], + batch_size: int = 32, + mask: List[int] = None, + store_compressed: bool = True, + store: bool = True, + *args, + **kwargs, + ) -> Dataset: + + # Compute the hash for this operation + # FIXME(karan): this is repeated inside process_dataset + val = persistent_hash(str(dataset.identifier)) ^ dataset.hash_interactions() + for i, identifier in enumerate(self.identifiers): + if not mask[i]: + val ^= persistent_hash(str(identifier) + str(strings_as_json(columns))) + + try: + return dataset.map( + partial( + self.prepare_batch, + columns=columns, + mask=mask, + store_compressed=store_compressed, + store=store, + *args, + **kwargs, + ), + batched=True, + batch_size=batch_size, + load_from_cache_file=False, + cache_file_name=str( + dataset.logdir / ("cache-" + str(abs(val)) + "-prep.arrow") + ), + ) + except: # TypeError or PicklingError or AttributeError: # noqa + # Batch the dataset, and process each batch + all_batches = [ + self.prepare_batch( + batch=batch, + columns=columns, + mask=mask, + store_compressed=store_compressed, + store=store, + *args, + **kwargs, + ) + for batch in dataset.batch(batch_size) + ] + + # Update the dataset efficiently by reusing all_batches + return dataset.map( + lambda examples, indices: all_batches[indices[0] // batch_size], + batched=True, + batch_size=batch_size, + with_indices=True, + load_from_cache_file=False, + cache_file_name=str( + dataset.logdir / ("cache-" + str(abs(val)) + "-prep.arrow") + ), + ) + + def process_dataset( + self, + dataset: 
Dataset, + columns: List[str], + batch_size: int = 32, + mask: List[int] = None, + store_compressed: bool = True, + store: bool = True, + num_proc: int = None, + *args, + **kwargs, + ) -> Tuple[Dataset, List[Slice], np.ndarray]: + """Apply a SliceBuilder to a dataset. + + Args: + dataset: Dataset + columns: list of columns + batch_size: integer batch size + mask: boolean or integer mask array, mask[i] = True means that the ith + slice will be masked out + store_compressed: whether to store in a compressed format + store: whether to store the results along with the example in Dataset + num_proc: num processes for multiprocessing + *args: optional additional arguments + **kwargs: optional additional keyword arguments + + Returns: tuple of (Dataset, list of Slices, matrix of (example, + slice) membership) + """ + # Prepare the dataset + dataset = self.prepare_dataset( + dataset=dataset, + columns=columns, + batch_size=batch_size, + mask=mask, + store_compressed=store_compressed, + store=store, + *args, + **kwargs, + ) + + # Compute a hash + val = persistent_hash(str(dataset.identifier)) ^ dataset.hash_interactions() + for i, identifier in enumerate(self.identifiers): + if not mask[i]: + val ^= persistent_hash(str(identifier) + str(strings_as_json(columns))) + + try: + # Map the SliceBuilder over the dataset + all_sliced_batches = [] + all_slice_memberships = [] + + def _map_fn(batch): + """Map function for processing batches. + + Note that using this map_fn in a stateful way is + dangerous, since every invocation of this function + appends to the all_slice_batches list. The .map() + function will invoke this once for testing before + performing the map, so we discard the first entry + inserted into all_sliced_batches. + """ + batch, sliced_batches, slice_membership = self.process_batch( + batch=batch, + columns=columns, + mask=mask, + store_compressed=store_compressed, + store=store, + *args, + **kwargs, + ) + all_sliced_batches.append(sliced_batches) + all_slice_memberships.append(slice_membership) + return batch + + dataset = dataset.map( + _map_fn, + batched=True, + batch_size=batch_size, + # FIXME(karan): enable this by adding logic for generating + # all_sliced_batches and all_slice_memberships + # when loading from cache file + load_from_cache_file=False, + # The cache file name is a XOR of the interaction history and the + # current operation + cache_file_name=str( + dataset.logdir / ("cache-" + str(abs(val)) + ".arrow") + ), + ) + + # Remove the first entry (see _map_fn) + all_sliced_batches = all_sliced_batches[1:] + all_slice_memberships = all_slice_memberships[1:] + + except: # noqa + # Batch the dataset, and process each batch + all_batches, all_sliced_batches, all_slice_memberships = zip( + *[ + self.process_batch( + batch=batch, + columns=columns, + mask=mask, + store_compressed=store_compressed, + store=store, + *args, + **kwargs, + ) + for batch in dataset.batch(batch_size) + ] + ) + + # Update the dataset efficiently by reusing all_batches + dataset = dataset.map( + lambda examples, indices: all_batches[indices[0] // batch_size], + batched=True, + batch_size=batch_size, + with_indices=True, + load_from_cache_file=False, + # The cache file name is a XOR of the interaction history and the + # current operation + cache_file_name=str( + dataset.logdir / ("cache-" + str(abs(val)) + ".arrow") + ), + ) + + # Create a single slice label matrix + slice_membership = np.concatenate(all_slice_memberships, axis=0) + + slice_cache_hashes = [] + for identifier in self.identifiers: + 
slice_cache_hashes.append(val ^ persistent_hash(str(identifier))) + + if not num_proc or num_proc == 1: + # Construct slices + slices = [] + for i, slice_batches in enumerate(zip(*all_sliced_batches)): + slices.append( + create_slice( + ( + dataset, + slice_membership, + slice_batches, + i, + batch_size, + slice_cache_hashes[i], + ) + ) + ) + else: + # Parallelized slice construction + with Pool(num_proc) as pool: + slices = pool.map( + create_slice, + [ + ( + dataset, + slice_membership, + slice_batches, + i, + batch_size, + slice_cache_hashes[i], + ) + for i, slice_batches in enumerate(zip(*all_sliced_batches)) + ], + ) + + # TODO(karan): make this more systematic + # TODO(karan): fix bug when slicing a Slice + for i, sl in enumerate(slices): + # # Set the Slice features + # sl.info.features = dataset.features + + # Set the Slice category using the SliceBuilder's category + sl.category = self.category + + # Create the lineage + sl.lineage = [ + (str(Dataset.__name__), dataset.identifier), + ( + str(self.category.capitalize()), + self.identifiers[i], + strings_as_json(columns), + ), + ] + if isinstance(dataset, Slice): + # Prepend the Slice's lineage instead, if the dataset was a slice + sl.lineage = dataset.lineage + [ + ( + str(self.category.capitalize()), + self.identifiers[i], + strings_as_json(columns), + ) + ] + + return dataset, slices, slice_membership + + def process_batch( + self, + batch: Dict[str, List], + columns: List[str], + mask: List[int] = None, + store_compressed: bool = True, + store: bool = True, + *args, + **kwargs, + ) -> Tuple[Dict[str, List], List[Dict[str, List]], Optional[np.ndarray]]: + """Apply a SliceBuilder to a batch of data. + + Args: + batch: a batch of data + columns: list of columns + mask: boolean or integer mask array, mask[i] = True means that the ith + slice will be masked out + store_compressed: whether to store in a compressed format + store: whether to store the results along with the example in Dataset + *args: optional additional arguments + **kwargs: optional additional keyword arguments + + Returns: tuple of (batch, list of slices (as batches), matrix of (example, + slice) membership)) + """ + return batch, [batch], None + + def postprocess_dataset( + self, dataset: Dataset, columns: List[str], batch_size: int = 32 + ) -> Dataset: + pass + + def apply(self, *args, **kwargs): + raise NotImplementedError("Must implement apply.") + + @classmethod + def join(cls, *slicemakers: SliceBuilder) -> Sequence[SliceBuilder]: + """Join many slicemakers. + + By default, just returns the slicemakers. + """ + return slicemakers + + def masked(self, mask: List[int]): + pass + + def unmasked(self): + pass + + @staticmethod + def filter_batch_by_slice_membership( + batch: Dict[str, List], slice_membership: np.ndarray + ) -> List[Dict[str, List]]: + """Use a matrix of slice membership labels to select the subset of + examples in each slice. + + Returns a list. Each element in the list corresponds to a single + slice, and contains the subset of examples in 'batch' that lies + in that slice. 
+ """ + return [ + tz.valmap(lambda v: list(compress(v, s)), batch) for s in slice_membership.T + ] + + @classmethod + def retrieve( + cls, + batch: Batch, + columns: Union[List[str], List[List[str]]], + proc_fns: Union[str, Callable, List[Union[str, Callable]]] = None, + identifier: Union[str, Identifier] = None, + reapply: bool = False, + **kwargs, + ) -> Optional[Union[Batch, List[Batch]]]: + if not reapply: + if "slices" not in batch: + return None + + # Infer the most relevant key to retrieve if an identifier is not specified + if not identifier: + for ident_key in batch["slices"][0].keys(): + # Pick the first key that matches the cls name + if ident_key.startswith(cls.__name__): + identifier = ident_key + break + + try: + if isinstance(columns[0], str): + retrieval = { + strings_as_json(columns): [ + cls.decode(cache[str(identifier)][strings_as_json(columns)]) + for cache in batch["cache"] + ] + } + else: + retrieval = { + strings_as_json(cols_): [ + cls.decode(cache[str(identifier)][strings_as_json(cols_)]) + for cache in batch["cache"] + ] + for cols_ in columns + } + except KeyError: + raise ValueError("Could not retrieve information for all keys.") + + # Check if the retrieved information needs to be processed + if not proc_fns: + return retrieval + pass + else: + pass + + +class SliceBuilderCollection(SliceBuilder): + """Collection of Slice Builders.""" + + def __init__(self, slicebuilders: List[SliceBuilder], *args, **kwargs): + super(SliceBuilderCollection, self).__init__( + category=GENERIC, + identifiers=list( + tz.concat([slicebuilder.identifiers for slicebuilder in slicebuilders]) + ), + *args, + **kwargs, + ) + + # TODO(karan): some slicebuilders aren't compatible with each other (e.g. + # single column vs. multi column): + # add some smarter logic here to handle this + + # Store the subpopulations + self.slicebuilders = slicebuilders + + def __repr__(self): + # TODO(karan): format this nicely + return ( + f"{self.__class__.__name__}(" + f"{[str(slicebuilder) for slicebuilder in self.slicebuilders]})]" + ) + + def __call__( + self, + batch_or_dataset: BatchOrDataset, + columns: List[str], + mask: List[int] = None, + store_compressed: bool = None, + store: bool = None, + *args, + **kwargs, + ): + + if mask: + raise NotImplementedError( + "Mask not supported for SliceBuilderCollection yet." 
+ ) + + slices = [] + slice_membership = [] + # Apply each slicebuilder in sequence + for i, slicebuilder in tqdm.tqdm(enumerate(self.slicebuilders)): + # Apply the slicebuilder + batch_or_dataset, slices_i, slice_membership_i = slicebuilder( + batch_or_dataset=batch_or_dataset, + columns=columns, + mask=mask, + store_compressed=store_compressed, + store=store, + *args, + **kwargs, + ) + + # Add in the slices and slice membership + slices.extend(slices_i) + slice_membership.append(slice_membership_i) + + slice_membership = np.concatenate(slice_membership, axis=1) + + return batch_or_dataset, slices, slice_membership + + +def create_slice(args): + # Unpack args + dataset, slice_membership, slice_batches, i, batch_size, slice_cache_hash = args + + # Create a new empty slice + sl = Slice.from_dict({}) + + # Create a Slice "copy" of the Dataset + sl.__dict__.update(dataset.__dict__) + sl._identifier = None + + # Filter + sl = sl.filter( + lambda example, idx: bool(slice_membership[idx, i]), + with_indices=True, + input_columns=["index"], + batch_size=batch_size, + cache_file_name=str( + dataset.logdir / ("cache-" + str(abs(slice_cache_hash)) + "-filter.arrow") + ), + ) + + slice_batch = tz.merge_with(tz.compose(list, tz.concat), slice_batches) + + # FIXME(karan): interaction tape history is wrong here, esp with augmenation/attacks + + # Map + if len(sl): + sl = sl.map( + lambda batch, indices: tz.valmap( + lambda v: v[indices[0] : indices[0] + batch_size], slice_batch + ), + batched=True, + batch_size=batch_size, + with_indices=True, + remove_columns=sl.column_names, + cache_file_name=str( + dataset.logdir / ("cache-" + str(abs(slice_cache_hash)) + ".arrow") + ), + ) + + return sl diff --git a/robustnessgym/slicebuilders/subpopulation.py b/robustnessgym/slicebuilders/subpopulation.py new file mode 100644 index 00000000..9ea7ec0f --- /dev/null +++ b/robustnessgym/slicebuilders/subpopulation.py @@ -0,0 +1,369 @@ +import json +from typing import Dict, List, Optional, Sequence, Tuple, Union + +import cytoolz as tz +import numpy as np +from multiprocess.pool import Pool +from tqdm import tqdm + +from robustnessgym.core.constants import SLICEBUILDERS, SUBPOPULATION +from robustnessgym.core.dataset import Dataset +from robustnessgym.core.identifier import Identifier +from robustnessgym.core.tools import recmerge +from robustnessgym.slicebuilders.slicebuilder import SliceBuilder + + +class Subpopulation(SliceBuilder): + def __init__(self, identifiers: List[Identifier], apply_fn=None, *args, **kwargs): + super(Subpopulation, self).__init__( + category=SUBPOPULATION, + identifiers=identifiers, + apply_fn=apply_fn, + *args, + **kwargs, + ) + + def apply( + self, + slice_membership: np.ndarray, + batch: Dict[str, List], + columns: List[str], + *args, + **kwargs + ) -> np.ndarray: + raise NotImplementedError + + def process_batch( + self, + batch: Dict[str, List], + columns: List[str], + mask: List[int] = None, + store_compressed: bool = True, + store: bool = True, + *args, + **kwargs + ) -> Tuple[Dict[str, List], List[Dict[str, List]], Optional[np.ndarray]]: + + # Determine the size of the batch + batch_size = len(batch[list(batch.keys())[0]]) + + # Construct the matrix of slice labels: (batch_size x num_slices) + slice_membership = np.zeros((batch_size, self.num_slices), dtype=np.int32) + + # Apply the SliceMaker's core functionality + slice_membership = self.apply(slice_membership, batch, columns, *args, **kwargs) + + # Store these slice labels + # TODO(karan): figure out how to set the alias + updates 
= self.construct_updates( + slice_membership=slice_membership, + columns=columns, + mask=mask, + compress=store_compressed, + ) + + if store: + batch = self.store( + batch=batch, + updates=updates, + ) + + return ( + batch, + self.filter_batch_by_slice_membership(batch, slice_membership), + slice_membership, + ) + + def construct_updates( + self, + slice_membership: np.ndarray, + columns: List[str], + mask: List[int] = None, + compress: bool = True, + ): + + # Mask out components + # TODO(karan): masking inside apply, but only if the components are computed + # independently + + # Construct a list of update dicts that contains the slice membership for + # each example + if compress: + # TODO(karan): this will overwrite a previous application of the same + # Slicer right now, need a merge operation + # Merge is just an append to whatever list already exists + return [ + { + self.category: { + self.__class__.__name__: { + json.dumps(columns) if len(columns) > 1 else columns[0]: row + } + } + } + for row in ( + slice_membership[:, np.logical_not(np.array(mask, dtype=bool))] + if mask + else slice_membership + ).tolist() + ] + + return [ + { + self.category: { + self.__class__.__name__: { + str(self.identifiers[i]): { + json.dumps(columns) + if len(columns) > 1 + else columns[0]: membership + } + for i, membership in enumerate(row) + if not mask or not mask[i] + }, + } + } + for row in slice_membership.tolist() + ] + + @classmethod + def union( + cls, *slicemakers: SliceBuilder, identifier: Identifier = None + ) -> SliceBuilder: + """Combine a list of slicers using a union.""" + # Group the slicers based on their class + grouped_slicers = tz.groupby(lambda s: s.__class__, slicemakers) + + # Join the slicers corresponding to each class, and flatten + slicemakers = list( + tz.concat( + tz.itemmap( + lambda item: (item[0], item[0].join(*item[1])), grouped_slicers + ).values() + ) + ) + + def apply_fn(slice_membership, batch, columns, *args, **kwargs): + # Determine the size of the batch + batch_size = len(batch[list(batch.keys())[0]]) + + # Keep track of all the slice labels + all_slice_membership = [] + + # Run each slicemaker on the batch + for slicemaker in slicemakers: + all_slice_membership.append( + slicemaker.apply( + slice_membership=np.zeros( + (batch_size, slicemaker.num_slices), dtype=np.int32 + ), + batch=batch, + columns=columns, + ) + ) + + # Concatenate all the slice labels + slice_membership = np.concatenate(all_slice_membership, axis=1) + + # Take the union over the slices (columns) + slice_membership = np.any(slice_membership, axis=1).astype(np.int32)[ + :, np.newaxis + ] + + return slice_membership + + return Subpopulation(identifiers=[identifier], apply_fn=apply_fn) + + @classmethod + def intersection( + cls, *slicemakers: SliceBuilder, identifier: Identifier = None + ) -> SliceBuilder: + """Combine a list of slicemakers using an intersection.""" + # Group the slicemakers based on their class + grouped_slicemakers = tz.groupby(lambda s: s.__class__, slicemakers) + + # Join the slicemakers corresponding to each class, and flatten + slicemakers = list( + tz.concat( + tz.itemmap( + lambda item: (item[0], item[0].join(*item[1])), grouped_slicemakers + ).values() + ) + ) + + def apply_fn(slice_membership, batch, columns, *args, **kwargs): + # Determine the size of the batch + batch_size = len(batch[list(batch.keys())[0]]) + + # Keep track of all the slice labels + all_slice_membership = [] + + # Run each slicemaker on the batch + for slicemaker in slicemakers: + 
all_slice_membership.append( + slicemaker.apply( + slice_membership=np.zeros( + (batch_size, slicemaker.num_slices), dtype=np.int32 + ), + batch=batch, + columns=columns, + ) + ) + + # Concatenate all the slice labels + slice_membership = np.concatenate(all_slice_membership, axis=1) + + # Take the union over the slices (columns) + slice_membership = np.all(slice_membership, axis=1).astype(np.int32)[ + :, np.newaxis + ] + + return slice_membership + + return Subpopulation(identifiers=[identifier], apply_fn=apply_fn) + + +class SubpopulationCollection(Subpopulation): + def __init__(self, subpopulations: Sequence[Subpopulation], *args, **kwargs): + + super(SubpopulationCollection, self).__init__( + identifiers=list( + tz.concat( + [subpopulation.identifiers for subpopulation in subpopulations] + ) + ), + *args, + **kwargs, + ) + + # TODO(karan): some subpopulations aren't compatible with each other (e.g. + # single column vs. multi column): + # add some smarter logic here to handle this + + # Store the subpopulations + self.subpopulations = subpopulations + + def __call__( + self, + batch_or_dataset: Union[Dict[str, List], Dataset], + columns: List[str], + mask: List[int] = None, + store_compressed: bool = None, + store: bool = None, + num_proc: int = None, + *args, + **kwargs + ): + + if mask: + raise NotImplementedError( + "Mask not supported for SubpopulationCollection yet." + ) + + if not num_proc or num_proc == 1: + slices = [] + slice_membership = [] + # Apply each slicebuilder in sequence + for i, slicebuilder in tqdm(enumerate(self.subpopulations)): + # Apply the slicebuilder + batch_or_dataset, slices_i, slice_membership_i = slicebuilder( + batch_or_dataset=batch_or_dataset, + columns=columns, + mask=mask, + store_compressed=store_compressed, + store=store, + *args, + **kwargs, + ) + + # Add in the slices and slice membership + slices.extend(slices_i) + slice_membership.append(slice_membership_i) + + else: + # TODO(karan): cleanup, make mp.Pool support simpler across the library + with Pool(num_proc) as pool: + batches_or_datasets, slices, slice_membership = zip( + *pool.map( + lambda sb: sb( + batch_or_dataset=batch_or_dataset, + columns=columns, + mask=mask, + store_compressed=store_compressed, + store=store, + *args, + **kwargs, + ), + [slicebuilder for slicebuilder in self.subpopulations], + ) + ) + + # Combine all the slices + slices = list(tz.concat(slices)) + + def _store_updates(batch, indices): + + # Each Subpopulation will generate slices + for i, subpopulation in enumerate(self.subpopulations): + updates = subpopulation.construct_updates( + slice_membership=slice_membership[i][indices], + columns=columns, + mask=mask, + # TODO(karan): this option should be set correctly + compress=True, + ) + + batch = subpopulation.store( + batch=batch, + updates=updates, + ) + + return batch + + if isinstance(batch_or_dataset, Dataset): + batch_or_dataset = batch_or_dataset.map( + _store_updates, + with_indices=True, + batched=True, + ) + + for subpopulation in self.subpopulations: + # Update the Dataset's history + batch_or_dataset.update_tape( + path=[SLICEBUILDERS, subpopulation.category], + identifiers=subpopulation.identifiers, + columns=columns, + ) + + else: + batch_or_dataset = recmerge(*batches_or_datasets, merge_sequences=True) + + # Combine all the slice membership matrices + slice_membership = np.concatenate(slice_membership, axis=1) + + return batch_or_dataset, slices, slice_membership + + def apply( + self, + slice_membership: np.ndarray, + batch: Dict[str, List], + 
columns: List[str], + *args, + **kwargs + ) -> np.ndarray: + # Each Subpopulation will generate slices + for subpopulation, end_idx in zip( + self.subpopulations, np.cumsum([s.num_slices for s in self.subpopulations]) + ): + # Fill out the slice_membership + slice_membership[ + :, end_idx - subpopulation.num_slices : end_idx + ] = subpopulation.apply( + slice_membership=slice_membership[ + :, end_idx - subpopulation.num_slices : end_idx + ], + batch=batch, + columns=columns, + ) + + return slice_membership + + # TODO(karan): add combinations for collections diff --git a/robustnessgym/slicebuilders/subpopulations/__init__.py b/robustnessgym/slicebuilders/subpopulations/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/robustnessgym/slicebuilders/subpopulations/constituency_overlap.py b/robustnessgym/slicebuilders/subpopulations/constituency_overlap.py new file mode 100644 index 00000000..7ec25fab --- /dev/null +++ b/robustnessgym/slicebuilders/subpopulations/constituency_overlap.py @@ -0,0 +1,136 @@ +from typing import Dict, List + +import fuzzywuzzy.fuzz as fuzz +import numpy as np +from nltk import Tree + +from robustnessgym.cachedops.allen.constituency_parser import AllenConstituencyParser +from robustnessgym.core.decorators import prerequisites +from robustnessgym.core.identifier import Identifier +from robustnessgym.slicebuilders.subpopulations.score import ScoreSubpopulation + + +@prerequisites(AllenConstituencyParser) +class ConstituencyOverlapSubpopulation(ScoreSubpopulation): + def score( + self, batch: Dict[str, List], columns: List[str], *args, **kwargs + ) -> np.ndarray: + # Require that the number of keys is exactly 2 + assert len(columns) == 2, "Must specify exactly 2 keys." + + # Retrieve the trees + trees = AllenConstituencyParser.retrieve( + batch=batch, columns=[[key] for key in columns] + ) + trees_0, trees_1 = trees[columns[0]], trees[columns[1]] + + # Fuzzy match the trees and return the `scores` + return np.array( + [ + fuzz.partial_token_set_ratio( + tree_0.replace("(", "").replace(")", "").replace(" ", ""), + tree_1.replace("(", "").replace(")", "").replace(" ", ""), + ) + for tree_0, tree_1 in zip(trees_0, trees_1) + ] + ) + + +@prerequisites(AllenConstituencyParser) +class ConstituencySubtreeSubpopulation(ScoreSubpopulation): + def __init__(self, *args, **kwargs): + super(ConstituencySubtreeSubpopulation, self).__init__( + intervals=[(1, 1)], + identifiers=[Identifier(_name=self.__class__.__name__)], + *args, + **kwargs + ) + + def score( + self, batch: Dict[str, List], columns: List[str], *args, **kwargs + ) -> np.ndarray: + # Require that the number of keys is exactly 2 + assert len(columns) == 2, "Must specify exactly 2 keys." 
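Stepping back briefly to the Subpopulation.union and Subpopulation.intersection combinators defined in subpopulation.py above, a hedged sketch of how they compose builders into a single slice; the phrase lists are illustrative, and HasPhrase is the phrase subpopulation defined later in this patch.

# Illustrative sketch: an example belongs to the combined slice if it contains
# any negation word or any quantifier word. union() groups the builders by
# class, joins them via HasPhrase.join, and reduces their membership columns
# with np.any; intersection() is identical except that it reduces with np.all.
from robustnessgym.core.identifier import Identifier
from robustnessgym.slicebuilders.subpopulation import Subpopulation
from robustnessgym.slicebuilders.subpopulations.phrase import HasPhrase

negation_or_quantifier = Subpopulation.union(
    HasPhrase(phrases=["not", "never"]),
    HasPhrase(phrases=["all", "some", "none"]),
    identifier=Identifier(_name="NegationOrQuantifier"),
)
# Single-word matching in HasPhrase relies on the Spacy cached operation having
# been run on the target columns; calling the combined builder on a Dataset
# returns (dataset, slices, slice_membership) with one membership column.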
+ + # Retrieve the trees + trees = AllenConstituencyParser.retrieve( + batch=batch, columns=[[column] for column in columns] + ) + trees_0, trees_1 = trees[columns[0]], trees[columns[1]] + + # Convert the trees corresponding to key 0 to NLTK trees + trees_0 = [Tree.fromstring(tree) for tree in trees_0] + + # Find all subtrees of these trees + all_subtrees_0 = [ + set( + [ + str(t).replace("\n", "").replace(" ", "").lower() + for t in tree_0.subtrees() + ] + ) + for tree_0 in trees_0 + ] + + # Output a score of 1 if the tree corresponding to key 1 lies in any subtree + return np.array( + [ + int( + tree_1.replace(" ", "") + .replace("(..)", "") + .replace("(,,)", "") + .lower() + in subtrees_0 + ) + for tree_1, subtrees_0 in zip(trees_1, all_subtrees_0) + ] + ) + + +@prerequisites(AllenConstituencyParser) +class FuzzyConstituencySubtreeSubpopulation(ScoreSubpopulation): + def score( + self, batch: Dict[str, List], columns: List[str], *args, **kwargs + ) -> np.ndarray: + # Require that the number of keys is exactly 2 + assert len(columns) == 2, "Must specify exactly 2 keys." + + # Retrieve the trees + trees = AllenConstituencyParser.retrieve( + batch=batch, columns=[[column] for column in columns] + ) + trees_0, trees_1 = trees[columns[0]], trees[columns[1]] + + # Convert the trees corresponding to key 0 to NLTK trees + trees_0 = [Tree.fromstring(tree) for tree in trees_0] + + # Find all subtrees of these trees + all_subtrees_0 = [ + set( + [ + str(t).replace("\n", "").replace(" ", "").lower() + for t in tree_0.subtrees() + ] + ) + for tree_0 in trees_0 + ] + + # Output a fuzzy score if the tree corresponding to key 1 is similar to any + # subtree + return np.array( + [ + max( + [ + fuzz.partial_ratio( + tree_1.replace(" ", "") + .replace("(..)", "") + .replace("(,,)", "") + .lower(), + subtree, + ) + for subtree in subtrees_0 + ] + ) + for tree_1, subtrees_0 in zip(trees_1, all_subtrees_0) + ] + ) diff --git a/robustnessgym/slicebuilders/subpopulations/entity_frequency.py b/robustnessgym/slicebuilders/subpopulations/entity_frequency.py new file mode 100644 index 00000000..42bf9b1c --- /dev/null +++ b/robustnessgym/slicebuilders/subpopulations/entity_frequency.py @@ -0,0 +1,92 @@ +from __future__ import annotations + +from collections import Counter +from typing import Dict, List, Tuple + +import numpy as np + +from robustnessgym.cachedops.spacy import Spacy +from robustnessgym.core.dataset import Dataset +from robustnessgym.core.identifier import Identifier +from robustnessgym.slicebuilders.subpopulation import Subpopulation + + +# TODO(karan): Inherit from MultiScoreSubpopulation +class EntityFrequency(Subpopulation, Spacy): + def __init__(self, entity_thresholds: List[Tuple[str, List[int]]], *args, **kwargs): + + identifiers = [] + for entity_type, thresholds in entity_thresholds: + for threshold in thresholds: + identifiers.append( + Identifier( + _name=self.__class__.__name__, + entity_type=entity_type, + threshold=threshold, + ) + ) + + super(EntityFrequency, self).__init__(identifiers, *args, **kwargs) + + if len(entity_thresholds) == 0: + raise ValueError("At least one entity type required") + + for entity_type, _ in entity_thresholds: + if entity_type not in [ + "PERSON", + "NORP", + "FAC", + "ORG", + "GPE", + "LOC", + "PRODUCT", + "EVENT", + "WORK_OF_ART", + "LAW", + "LANGUAGE", + "DATE", + "TIME", + "PERCENT", + "MONEY", + "QUANTITY", + "ORDINAL", + "CARDINAL", + ]: + raise ValueError(f"Invalid entity type: {entity_type}") + + # List of tuples, each of which contains an entity 
type and a list of + # associated frequency thresholds + self.entity_thresholds = entity_thresholds + + @classmethod + def from_dataset( + cls, dataset: Dataset, entity_percentiles: List[Tuple[str, List[float]]] + ) -> EntityFrequency: + """Determine thresholds from dataset and specific percentiles.""" + raise NotImplementedError + + def apply( + self, + slice_membership: np.ndarray, + batch: Dict[str, List], + columns: List[str], + *args, + **kwargs, + ) -> np.ndarray: + + if len(columns) != 1: + raise ValueError("Only one key allowed") + key = columns[0] + + for i, cache_item in enumerate(batch["cache"]): + entities = cache_item["Spacy"][key]["ents"] + entity_types = [ent["label"] for ent in entities] + counts = Counter(entity_types) + slice_ndx = 0 + for entity_type, thresholds in self.entity_thresholds: + for threshold in thresholds: + if counts[entity_type] >= threshold: + slice_membership[i, slice_ndx] = 1 + slice_ndx += 1 + + return slice_membership diff --git a/robustnessgym/slicebuilders/subpopulations/hans.py b/robustnessgym/slicebuilders/subpopulations/hans.py new file mode 100644 index 00000000..36815709 --- /dev/null +++ b/robustnessgym/slicebuilders/subpopulations/hans.py @@ -0,0 +1,494 @@ +"""Taken from https://github.com/tommccoy1/hans/blob/master/templates.py.""" + +from robustnessgym.core.identifier import Identifier +from robustnessgym.slicebuilders.subpopulation import SubpopulationCollection +from robustnessgym.slicebuilders.subpopulations.phrase import HasAnyPhrase + + +class HansAllPhrases(SubpopulationCollection): + def __init__(self, *args, **kwargs): + super(HansAllPhrases, self).__init__( + subpopulations=HasAnyPhrase.join( + *[ + HansSingularNouns(), + HansPluralNouns(), + HansTransitiveVerbs(), + HansPassiveVerbs(), + HansIntransitiveVerbs(), + HansNPSVerbs(), + HansNPZVerbs(), + HansPluralNPZVerbs(), + HansPrepositions(), + HansConjs(), + HansPastParticiples(), + HansUnderstoodArgumentVerbs(), + HansNonEntQuotVerbs(), + HansQuestionEmbeddingVerbs(), + HansCalledObjects(), + HansToldObjects(), + HansFoodWords(), + HansLocationNounsA(), + HansLocationNounsB(), + HansWonObjects(), + HansReadWroteObjects(), + HansAdjectives(), + HansAdjectivesCompNonEnt(), + HansAdjectivesCompEnt(), + HansAdverbs(), + HansConstAdv(), + HansConstQuotEntailed(), + HansRelations(), + HansQuestions(), + HansNonEntComplementNouns(), + HansEntComplementNouns(), + HansAdvsNonEntailed(), + HansAdvsEntailed(), + ] + ), + *args, + **kwargs + ) + + +class HansSingularNouns(HasAnyPhrase): + def __init__(self): + super(HansSingularNouns, self).__init__( + phrase_groups=[ + [ + "professor", + "student", + "president", + "judge", + "senator", + "secretary", + "doctor", + "lawyer", + "scientist", + "banker", + "tourist", + "manager", + "artist", + "author", + "actor", + "athlete", + ] + ], + identifiers=[Identifier(_name=self.__class__.__name__)], + ) + + +class HansPluralNouns(HasAnyPhrase): + def __init__(self): + super(HansPluralNouns, self).__init__( + phrase_groups=[ + [ + "professors", + "students", + "presidents", + "judges", + "senators", + "secretaries", + "doctors", + "lawyers", + "scientists", + "bankers", + "tourists", + "managers", + "artists", + "authors", + "actors", + "athletes", + ] + ], + identifiers=[Identifier(_name=self.__class__.__name__)], + ) + + +class HansTransitiveVerbs(HasAnyPhrase): + def __init__(self): + super(HansTransitiveVerbs, self).__init__( + phrase_groups=[ + [ + "recommended", + "called", + "helped", + "supported", + "contacted", + "believed", + "avoided", + 
"advised", + "saw", + "stopped", + "introduced", + "mentioned", + "encouraged", + "thanked", + "recognized", + "admired", + ] + ], + identifiers=[Identifier(_name=self.__class__.__name__)], + ) + + +class HansPassiveVerbs(HasAnyPhrase): + def __init__(self): + super(HansPassiveVerbs, self).__init__( + phrase_groups=[ + [ + "recommended", + "helped", + "supported", + "contacted", + "believed", + "avoided", + "advised", + "stopped", + "introduced", + "mentioned", + "encouraged", + "thanked", + "recognized", + "admired", + ] + ], + identifiers=[Identifier(_name=self.__class__.__name__)], + ) + + +class HansIntransitiveVerbs(HasAnyPhrase): + def __init__(self): + super(HansIntransitiveVerbs, self).__init__( + phrase_groups=[ + [ + "slept", + "danced", + "ran", + "shouted", + "resigned", + "waited", + "arrived", + "performed", + ] + ], + identifiers=[Identifier(_name=self.__class__.__name__)], + ) + + +class HansNPSVerbs(HasAnyPhrase): + def __init__(self): + super(HansNPSVerbs, self).__init__( + phrase_groups=[ + [ + "believed", + "knew", + "heard", + "forgot", + "preferred", + "claimed", + "wanted", + "needed", + "found", + "suggested", + "expected", + ] + ], + identifiers=[Identifier(_name=self.__class__.__name__)], + ) + + +class HansNPZVerbs(HasAnyPhrase): + def __init__(self): + super(HansNPZVerbs, self).__init__( + phrase_groups=[["hid", "moved", "presented", "paid", "studied", "stopped"]], + identifiers=[Identifier(_name=self.__class__.__name__)], + ) + + +class HansPluralNPZVerbs(HasAnyPhrase): + def __init__(self): + super(HansPluralNPZVerbs, self).__init__( + phrase_groups=[ + [ + "fought", + "paid", + "changed", + "studied", + "answered", + "stopped", + "grew", + "moved", + "returned", + "left", + "improved", + "lost", + "visited", + "ate", + "played", + ] + ], + identifiers=[Identifier(_name=self.__class__.__name__)], + ) + + +class HansPrepositions(HasAnyPhrase): + def __init__(self): + super(HansPrepositions, self).__init__( + phrase_groups=[["near", "behind", "by", "in front of", "next to"]], + identifiers=[Identifier(_name=self.__class__.__name__)], + ) + + +class HansConjs(HasAnyPhrase): + def __init__(self): + super(HansConjs, self).__init__( + phrase_groups=[ + ["while", "after", "before", "when", "although", "because", "since"] + ], + identifiers=[Identifier(_name=self.__class__.__name__)], + ) + + +class HansPastParticiples(HasAnyPhrase): + def __init__(self): + super(HansPastParticiples, self).__init__( + phrase_groups=[["studied", "paid", "helped", "investigated", "presented"]], + identifiers=[Identifier(_name=self.__class__.__name__)], + ) + + +class HansUnderstoodArgumentVerbs(HasAnyPhrase): + def __init__(self): + super(HansUnderstoodArgumentVerbs, self).__init__( + phrase_groups=[["paid", "explored", "won", "wrote", "left", "read", "ate"]], + identifiers=[Identifier(_name=self.__class__.__name__)], + ) + + +class HansNonEntQuotVerbs(HasAnyPhrase): + def __init__(self): + super(HansNonEntQuotVerbs, self).__init__( + phrase_groups=[ + ["hoped", "claimed", "thought", "believed", "said", "assumed"] + ], + identifiers=[Identifier(_name=self.__class__.__name__)], + ) + + +class HansQuestionEmbeddingVerbs(HasAnyPhrase): + def __init__(self): + super(HansQuestionEmbeddingVerbs, self).__init__( + phrase_groups=[ + ["wondered", "understood", "knew", "asked", "explained", "realized"] + ], + identifiers=[Identifier(_name=self.__class__.__name__)], + ) + + +class HansCalledObjects(HasAnyPhrase): + def __init__(self): + super(HansCalledObjects, self).__init__( + 
phrase_groups=[["coward", "liar", "hero", "fool"]], + identifiers=[Identifier(_name=self.__class__.__name__)], + ) + + +class HansToldObjects(HasAnyPhrase): + def __init__(self): + super(HansToldObjects, self).__init__( + phrase_groups=[["story", "lie", "truth", "secret"]], + identifiers=[Identifier(_name=self.__class__.__name__)], + ) + + +class HansFoodWords(HasAnyPhrase): + def __init__(self): + super(HansFoodWords, self).__init__( + phrase_groups=[ + ["fruit", "salad", "broccoli", "sandwich", "rice", "corn", "ice cream"] + ], + identifiers=[Identifier(_name=self.__class__.__name__)], + ) + + +class HansLocationNounsA(HasAnyPhrase): + def __init__(self): + super(HansLocationNounsA, self).__init__( + phrase_groups=[ + [ + "neighborhood", + "region", + "country", + "town", + "valley", + "forest", + "garden", + "museum", + "desert", + "island", + "town", + ] + ], + identifiers=[Identifier(_name=self.__class__.__name__)], + ) + + +class HansLocationNounsB(HasAnyPhrase): + def __init__(self): + super(HansLocationNounsB, self).__init__( + phrase_groups=[["museum", "school", "library", "office", "laboratory"]], + identifiers=[Identifier(_name=self.__class__.__name__)], + ) + + +class HansWonObjects(HasAnyPhrase): + def __init__(self): + super(HansWonObjects, self).__init__( + phrase_groups=[ + [ + "race", + "contest", + "war", + "prize", + "competition", + "election", + "battle", + "award", + "tournament", + ] + ], + identifiers=[Identifier(_name=self.__class__.__name__)], + ) + + +class HansReadWroteObjects(HasAnyPhrase): + def __init__(self): + super(HansReadWroteObjects, self).__init__( + phrase_groups=[ + [ + "book", + "column", + "report", + "poem", + "letter", + "novel", + "story", + "play", + "speech", + ] + ], + identifiers=[Identifier(_name=self.__class__.__name__)], + ) + + +class HansAdjectives(HasAnyPhrase): + def __init__(self): + super(HansAdjectives, self).__init__( + phrase_groups=[ + [ + "important", + "popular", + "famous", + "young", + "happy", + "helpful", + "serious", + "angry", + ] + ], + identifiers=[Identifier(_name=self.__class__.__name__)], + ) + + +class HansAdjectivesCompNonEnt(HasAnyPhrase): + def __init__(self): + super(HansAdjectivesCompNonEnt, self).__init__( + phrase_groups=[["afraid", "sure", "certain"]], + identifiers=[Identifier(_name=self.__class__.__name__)], + ) + + +class HansAdjectivesCompEnt(HasAnyPhrase): + def __init__(self): + super(HansAdjectivesCompEnt, self).__init__( + phrase_groups=[["sorry", "aware", "glad"]], + identifiers=[Identifier(_name=self.__class__.__name__)], + ) + + +class HansAdverbs(HasAnyPhrase): + def __init__(self): + super(HansAdverbs, self).__init__( + phrase_groups=[ + ["quickly", "slowly", "happily", "easily", "quietly", "thoughtfully"] + ], + identifiers=[Identifier(_name=self.__class__.__name__)], + ) + + +class HansConstAdv(HasAnyPhrase): + def __init__(self): + super(HansConstAdv, self).__init__( + phrase_groups=[ + ["after", "before", "because", "although", "though", "since", "while"] + ], + identifiers=[Identifier(_name=self.__class__.__name__)], + ) + + +class HansConstQuotEntailed(HasAnyPhrase): + def __init__(self): + super(HansConstQuotEntailed, self).__init__( + phrase_groups=[["forgot", "learned", "remembered", "knew"]], + identifiers=[Identifier(_name=self.__class__.__name__)], + ) + + +class HansRelations(HasAnyPhrase): + def __init__(self): + super(HansRelations, self).__init__( + phrase_groups=[["who", "that"]], + identifiers=[Identifier(_name=self.__class__.__name__)], + ) + + +class 
HansQuestions(HasAnyPhrase): + def __init__(self): + super(HansQuestions, self).__init__( + phrase_groups=[["why", "how"]], + identifiers=[Identifier(_name=self.__class__.__name__)], + ) + + +class HansNonEntComplementNouns(HasAnyPhrase): + def __init__(self): + super(HansNonEntComplementNouns, self).__init__( + phrase_groups=[["feeling", "evidence", "idea", "belief"]], + identifiers=[Identifier(_name=self.__class__.__name__)], + ) + + +class HansEntComplementNouns(HasAnyPhrase): + def __init__(self): + super(HansEntComplementNouns, self).__init__( + phrase_groups=[["fact", "reason", "news", "time"]], + identifiers=[Identifier(_name=self.__class__.__name__)], + ) + + +class HansAdvsNonEntailed(HasAnyPhrase): + def __init__(self): + super(HansAdvsNonEntailed, self).__init__( + phrase_groups=[["supposedly", "probably", "maybe", "hopefully"]], + identifiers=[Identifier(_name=self.__class__.__name__)], + ) + + +class HansAdvsEntailed(HasAnyPhrase): + def __init__(self): + super(HansAdvsEntailed, self).__init__( + phrase_groups=[ + ["certainly", "definitely", "clearly", "obviously", "suddenly"] + ], + identifiers=[Identifier(_name=self.__class__.__name__)], + ) diff --git a/robustnessgym/slicebuilders/subpopulations/length.py b/robustnessgym/slicebuilders/subpopulations/length.py new file mode 100644 index 00000000..e822f559 --- /dev/null +++ b/robustnessgym/slicebuilders/subpopulations/length.py @@ -0,0 +1,62 @@ +from __future__ import annotations + +from typing import Callable, Dict, List, Tuple + +import cytoolz as tz +import numpy as np + +from robustnessgym.cachedops.spacy import Spacy +from robustnessgym.core.decorators import prerequisites +from robustnessgym.core.identifier import Identifier +from robustnessgym.slicebuilders.subpopulations.score import ScoreSubpopulation + + +@prerequisites(Spacy) +class LengthSubpopulation(ScoreSubpopulation): + """Class to compute subpopulations based on text length.""" + + def __init__( + self, + intervals: List[Tuple[int, int]], + reduction_fn: Callable = np.sum, + *args, + **kwargs + ): + super(LengthSubpopulation, self).__init__( + intervals=intervals, + identifiers=[ + Identifier( + _name=self.__class__.__name__, + gte=interval[0], + lte=interval[1], + reduction_fn=reduction_fn, + ) + for interval in intervals + ], + *args, + **kwargs, + ) + + # Assign the reduction fn + self.reduction_fn = reduction_fn + + def score( + self, batch: Dict[str, List], columns: List[str], *args, **kwargs + ) -> np.ndarray: + # Compute the length of each example under each key + lengths = [ + Spacy.retrieve( + batch=batch, + columns=[key], + proc_fns=tz.compose( + # Compute lengths (# of words) for each tokenized text in a batch + lambda l: np.array([len(t) for t in l]), + # Extract tokens using Spacy + Spacy.tokens, + ), + )[key] + for key in columns + ] + + # Reduction over the key axis + return self.reduction_fn(np.array(lengths), axis=0) diff --git a/robustnessgym/slicebuilders/subpopulations/lexical_overlap.py b/robustnessgym/slicebuilders/subpopulations/lexical_overlap.py new file mode 100644 index 00000000..79b1edb6 --- /dev/null +++ b/robustnessgym/slicebuilders/subpopulations/lexical_overlap.py @@ -0,0 +1,39 @@ +from typing import Dict, List + +import cytoolz as tz +import numpy as np + +from robustnessgym.cachedops.spacy import Spacy +from robustnessgym.slicebuilders.subpopulations.score import ScoreSubpopulation + + +class LexicalOverlapSubpopulation(ScoreSubpopulation): + def score( + self, batch: Dict[str, List], columns: List[str], *args, **kwargs + ) 
-> np.ndarray: + # Require that the number of keys is exactly 2 + assert len(columns) == 2, "Must specify exactly 2 keys." + + # Retrieve the tokens after lower-casing and placing into a set + tokens = Spacy.retrieve( + batch=batch, + columns=[[key] for key in columns], + proc_fns=tz.compose( + # Lower case and put the tokens in a set for each tokenized text in + # the batch + lambda l: np.array( + [set([str(tok).lower() for tok in toks]) for toks in l] + ), + # Tokenize + Spacy.tokens, + ), + ) + + # Compute the intersection over union score + return np.array( + [ + len(tokens_0.intersection(tokens_1)) + / float(len(tokens_0.union(tokens_1))) + for tokens_0, tokens_1 in zip(tokens[columns[0]], tokens[columns[1]]) + ] + ) diff --git a/robustnessgym/slicebuilders/subpopulations/phrase.py b/robustnessgym/slicebuilders/subpopulations/phrase.py new file mode 100644 index 00000000..99a8aacf --- /dev/null +++ b/robustnessgym/slicebuilders/subpopulations/phrase.py @@ -0,0 +1,426 @@ +from __future__ import annotations + +from typing import Any, Dict, List, Sequence + +import cytoolz as tz +import numpy as np +from ahocorasick import Automaton + +from robustnessgym.cachedops.spacy import Spacy +from robustnessgym.core.identifier import Identifier +from robustnessgym.slicebuilders.subpopulation import Subpopulation + + +class AhoCorasick: + def __init__(self, *args, **kwargs): + super(AhoCorasick, self).__init__(*args, **kwargs) + + # Use the Aho-Corasick search algorithm to speed up phrase lookups + self.automaton = Automaton() + + @classmethod + def from_phrases(cls, phrases: Dict[Any, str]) -> AhoCorasick: + # Create a new automaton + ahocorasick = cls() + + # Add all the phrases we want to search for + for key, phrase in phrases.items(): + # As values, we add the key of the phrase + ahocorasick.automaton.add_word(phrase, key) + + # Initialize Aho-Corasick + ahocorasick.automaton.make_automaton() + + return ahocorasick + + +class HasPhrase( + Subpopulation, + # Spacy +): + def __init__( + self, phrases=None, identifiers: List[Identifier] = None, *args, **kwargs + ): + + super(HasPhrase, self).__init__( + # One slice per phrase + identifiers=[ + Identifier(_name=self.__class__.__name__, phrase=phrase) + for phrase in phrases + ] + if not identifiers + else identifiers, + *args, + **kwargs + ) + + # This is the list of phrases that will be searched + self.phrases = phrases + if self.phrases is None: + self.phrases = [] + + # Create and populate Aho-Corasick automatons for words and phrases + self.word_ahocorasick = AhoCorasick.from_phrases( + {i: phrase for i, phrase in enumerate(self.phrases) if " " not in phrase} + ) + self.phrase_ahocorasick = AhoCorasick.from_phrases( + {i: phrase for i, phrase in enumerate(self.phrases) if " " in phrase} + ) + + @classmethod + def from_file(cls, path: str) -> Subpopulation: + """Load phrases from a file, one per line.""" + with open(path) as f: + phrases = [line.strip() for line in f.readlines()] + return cls(phrases=phrases) + + @classmethod + def default(cls) -> Subpopulation: + """A default vocabulary of phrases to search.""" + return cls(phrases=[]) + + @classmethod + def join(cls, *slicers: HasPhrase) -> Sequence[HasPhrase]: + """Join to combine multiple HasPhrase slicers.""" + return [ + HasPhrase(phrases=list(tz.concat([slicer.phrases for slicer in slicers]))) + ] + + def apply( + self, + slice_membership: np.ndarray, + batch: Dict[str, List], + columns: List[str], + *args, + **kwargs + ) -> np.ndarray: + + # Use the spacy cache to grab the tokens in 
each example (for each key) + tokenized_batch = Spacy.retrieve( + batch=batch, + columns=[[key] for key in columns], + proc_fns="tokens", + ) + + # Search for words + if len(self.word_ahocorasick.automaton) > 0: + for key, tokens_batch in tokenized_batch.items(): + for i, tokens in enumerate(tokens_batch): + # Get the values (indices) of all the matched tokens + matched_indices = [ + self.word_ahocorasick.automaton.get(token) + for token in tokens + if self.word_ahocorasick.automaton.exists(token) + ] + + # Fill in the slice labels for slices that are present + slice_membership[i, matched_indices] = 1 + + # Search for phrases + if len(self.phrase_ahocorasick.automaton) > 0: + for key in columns: + for i, example in enumerate(batch[key]): + # Get the values (indices) of all the matched phrases + matched_indices = [ + index + for _, index in self.phrase_ahocorasick.automaton.iter(example) + ] + + # Fill in the slice labels for slices that are present + slice_membership[i, matched_indices] = 1 + + return slice_membership + + +# class HasAnyPhrase(Subpopulation): +# +# def __init__(self, +# phrases: List[str] = None, +# identifier: Identifier = None, +# *args, +# **kwargs): +# # Take the union of the phrases +# subpopulation = Subpopulation.union( +# HasPhrase(phrases=phrases), +# identifier=Identifier( +# _name=self.__class__.__name__, +# phrases=set(phrases), +# ) if not identifier else identifier +# ) +# +# super(HasAnyPhrase, self).__init__( +# identifiers=subpopulation.identifiers, +# apply_fn=subpopulation.apply, +# *args, +# **kwargs +# ) + + +class HasAnyPhrase(Subpopulation): + def __init__( + self, + phrase_groups: List[List[str]] = None, + identifiers: List[Identifier] = None, + *args, + **kwargs + ): + + # Keep track of the phrase groups + self.phrase_groups = phrase_groups + + if identifiers: + assert len(identifiers) == len( + phrase_groups + ), "Must have one identifier per phrase group." 
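As a quick illustration of the HasPhrase class above: it creates one slice per phrase and fills in membership using the two Aho-Corasick automatons, one for single words (matched against cached spaCy tokens) and one for multi-word phrases (matched against the raw text). The file path below is an assumption for illustration only.

# Illustrative sketch: three phrases yield three slices.
has_articles = HasPhrase(phrases=["a", "an", "the"])
assert has_articles.num_slices == 3

# Phrases can also be loaded from a newline-delimited file (path assumed).
has_custom = HasPhrase.from_file("/path/to/phrases.txt")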
+ + self.subpopulations = [] + # For every phrase group + for i, phrases in enumerate(phrase_groups): + # Take the union of the phrases + self.subpopulations.append( + Subpopulation.union( + HasPhrase(phrases=phrases), + identifier=Identifier( + _name=self.__class__.__name__, + phrases=set(phrases), + ) + if not identifiers + else identifiers[i], + ) + ) + + super(HasAnyPhrase, self).__init__( + identifiers=list( + tz.concat( + [subpopulation.identifiers for subpopulation in self.subpopulations] + ) + ), + *args, + **kwargs + ) + + def apply( + self, + slice_membership: np.ndarray, + batch: Dict[str, List], + columns: List[str], + *args, + **kwargs + ) -> np.ndarray: + + # Run all the subpopulations in sequence to update the slice membership matrix + for i, subpopulation in enumerate(self.subpopulations): + slice_membership[:, i : i + 1] = subpopulation.apply( + slice_membership=slice_membership[:, i : i + 1], + batch=batch, + columns=columns, + *args, + **kwargs + ) + + return slice_membership + + @classmethod + def join(cls, *slicebuilders: HasAnyPhrase) -> Sequence[HasAnyPhrase]: + # Join all the slicebuilders + return [ + HasAnyPhrase( + phrase_groups=[ + phrases + for slicebuilder in slicebuilders + for phrases in slicebuilder.phrase_groups + ], + identifiers=[ + identifier + for slicebuilder in slicebuilders + for identifier in slicebuilder.identifiers + ], + ) + ] + + +class HasAllPhrases(Subpopulation): + def __init__( + self, + phrase_groups: List[List[str]] = None, + identifiers: List[Identifier] = None, + *args, + **kwargs + ): + + # Keep track of the phrase groups + self.phrase_groups = phrase_groups + + if identifiers: + assert len(identifiers) == len( + phrase_groups + ), "Must have one identifier per phrase group." + + self.subpopulations = [] + # For every phrase group + for i, phrases in enumerate(phrase_groups): + # Take the union of the phrases + self.subpopulations.append( + Subpopulation.intersection( + HasPhrase(phrases=phrases), + identifier=Identifier( + _name=self.__class__.__name__, + phrases=set(phrases), + ) + if not identifiers + else identifiers[i], + ) + ) + + super(HasAllPhrases, self).__init__( + identifiers=list( + tz.concat( + [subpopulation.identifiers for subpopulation in self.subpopulations] + ) + ), + *args, + **kwargs + ) + + def apply( + self, + slice_membership: np.ndarray, + batch: Dict[str, List], + columns: List[str], + *args, + **kwargs + ) -> np.ndarray: + + # Run all the subpopulations in sequence to update the slice membership matrix + for i, subpopulation in enumerate(self.subpopulations): + slice_membership[:, i : i + 1] = subpopulation.apply( + slice_membership=slice_membership[:, i : i + 1], + batch=batch, + columns=columns, + *args, + **kwargs + ) + + return slice_membership + + @classmethod + def join(cls, *slicebuilders: HasAllPhrases) -> Sequence[HasAllPhrases]: + # Join all the slicebuilders + return [ + HasAllPhrases( + phrase_groups=[ + phrases + for slicebuilder in slicebuilders + for phrases in slicebuilder.phrase_groups + ], + identifiers=[ + identifier + for slicebuilder in slicebuilders + for identifier in slicebuilder.identifiers + ], + ) + ] + + +# class HasAllPhrases(Subpopulation): +# +# def __init__(self, +# phrases=None, +# identifier: Identifier = None, +# *args, +# **kwargs): +# # Take the intersection of the phrases +# subpopulation = Subpopulation.intersection( +# HasPhrase(phrases=phrases), +# identifier=Identifier( +# _name=self.__class__.__name__, +# phrases=set(phrases), +# ) if not identifier else identifier, 
+# ) +# +# super(HasAllPhrases, self).__init__( +# identifiers=subpopulation.identifiers, +# apply_fn=subpopulation.apply, +# *args, +# **kwargs +# ) + + +class HasIndefiniteArticle(HasAnyPhrase): + def __init__(self): + super(HasIndefiniteArticle, self).__init__( + phrase_groups=[["a", "an"]], + identifiers=[Identifier(_name=self.__class__.__name__)], + ) + + +class HasDefiniteArticle(HasAnyPhrase): + def __init__(self): + super(HasDefiniteArticle, self).__init__( + phrase_groups=[["the"]], + identifiers=[Identifier(_name=self.__class__.__name__)], + ) + + +class HasTemporalPreposition(HasAnyPhrase): + def __init__(self): + super(HasTemporalPreposition, self).__init__( + phrase_groups=[["after", "before", "past"]], + identifiers=[Identifier(_name=self.__class__.__name__)], + ) + + +class HasPosessivePreposition(HasAnyPhrase): + def __init__(self): + super(HasPosessivePreposition, self).__init__( + phrase_groups=[["inside of", "with", "within"]], + identifiers=[Identifier(_name=self.__class__.__name__)], + ) + + +class HasComparison(HasAnyPhrase): + def __init__(self): + super(HasComparison, self).__init__( + phrase_groups=[["more", "less", "better", "worse", "bigger", "smaller"]], + identifiers=[Identifier(_name=self.__class__.__name__)], + ) + + +class HasQuantifier(HasAnyPhrase): + def __init__(self): + super(HasQuantifier, self).__init__( + phrase_groups=[["all", "some", "none"]], + identifiers=[Identifier(_name=self.__class__.__name__)], + ) + + +class HasNegation(HasAnyPhrase): + def __init__(self): + super(HasNegation, self).__init__( + phrase_groups=[ + [ + "no", + "not", + "none", + "noone ", + "nobody", + "nothing", + "neither", + "nowhere", + "never", + "hardly", + "scarcely", + "barely", + "doesnt", + "isnt", + "wasnt", + "shouldnt", + "wouldnt", + "couldnt", + "wont", + "cant", + "dont", + ] + ], + identifiers=[Identifier(_name=self.__class__.__name__)], + ) diff --git a/robustnessgym/slicebuilders/subpopulations/position.py b/robustnessgym/slicebuilders/subpopulations/position.py new file mode 100644 index 00000000..3ffcefcf --- /dev/null +++ b/robustnessgym/slicebuilders/subpopulations/position.py @@ -0,0 +1,61 @@ +# from typing import Callable, Dict, List, Tuple +# +# import numpy as np +# +# from robustnessgym.core.identifier import Identifier +# from robustnessgym.slicebuilders.subpopulation import Subpopulation +# +# +# class Position: +# def __init__( +# self, +# identifiers: List[Identifier] = None, +# position_fn: Callable = None, +# *args, +# **kwargs +# ): +# +# if not identifiers: +# identifiers = [ +# Identifier( +# _name=self.__class__.__name__, +# gte=interval[0], +# lte=interval[1], +# score_fn=score_fn, +# ) +# for interval in intervals +# ] +# +# # Set the intervals +# self.intervals = intervals +# self.left_limits = np.array([interval[0] for interval in intervals]) +# self.right_limits = np.array([interval[1] for interval in intervals]) +# +# # Assign the score fn +# if score_fn: +# self.score = score_fn +# +# def score( +# self, batch: Dict[str, List], keys: List[str], *args, **kwargs +# ) -> np.ndarray: +# raise NotImplementedError("Return a vector of float scores for each example.") +# +# def apply( +# self, +# slice_membership: np.ndarray, +# batch: Dict[str, List], +# keys: List[str], +# *args, +# **kwargs +# ) -> np.ndarray: +# # Keep track of the score of each example +# scores = self.score(batch=batch, keys=keys) +# +# assert scores.shape == ( +# slice_membership.shape[0], +# ), "Must have exactly one score per example." 
+# +# return ( +# (self.left_limits <= scores[:, np.newaxis]) +# & (scores[:, np.newaxis] <= self.right_limits) +# ).astype(np.int32) diff --git a/robustnessgym/slicebuilders/subpopulations/score.py b/robustnessgym/slicebuilders/subpopulations/score.py new file mode 100644 index 00000000..c80f273c --- /dev/null +++ b/robustnessgym/slicebuilders/subpopulations/score.py @@ -0,0 +1,284 @@ +from typing import Callable, Dict, List, Tuple, Union + +import numpy as np + +from robustnessgym.core.cachedops import ScoreOperation +from robustnessgym.core.dataset import Batch, Dataset +from robustnessgym.core.identifier import Identifier +from robustnessgym.slicebuilders.subpopulation import Subpopulation + + +class BinningMixin: + def __init__( + self, + intervals: List[Tuple[Union[int, float, str], Union[int, float, str]]], + bin_creation_fn: Callable = None, + bin_fn: Callable = None, + *args, + **kwargs + ): + super(BinningMixin, self).__init__(*args, **kwargs) + + # Set the intervals + self.intervals = intervals + self.left_limits = None + self.right_limits = None + + # Keep track of scores + self.scores = [] + + # Assign the bin fns + if bin_creation_fn and bin_fn: + self.create_bins = bin_creation_fn + self.bin = bin_fn + + def _reset_scores(self): + self.scores = [] + + def replace_percentile(self, limit): + if isinstance(limit, str) and limit.endswith("%"): + return np.percentile(self.scores, float(limit.replace("%", ""))) + elif isinstance(limit, float) or isinstance(limit, int): + return limit + else: + raise NotImplementedError + + def create_bins(self): + for i in range(len(self.intervals)): + (left_limit, right_limit) = self.intervals[i] + self.intervals[i] = ( + self.replace_percentile(left_limit), + self.replace_percentile(right_limit), + ) + + self.left_limits = np.array([interval[0] for interval in self.intervals]) + self.right_limits = np.array([interval[1] for interval in self.intervals]) + + def bin(self, scores: List[Union[int, float]]) -> np.ndarray: + # Convert to np.ndarry + scores = np.array(scores) + + # Bin the scores + return ( + (self.left_limits <= scores[:, np.newaxis]) + & (scores[:, np.newaxis] <= self.right_limits) + ).astype(np.int32) + + +class ScoreSubpopulation(Subpopulation, BinningMixin): + def __init__( + self, + intervals: List[Tuple[Union[int, float, str], Union[int, float, str]]], + identifiers: List[Identifier] = None, + score_fn: Callable = None, + bin_creation_fn: Callable = None, + bin_fn: Callable = None, + *args, + **kwargs + ): + + if not identifiers: + if score_fn: + identifiers = [ + Identifier( + _name=self.__class__.__name__, + gte=interval[0], + lte=interval[1], + score_fn=score_fn, + ) + for interval in intervals + ] + else: + identifiers = [ + Identifier( + _name=self.__class__.__name__, gte=interval[0], lte=interval[1] + ) + for interval in intervals + ] + + super(ScoreSubpopulation, self).__init__( + intervals=intervals, + identifiers=identifiers, + bin_creation_fn=bin_creation_fn, + bin_fn=bin_fn, + *args, + **kwargs, + ) + + # Assign the score fn + if score_fn: + self.score = score_fn + + def prepare_dataset( + self, + dataset: Dataset, + columns: List[str], + batch_size: int = 32, + mask: List[int] = None, + store_compressed: bool = True, + store: bool = True, + *args, + **kwargs + ) -> Dataset: + + # First reset the scores + self._reset_scores() + + # Prepare the dataset + dataset = super(ScoreSubpopulation, self).prepare_dataset( + dataset=dataset, + columns=columns, + batch_size=batch_size, + mask=mask, + 
store_compressed=store_compressed, + store=store, + ) + + # Create the bins + self.create_bins() + + return dataset + + def prepare_batch( + self, + batch: Batch, + columns: List[str], + mask: List[int] = None, + store_compressed: bool = True, + store: bool = True, + *args, + **kwargs + ) -> Batch: + + # Compute the scores + if isinstance(self.score, ScoreOperation): + self.scores.extend(self.score.retrieve(batch=batch, columns=columns)) + elif isinstance(self.score, Callable): + self.scores.extend(self.score(batch=batch, columns=columns)) + else: + raise RuntimeError("score function invalid.") + + return batch + + def score( + self, batch: Dict[str, List], columns: List[str], *args, **kwargs + ) -> np.ndarray: + raise NotImplementedError("Return a vector of float scores for each example.") + + def apply( + self, + slice_membership: np.ndarray, + batch: Dict[str, List], + columns: List[str], + *args, + **kwargs + ) -> np.ndarray: + # Keep track of the score of each example + if isinstance(self.score, ScoreOperation): + scores = self.score.retrieve(batch=batch, columns=columns) + elif isinstance(self.score, Callable): + scores = self.score(batch=batch, columns=columns) + else: + raise RuntimeError("score function invalid.") + + assert ( + len(scores) == slice_membership.shape[0] + ), "Must have exactly one score per example." + + return self.bin(scores=scores) + + +class MultiScoreSubpopulation(Subpopulation, BinningMixin): + def __init__( + self, + intervals: List[Tuple[Union[int, float, str], Union[int, float, str]]], + identifiers: List[Identifier] = None, + score_fn: Callable = None, + bin_creation_fn: Callable = None, + bin_fn: Callable = None, + *args, + **kwargs + ): + + if not identifiers: + if score_fn: + identifiers = [ + Identifier( + _name=self.__class__.__name__, + gte=interval[0], + lte=interval[1], + score_fn=score_fn, + ) + for interval in intervals + ] + else: + identifiers = [ + Identifier( + _name=self.__class__.__name__, gte=interval[0], lte=interval[1] + ) + for interval in intervals + ] + + super(MultiScoreSubpopulation, self).__init__( + intervals=intervals, + identifiers=identifiers, + bin_creation_fn=bin_creation_fn, + bin_fn=bin_fn, + *args, + **kwargs, + ) + + # Assign the score fn + if score_fn: + self.score = score_fn + + def prepare_dataset( + self, + dataset: Dataset, + columns: List[str], + batch_size: int = 32, + mask: List[int] = None, + store_compressed: bool = True, + store: bool = True, + *args, + **kwargs + ) -> Dataset: + + # First reset the scores + self._reset_scores() + + # Prepare the dataset + dataset = super(MultiScoreSubpopulation, self).prepare_dataset( + dataset=dataset, + columns=columns, + batch_size=batch_size, + mask=mask, + store_compressed=store_compressed, + store=store, + ) + + # Create the bins + self.create_bins() + + return dataset + + def prepare_batch( + self, + batch: Batch, + columns: List[str], + mask: List[int] = None, + store_compressed: bool = True, + store: bool = True, + *args, + **kwargs + ) -> Batch: + + # Compute the scores + if isinstance(self.score, ScoreOperation): + self.scores.extend(self.score.retrieve(batch=batch, columns=columns)) + elif isinstance(self.score, Callable): + self.scores.extend(self.score(batch=batch, columns=columns)) + else: + raise RuntimeError("score function invalid.") + + return batch diff --git a/robustnessgym/slicebuilders/subpopulations/similarity.py b/robustnessgym/slicebuilders/subpopulations/similarity.py new file mode 100644 index 00000000..35f34acd --- /dev/null +++ 
b/robustnessgym/slicebuilders/subpopulations/similarity.py @@ -0,0 +1,243 @@ +from functools import partial +from typing import Dict, List, Sequence, Tuple + +import numpy as np +from numpy.linalg import norm +from scipy.linalg import svd +from scipy.stats import spearmanr + +from robustnessgym.cachedops.similarity import RougeMatrix, RougeScore +from robustnessgym.core.identifier import Identifier +from robustnessgym.core.tools import strings_as_json +from robustnessgym.slicebuilders.subpopulations.score import ScoreSubpopulation + + +class RougeScoreSubpopulation(ScoreSubpopulation): + def __init__( + self, + intervals: List[Tuple[int, int]], + metric: Sequence[str] = ("rouge1", "fmeasure"), + *args, + **kwargs, + ): + super(RougeScoreSubpopulation, self).__init__( + intervals=intervals, + identifiers=[ + Identifier( + _name=self.__class__.__name__, + gte=interval[0], + lte=interval[1], + metric=metric, + ) + for interval in intervals + ], + *args, + **kwargs, + ) + + # Assign the metric + self.metric = metric + + def score( + self, batch: Dict[str, List], columns: List[str], *args, **kwargs + ) -> np.ndarray: + assert len(columns) == 2, "Must have exactly 2 columns." + + # Retrieve Rouge scores + scores = RougeScore.retrieve( + batch=batch, + columns=columns, + proc_fns=partial(RougeScore.select, metric=self.metric), + )[strings_as_json(columns)] + + return np.array(scores) + + +class Abstractiveness(RougeScoreSubpopulation): + def __init__( + self, + intervals: List[Tuple[int, int]], + metric: Sequence[str] = ("rouge1", "precision"), + ): + super(Abstractiveness, self).__init__( + intervals=intervals, + metric=metric, + ) + + +class Distillation(RougeScoreSubpopulation): + def __init__( + self, + intervals: List[Tuple[int, int]], + metric: Sequence[str] = ("rouge1", "recall"), + ): + super(Distillation, self).__init__( + intervals=intervals, + metric=metric, + ) + + +class RougeMatrixScoreSubpopulation(ScoreSubpopulation): + def __init__( + self, + intervals: List[Tuple[int, int]], + metric: Sequence[str] = ("rouge1", "fmeasure"), + *args, + **kwargs, + ): + assert ( + len(metric) == 2 + ), "Must pass in both rouge score and one of precision/recall/fmeasure." + super(RougeMatrixScoreSubpopulation, self).__init__( + intervals=intervals, + identifiers=[ + Identifier( + _name=self.__class__.__name__, + gte=interval[0], + lte=interval[1], + metric=metric, + ) + for interval in intervals + ], + *args, + **kwargs, + ) + + # Assign the metric + self.metric = metric + + def reduce(self, matrices: List[np.ndarray]) -> np.ndarray: + raise NotImplementedError + + def score( + self, batch: Dict[str, List], columns: List[str], *args, **kwargs + ) -> np.ndarray: + assert len(columns) == 2, "Must have exactly 2 columns." 
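+
+        # Descriptive note (inferred from how reduce() is used by the
+        # subclasses below, so treat the orientation as an assumption): each
+        # retrieved matrix holds sentence-level ROUGE overlap, with rows
+        # indexing sentences of the first column and columns indexing
+        # sentences of the second; subclasses collapse each matrix to a
+        # single per-example score via reduce().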
+ + # Retrieve the relevant Rouge matrices + matrices = RougeMatrix.retrieve( + batch=batch, + columns=columns, + proc_fns=partial(RougeMatrix.select, metric=self.metric), + )[strings_as_json(columns)] + + return self.reduce(matrices) + + +class Position(RougeMatrixScoreSubpopulation): + def __init__( + self, + intervals: List[Tuple[int, int]], + metric: Sequence[str] = ("rouge1", "fmeasure"), + ): + super(Position, self).__init__( + intervals=intervals, + metric=metric, + ) + + def reduce(self, matrices: List[np.ndarray]) -> np.ndarray: + # Compute position of best-matched sentence in source document + # Then compute mean position, capturing where position mostly comes from + return np.array( + [np.mean(np.argmax(mat, axis=0)) / mat.shape[0] for mat in matrices] + ) + + +class Dispersion(RougeMatrixScoreSubpopulation): + def __init__( + self, + intervals: List[Tuple[int, int]], + metric: Sequence[str] = ("rouge1", "fmeasure"), + ): + super(Dispersion, self).__init__( + intervals=intervals, + metric=metric, + ) + + def reduce(self, matrices: List[np.ndarray]) -> np.ndarray: + # Compute position of best-matched sentence in source document + # Then compute std dev of position, capturing how spread out the positions are + return np.array( + [np.std(np.argmax(mat, axis=0) / mat.shape[0]) for mat in matrices] + ) + + +class Ordering(RougeMatrixScoreSubpopulation): + def __init__( + self, + intervals: List[Tuple[int, int]], + metric: Sequence[str] = ("rouge1", "fmeasure"), + ): + super(Ordering, self).__init__( + intervals=intervals, + metric=metric, + ) + + def reduce(self, matrices: List[np.ndarray]) -> np.ndarray: + # Compute position of best-matched sentence in source document + # Then compute spearman correlation of position with range(..), + # capturing whether the order of information is reversed + return np.array( + [ + spearmanr( + np.arange(mat.shape[1]) / mat.shape[0], + np.argmax(mat, axis=0) / mat.shape[0], + )[0] + for mat in matrices + ] + ) + + +class NuclearNorm(RougeMatrixScoreSubpopulation): + def __init__( + self, + intervals: List[Tuple[int, int]], + metric: Sequence[str] = ("rouge1", "fmeasure"), + ): + super(NuclearNorm, self).__init__( + intervals=intervals, + metric=metric, + ) + + def reduce(self, matrices: List[np.ndarray]) -> np.ndarray: + return np.array( + [ + np.sum(np.abs(svd(mat, full_matrices=False, compute_uv=False))) + for mat in matrices + ] + ) + + +class SpectralNorm(RougeMatrixScoreSubpopulation): + def __init__( + self, + intervals: List[Tuple[int, int]], + metric: Sequence[str] = ("rouge1", "fmeasure"), + ): + super(SpectralNorm, self).__init__( + intervals=intervals, + metric=metric, + ) + + def reduce(self, matrices: List[np.ndarray]) -> np.ndarray: + return np.array( + [ + np.max(np.abs(svd(mat, full_matrices=False, compute_uv=False))) + for mat in matrices + ] + ) + + +class FrobeniusNorm(RougeMatrixScoreSubpopulation): + def __init__( + self, + intervals: List[Tuple[int, int]], + metric: Sequence[str] = ("rouge1", "fmeasure"), + ): + super(FrobeniusNorm, self).__init__( + intervals=intervals, + metric=metric, + ) + + def reduce(self, matrices: List[np.ndarray]) -> np.ndarray: + return np.array([norm(mat) for mat in matrices]) diff --git a/robustnessgym/slicebuilders/subpopulations/wordlists.py b/robustnessgym/slicebuilders/subpopulations/wordlists.py new file mode 100644 index 00000000..83dec8ea --- /dev/null +++ b/robustnessgym/slicebuilders/subpopulations/wordlists.py @@ -0,0 +1,74 @@ +import urllib.request +import zipfile + +from 
robustnessgym.core.identifier import Identifier +from robustnessgym.core.tools import DownloadProgressBar +from robustnessgym.slicebuilders.subpopulations.phrase import HasAnyPhrase + + +class HasCategoryPhrase(HasAnyPhrase): + def __init__(self): + + # Fetch wordlists + self._fetch_sources() + self.categories_to_words = self._load_all() + + super(HasCategoryPhrase, self).__init__( + phrase_groups=[ + self.categories_to_words[supercategory][category] + for (supercategory, category) in self.categories + ], + identifiers=[ + Identifier( + _name=self.__class__.__name__, + supercategory=supercategory, + category=category, + ) + for (supercategory, category) in self.categories + ], + ) + + @property + def supercategories(self): + return list(self.categories_to_words.keys()) + + @property + def categories(self): + return sorted( + [ + (supercategory, category) + for supercategory in self.categories_to_words.keys() + for category in self.categories_to_words[supercategory] + ] + ) + + @classmethod + def _fetch_sources(cls): + if not (cls.logdir / "wordlists-master").exists(): + print("wordlists not found. Downloading..") + urllib.request.urlretrieve( + "https://github.com/imsky/wordlists/archive/master.zip", + filename=str(cls.logdir / "wordlists.zip"), + reporthook=DownloadProgressBar(), + ) + + with zipfile.ZipFile(str(cls.logdir / "wordlists.zip")) as zip_ref: + zip_ref.extractall(str(cls.logdir)) + + @classmethod + def _load_all(cls): + """Loads wordlists. + + Returns: + """ + category_to_words = { + supercategory: {} for supercategory in ["nouns", "verbs", "adjectives"] + } + for supercategory in ["nouns", "verbs", "adjectives"]: + for path in (cls.logdir / "wordlists-master" / supercategory).glob("*"): + with open(str(path)) as f: + category_to_words[supercategory][path.stem] = set( + f.read().splitlines() + ) + + return category_to_words diff --git a/robustnessgym/slicebuilders/transformation.py b/robustnessgym/slicebuilders/transformation.py new file mode 100644 index 00000000..6b2aa394 --- /dev/null +++ b/robustnessgym/slicebuilders/transformation.py @@ -0,0 +1,204 @@ +import json +from typing import Callable, List, Optional, Tuple + +import cytoolz as tz +import numpy as np + +from robustnessgym.core.constants import TRANSFORMATION +from robustnessgym.core.dataset import Batch, Dataset +from robustnessgym.core.identifier import Identifier +from robustnessgym.slicebuilders.slicebuilder import SliceBuilder + + +class Transformation(SliceBuilder): + def __init__( + self, + num_transformed: int = None, + identifiers: List[Identifier] = None, + apply_fn: Callable = None, + category: str = None, + ): + assert ( + num_transformed if not identifiers else True + ), "Must pass in num_transformed if no identifiers are given." 
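+
+        # When no identifiers are supplied, one is generated per transformed
+        # version below, named "<ClassName>-1", ..., "<ClassName>-<num_transformed>".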
+ + super(Transformation, self).__init__( + category=category if category else TRANSFORMATION, + identifiers=[ + Identifier( + _name=f"{self.__class__.__name__}-{i + 1}", + ) + for i in range(num_transformed) + ] + if not identifiers + else identifiers, + apply_fn=apply_fn, + ) + + @property + def num_transformed(self): + return self.num_slices + + def apply( + self, + skeleton_batches: List[Batch], + slice_membership: np.ndarray, + batch: Batch, + columns: List[str], + *args, + **kwargs, + ) -> Tuple[List[Batch], np.ndarray]: + raise NotImplementedError + + def process_batch( + self, + batch: Batch, + columns: List[str], + mask: List[int] = None, + store_compressed: bool = True, + store: bool = True, + *args, + **kwargs, + ) -> Tuple[Batch, List[Batch], Optional[np.ndarray]]: + # Determine the size of the batch + batch_size = len(batch[list(batch.keys())[0]]) + + # Construct the matrix of slice labels: (batch_size x num_slices) + slice_membership = np.ones((batch_size, self.num_slices), dtype=np.int32) + + # Uncache the batch to construct the skeleton for transformed batches + skeleton_batches = [ + Dataset.uncached_batch(batch) for _ in range(self.num_slices) + ] + + # Set the index for the skeleton batches + for j, skeleton_batch in enumerate(skeleton_batches): + skeleton_batch["index"] = [ + f"{idx}-{self.identifiers[j]}" for idx in skeleton_batch["index"] + ] + + # Apply the SliceBuilder's core functionality + transformed_batches, slice_membership = self.apply( + skeleton_batches=skeleton_batches, + slice_membership=slice_membership, + batch=batch, + columns=columns, + *args, + **kwargs, + ) + + # Store the transformed examples + updates = self.construct_updates( + transformed_batches=transformed_batches, + slice_membership=slice_membership, + batch_size=batch_size, + columns=columns, + mask=mask, + compress=store_compressed, + ) + + # Remove transformed examples where slice_membership[i, :] = 0 before returning + transformed_batches = [ + self.filter_batch_by_slice_membership( + batch=transformed_batch, slice_membership=slice_membership[:, j : j + 1] + )[0] + for j, transformed_batch in enumerate(transformed_batches) + ] + + if store: + batch = self.store( + batch=batch, + updates=updates, + ) + + return batch, transformed_batches, slice_membership + + def construct_updates( + self, + transformed_batches: List[Batch], + slice_membership: np.ndarray, + batch_size: int, + columns: List[str], + mask: List[int] = None, + compress: bool = True, + ): + + if compress: + return [ + { + self.category: { + self.__class__.__name__: { + json.dumps(columns) + if len(columns) > 1 + else columns[0]: [ + tz.valmap(lambda v: v[i], transformed_batch) + for j, transformed_batch in enumerate( + transformed_batches + ) + if slice_membership[i, j] + ] + } + } + } + if np.any(slice_membership[i, :]) + else {} + for i in range(batch_size) + ] + + return [ + { + self.category: { + self.__class__.__name__: { + str(self.identifiers[j]): { + json.dumps(columns) + if len(columns) > 1 + else columns[0]: tz.valmap( + lambda v: v[i], transformed_batch + ) + } + for j, transformed_batch in enumerate(transformed_batches) + if (not mask or not mask[j]) and (slice_membership[i, j]) + } + } + } + if np.any(slice_membership[i, :]) + else {} + for i in range(batch_size) + ] + + +class SingleColumnTransformation(Transformation): + def single_column_apply(self, column_batch: List) -> List[List]: + raise NotImplementedError + + def apply( + self, + skeleton_batches: List[Batch], + slice_membership: np.ndarray, + batch: 
Batch, + columns: List[str], + *args, + **kwargs, + ) -> Tuple[List[Batch], np.ndarray]: + + # Independently apply the transformation over the columns + for column in columns: + try: + # Apply + transformed_batch = self.single_column_apply( + column_batch=batch[column], + ) + + assert len(transformed_batch) == len( + batch[column] + ), "Must output one list of augmentations per example." + + # Store the transformed text in the skeleton batches + for i in range(slice_membership.shape[0]): + for j, transformed in enumerate(transformed_batch[i]): + skeleton_batches[j][column][i] = transformed + except: # noqa + # Unable to transform: set all slice membership labels to zero + slice_membership[:, :] = 0 + + return skeleton_batches, slice_membership diff --git a/robustnessgym/slicebuilders/transformations/__init__.py b/robustnessgym/slicebuilders/transformations/__init__.py new file mode 100644 index 00000000..e123795d --- /dev/null +++ b/robustnessgym/slicebuilders/transformations/__init__.py @@ -0,0 +1,3 @@ +from .eda import EasyDataAugmentation # noqa +from .fairseq import FairseqBacktranslation # noqa +from .nlpaug import NlpAugTransformation # noqa diff --git a/robustnessgym/slicebuilders/transformations/_eda.py b/robustnessgym/slicebuilders/transformations/_eda.py new file mode 100644 index 00000000..614a53cc --- /dev/null +++ b/robustnessgym/slicebuilders/transformations/_eda.py @@ -0,0 +1,349 @@ +"""Easy data augmentation techniques for text classification. Jason Wei and Kai +Zou. + +Taken from https://github.com/jasonwei20/eda_nlp +""" + +import random +import re +from random import shuffle + +from nltk.corpus import wordnet + +random.seed(1) + +# stop words list +stop_words = [ + "i", + "me", + "my", + "myself", + "we", + "our", + "ours", + "ourselves", + "you", + "your", + "yours", + "yourself", + "yourselves", + "he", + "him", + "his", + "himself", + "she", + "her", + "hers", + "herself", + "it", + "its", + "itself", + "they", + "them", + "their", + "theirs", + "themselves", + "what", + "which", + "who", + "whom", + "this", + "that", + "these", + "those", + "am", + "is", + "are", + "was", + "were", + "be", + "been", + "being", + "have", + "has", + "had", + "having", + "do", + "does", + "did", + "doing", + "a", + "an", + "the", + "and", + "but", + "if", + "or", + "because", + "as", + "until", + "while", + "of", + "at", + "by", + "for", + "with", + "about", + "against", + "between", + "into", + "through", + "during", + "before", + "after", + "above", + "below", + "to", + "from", + "up", + "down", + "in", + "out", + "on", + "off", + "over", + "under", + "again", + "further", + "then", + "once", + "here", + "there", + "when", + "where", + "why", + "how", + "all", + "any", + "both", + "each", + "few", + "more", + "most", + "other", + "some", + "such", + "no", + "nor", + "not", + "only", + "own", + "same", + "so", + "than", + "too", + "very", + "s", + "t", + "can", + "will", + "just", + "don", + "should", + "now", + "", +] + + +# cleaning up text + + +def get_only_chars(line): + clean_line = "" + + line = line.replace("’", "") + line = line.replace("'", "") + line = line.replace("-", " ") # replace hyphens with spaces + line = line.replace("\t", " ") + line = line.replace("\n", " ") + line = line.lower() + + for char in line: + if char in "qwertyuiopasdfghjklzxcvbnm ": + clean_line += char + else: + clean_line += " " + + clean_line = re.sub(" +", " ", clean_line) # delete extra spaces + if clean_line[0] == " ": + clean_line = clean_line[1:] + return clean_line + + 
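+
+# Worked example of get_only_chars (illustrative, not a test):
+#
+#     get_only_chars("EDA: it's great!")  ->  "eda its great "
+#
+# Apostrophes are dropped, hyphens/tabs/newlines become spaces, the text is
+# lower-cased, any remaining character outside [a-z ] becomes a space, and
+# runs of spaces are collapsed (a single trailing space can survive, as above).
+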
+######################################################################## +# Synonym replacement +# Replace n words in the sentence with synonyms from wordnet +######################################################################## + + +def synonym_replacement(words, n): + new_words = words.copy() + random_word_list = list(set([word for word in words if word not in stop_words])) + random.shuffle(random_word_list) + num_replaced = 0 + for random_word in random_word_list: + synonyms = get_synonyms(random_word) + if len(synonyms) >= 1: + synonym = random.choice(list(synonyms)) + new_words = [synonym if word == random_word else word for word in new_words] + # print("replaced", random_word, "with", synonym) + num_replaced += 1 + if num_replaced >= n: # only replace up to n words + break + + # this is stupid but we need it, trust me + sentence = " ".join(new_words) + new_words = sentence.split(" ") + + return new_words + + +def get_synonyms(word): + synonyms = set() + for syn in wordnet.synsets(word): + for l in syn.lemmas(): + synonym = l.name().replace("_", " ").replace("-", " ").lower() + synonym = "".join( + [char for char in synonym if char in " qwertyuiopasdfghjklzxcvbnm"] + ) + synonyms.add(synonym) + if word in synonyms: + synonyms.remove(word) + return list(synonyms) + + +######################################################################## +# Random deletion +# Randomly delete words from the sentence with probability p +######################################################################## + + +def random_deletion(words, p): + # obviously, if there's only one word, don't delete it + if len(words) == 1: + return words + + # randomly delete words with probability p + new_words = [] + for word in words: + r = random.uniform(0, 1) + if r > p: + new_words.append(word) + + # if you end up deleting all words, just return a random word + if len(new_words) == 0: + rand_int = random.randint(0, len(words) - 1) + return [words[rand_int]] + + return new_words + + +######################################################################## +# Random swap +# Randomly swap two words in the sentence n times +######################################################################## + + +def random_swap(words, n): + new_words = words.copy() + for _ in range(n): + new_words = swap_word(new_words) + return new_words + + +def swap_word(new_words): + random_idx_1 = random.randint(0, len(new_words) - 1) + random_idx_2 = random_idx_1 + counter = 0 + while random_idx_2 == random_idx_1: + random_idx_2 = random.randint(0, len(new_words) - 1) + counter += 1 + if counter > 3: + return new_words + new_words[random_idx_1], new_words[random_idx_2] = ( + new_words[random_idx_2], + new_words[random_idx_1], + ) + return new_words + + +######################################################################## +# Random insertion +# Randomly insert n words into the sentence +######################################################################## + + +def random_insertion(words, n): + new_words = words.copy() + for _ in range(n): + add_word(new_words) + return new_words + + +def add_word(new_words): + synonyms = [] + counter = 0 + while len(synonyms) < 1: + random_word = new_words[random.randint(0, len(new_words) - 1)] + synonyms = get_synonyms(random_word) + counter += 1 + if counter >= 10: + return + random_synonym = synonyms[0] + random_idx = random.randint(0, len(new_words) - 1) + new_words.insert(random_idx, random_synonym) + + +######################################################################## +# main data 
augmentation function +######################################################################## + + +def eda(sentence, alpha_sr=0.1, alpha_ri=0.1, alpha_rs=0.1, p_rd=0.1, num_aug=9): + sentence = get_only_chars(sentence) + words = sentence.split(" ") + words = [word for word in words if word != ""] + num_words = len(words) + + augmented_sentences = [] + num_new_per_technique = int(num_aug / 4) + 1 + n_sr = max(1, int(alpha_sr * num_words)) + n_ri = max(1, int(alpha_ri * num_words)) + n_rs = max(1, int(alpha_rs * num_words)) + + # sr + for _ in range(num_new_per_technique): + a_words = synonym_replacement(words, n_sr) + augmented_sentences.append(" ".join(a_words)) + + # ri + for _ in range(num_new_per_technique): + a_words = random_insertion(words, n_ri) + augmented_sentences.append(" ".join(a_words)) + + # rs + for _ in range(num_new_per_technique): + a_words = random_swap(words, n_rs) + augmented_sentences.append(" ".join(a_words)) + + # rd + for _ in range(num_new_per_technique): + a_words = random_deletion(words, p_rd) + augmented_sentences.append(" ".join(a_words)) + + augmented_sentences = [get_only_chars(sentence) for sentence in augmented_sentences] + shuffle(augmented_sentences) + + # trim so that we have the desired number of augmented sentences + if num_aug >= 1: + augmented_sentences = augmented_sentences[:num_aug] + else: + keep_prob = num_aug / len(augmented_sentences) + augmented_sentences = [ + s for s in augmented_sentences if random.uniform(0, 1) < keep_prob + ] + + # append the original sentence + augmented_sentences.append(sentence) + + return augmented_sentences diff --git a/robustnessgym/slicebuilders/transformations/eda.py b/robustnessgym/slicebuilders/transformations/eda.py new file mode 100644 index 00000000..02a5d0ca --- /dev/null +++ b/robustnessgym/slicebuilders/transformations/eda.py @@ -0,0 +1,87 @@ +from typing import Dict, List, Tuple + +import numpy as np + +from robustnessgym.core.identifier import Identifier +from robustnessgym.slicebuilders.transformation import Transformation +from robustnessgym.slicebuilders.transformations._eda import eda + + +class EasyDataAugmentation(Transformation): + """Text transformation class for Easy Data Augmentation. + + Citation + -------- + Wei, J., & Zou, K. (2019). EDA: Easy Data Augmentation Techniques for Boosting + Performance on Text Classification + Tasks. EMNLP 2019. 
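+
+    Example
+    -------
+    Illustrative usage only; the call signature follows the SliceBuilder
+    convention assumed here, and the column name "text" is a placeholder::
+
+        eda = EasyDataAugmentation(num_transformed=2)
+        dataset, slices, membership = eda(dataset, columns=["text"])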
+ """ + + def __init__( + self, num_transformed=1, alpha_sr=0.1, alpha_ri=0.1, alpha_rs=0.1, p_rd=0.1 + ): + + super(EasyDataAugmentation, self).__init__( + identifiers=Identifier.range( + n=num_transformed, + _name=self.__class__.__name__, + alpha_sr=alpha_sr, + alpha_ri=alpha_ri, + alpha_rs=alpha_rs, + p_rd=p_rd, + ) + ) + + # Set the parameters + self.alpha_sr = alpha_sr + self.alpha_ri = alpha_ri + self.alpha_rs = alpha_rs + self.p_rd = p_rd + + # Download wordnet + self._download_wordnet() + + def _download_wordnet(self): + try: + if not (self.logdir / "wordnet").exists(): + import nltk + + nltk.download( + "wordnet", download_dir=str(self.logdir / "wordnet"), quiet=False + ) + except ImportError: + print("Need nltk.") + + def apply( + self, + skeleton_batches: List[Dict[str, List]], + slice_membership: np.ndarray, + batch: Dict[str, List], + columns: List[str], + *args, + **kwargs + ) -> Tuple[List[Dict[str, List]], np.ndarray]: + + for col in columns: + # Iterate over col for all examples in the batch + for i, text in enumerate(batch[col]): + try: + # EDA returns a list of augmented text, including the original + # text at the last position + augmented_texts = eda( + text, + alpha_sr=self.alpha_sr, + alpha_ri=self.alpha_ri, + alpha_rs=self.alpha_rs, + p_rd=self.p_rd, + num_aug=self.num_transformed, + )[:-1] + + # Store the augmented text in the augmented batches + for j, augmented_text in enumerate(augmented_texts): + skeleton_batches[j][col][i] = augmented_text + except: # noqa + # Unable to augment the example: set its slice membership to zero + slice_membership[i, :] = 0 + + return skeleton_batches, slice_membership diff --git a/robustnessgym/slicebuilders/transformations/fairseq.py b/robustnessgym/slicebuilders/transformations/fairseq.py new file mode 100644 index 00000000..a6395d71 --- /dev/null +++ b/robustnessgym/slicebuilders/transformations/fairseq.py @@ -0,0 +1,160 @@ +from typing import List + +import cytoolz as tz +import torch + +from robustnessgym.core.identifier import Identifier +from robustnessgym.slicebuilders.transformation import SingleColumnTransformation + +try: + import fastBPE # noqa +except ImportError: + _fastbpe_available = False +else: + _fastbpe_available = True + + +# TODO(karan): spec requirements (fastBPE) +class FairseqBacktranslation(SingleColumnTransformation): + """Class for performing backtranslation using torchhub fairseq models.""" + + def __init__( + self, + n_src2tgt: int = 1, + n_tgt2src: int = 1, + langs: str = "en2de", + torchhub_dir: str = None, + device: str = "cuda", + src2tgt_topk: int = 1000, + src2tgt_temp: float = 1.0, + tgt2src_topk: int = 1000, + tgt2src_temp: float = 1.0, + ): + + if not _fastbpe_available: + raise ImportError( + "fastBPE not available for import. Please install fastBPE with pip " + "install fastBPE." 
+ ) + + super(FairseqBacktranslation, self).__init__( + identifiers=Identifier.range( + n=n_src2tgt * n_tgt2src, + _name=self.__class__.__name__, + langs=langs, + src2tgt_topk=src2tgt_topk, + src2tgt_temp=src2tgt_temp, + tgt2src_topk=tgt2src_topk, + tgt2src_temp=tgt2src_temp, + ) + ) + + # Set the parameters + self.n_src2tgt = n_src2tgt + self.n_tgt2src = n_tgt2src + self.src2tgt_topk = src2tgt_topk + self.src2tgt_temp = src2tgt_temp + self.tgt2src_topk = tgt2src_topk + self.tgt2src_temp = tgt2src_temp + + # Setup the backtranslation models + self.src2tgt, self.tgt2src = self.load_models( + langs=langs, + torchhub_dir=torchhub_dir, + # self.logdir if not torchhub_dir else torchhub_dir, + device=device, + ) + + @staticmethod + def load_models( + langs: str, + torchhub_dir: str = None, + device: str = "cuda", + half_precision: bool = False, + ): + if torchhub_dir: + # Set the directory where the models will be stored. + torch.hub.set_dir(torchhub_dir) + + if langs == "en2de": + # Round-trip translations between English and German + src2tgt = torch.hub.load( + "pytorch/fairseq", + "transformer.wmt19.en-de.single_model", + tokenizer="moses", + bpe="fastbpe", + ) + + tgt2src = torch.hub.load( + "pytorch/fairseq", + "transformer.wmt19.de-en.single_model", + tokenizer="moses", + bpe="fastbpe", + ) + + elif langs == "en2ru": + # Round-trip translations between English and Russian + src2tgt = torch.hub.load( + "pytorch/fairseq", + "transformer.wmt19.en-ru.single_model", + tokenizer="moses", + bpe="fastbpe", + ) + + tgt2src = torch.hub.load( + "pytorch/fairseq", + "transformer.wmt19.ru-en.single_model", + tokenizer="moses", + bpe="fastbpe", + ) + else: + raise NotImplementedError + + # Convert to half precision + if half_precision: + return src2tgt.to(device).half(), tgt2src.to(device).half() + return src2tgt.to(device), tgt2src.to(device) + + def single_column_apply(self, column_batch: List) -> List[List]: + """Perform backtranslation using the fairseq pretrained translation + models.""" + # Encode the source sentences + src_sentences = column_batch + src_sentences_bin = [self.src2tgt.encode(e)[:1024] for e in src_sentences] + + # Translate it + tgt_sentences = self.src2tgt.generate( + src_sentences_bin, + beam=self.n_src2tgt, + sampling=True, + sampling_topk=self.src2tgt_topk, + temperature=self.src2tgt_temp, + skip_invalid_size_inputs=True, + ) + + # Back-translate: moving tokens to CPU because of an error otherwise + src_paraphrases = self.tgt2src.generate( + [e["tokens"].cpu() for l in tgt_sentences for e in l], + beam=self.n_tgt2src, + sampling=True, + sampling_topk=self.tgt2src_topk, + temperature=self.tgt2src_temp, + skip_invalid_size_inputs=True, + ) + + # Flatten out all the translations into one giant list + flat_src_paraphrases = list( + tz.concat( + map( + lambda l: list(map(lambda e: self.tgt2src.decode(e["tokens"]), l)), + src_paraphrases, + ) + ) + ) + + # Partition so that we get n_src2tgt * n_tgt2src paraphrases per input sentence + return list( + tz.partition_all( + len(flat_src_paraphrases) // len(src_sentences), flat_src_paraphrases + ) + ) diff --git a/robustnessgym/slicebuilders/transformations/gpt3.py b/robustnessgym/slicebuilders/transformations/gpt3.py new file mode 100644 index 00000000..1cabea8a --- /dev/null +++ b/robustnessgym/slicebuilders/transformations/gpt3.py @@ -0,0 +1,25 @@ +from __future__ import annotations + +from typing import Dict, List, Tuple + +import numpy as np + +from robustnessgym.slicebuilders.transformation import Transformation + + +class 
GPT3Transformation(Transformation): + def __init__( + self, + ): + super(GPT3Transformation, self).__init__() + + def apply( + self, + skeleton_batches: List[Dict[str, List]], + slice_membership: np.ndarray, + batch: Dict[str, List], + columns: List[str], + *args, + **kwargs + ) -> Tuple[List[Dict[str, List]], np.ndarray]: + pass diff --git a/robustnessgym/slicebuilders/transformations/nlpaug.py b/robustnessgym/slicebuilders/transformations/nlpaug.py new file mode 100644 index 00000000..9abd0383 --- /dev/null +++ b/robustnessgym/slicebuilders/transformations/nlpaug.py @@ -0,0 +1,69 @@ +"""Transformations using nlpaug.""" +from typing import List + +from nlpaug.flow import Pipeline + +from robustnessgym.core.identifier import Identifier +from robustnessgym.slicebuilders.transformation import SingleColumnTransformation + + +class NlpAugTransformation(SingleColumnTransformation): + """Class for building transformations using nlpaug.""" + + def __init__( + self, + pipeline: Pipeline, + num_transformed: int = 1, + identifiers: List[Identifier] = None, + *args, + **kwargs + ): + assert isinstance(pipeline, Pipeline), ( + "`pipeline` must be an nlpaug Pipeline object. " + "Please use \nfrom nlpaug.flow import " + "Sequential\nrg.NlpAugTransformation(pipeline=Sequential(flow=[...]))." + ) + + # Superclass call + super(NlpAugTransformation, self).__init__( + num_transformed=num_transformed, + identifiers=Identifier.range( + n=num_transformed, + _name=self.__class__.__name__, + pipeline=[ + Identifier( + _name=augmenter.name, + src=augmenter.aug_src + if hasattr(augmenter, "aug_src") + else None, + action=augmenter.action, + method=augmenter.method, + ) + for augmenter in pipeline + ], + ) + if not identifiers + else identifiers, + *args, + **kwargs + ) + + # Set the pipeline + self._pipeline = pipeline + + @property + def pipeline(self): + return self._pipeline + + def single_column_apply(self, column_batch: List[str]) -> List[List[str]]: + # Apply the nlpaug pipeline + augmented_texts = self.pipeline.augment( + data=column_batch, + n=self.num_transformed, + ) + + if self.num_transformed == 1: + augmented_texts = [augmented_texts] + + # Transpose the list of lists from [4 x 32] to [32 x 4] and return + return list(map(list, zip(*augmented_texts))) diff --git a/robustnessgym/slicebuilders/transformations/similarity.py b/robustnessgym/slicebuilders/transformations/similarity.py new file mode 100644 index 00000000..5701947b --- /dev/null +++ b/robustnessgym/slicebuilders/transformations/similarity.py @@ -0,0 +1,65 @@ +from functools import partial +from typing import List, Sequence, Tuple + +import numpy as np + +from robustnessgym.cachedops.similarity import RougeMatrix +from robustnessgym.cachedops.spacy import Spacy +from robustnessgym.core.dataset import Batch +from robustnessgym.core.identifier import Identifier +from robustnessgym.core.tools import strings_as_json +from robustnessgym.slicebuilders.transformation import Transformation + + +class RougeMatrixSentenceTransformation(Transformation): + def __init__(self, metric: Sequence[str], threshold: float): + super(RougeMatrixSentenceTransformation, self).__init__( + num_transformed=1, + identifiers=Identifier.range(n=1, _name=self.__class__.__name__), + ) + + self.metric = metric + self.threshold = threshold + + def apply( + self, + skeleton_batches: List[Batch], + slice_membership: np.ndarray, + batch: Batch, + columns: List[str], + *args, + **kwargs + ) -> Tuple[List[Batch], np.ndarray]: + assert len(columns) == 2 + + # Retrieve the relevant 
Rouge matrices + matrices = RougeMatrix.retrieve( + batch=batch, + columns=columns, + proc_fns=partial(RougeMatrix.select, metric=self.metric), + )[strings_as_json(columns)] + + # Find max value along each row, remove rows that have max value below a + # threshold + rows_to_keep = [ + (m / (m.sum(axis=0) + 1e-5)).max(axis=1) >= self.threshold for m in matrices + ] + + # Fetch sentences for the first column + sentences = Spacy.retrieve( + batch=batch, + columns=[columns[0]], + proc_fns=Spacy.sentences, + )[columns[0]] + + # Delete sentences + new_sentences = [ + " ".join(np.array(sent)[rows_to_keep[i]]) + for i, sent in enumerate(sentences) + ] + + # Store the augmented text in the skeleton batches + for i, augmented in enumerate(new_sentences): + skeleton_batches[0][columns[0]][i] = augmented + + return skeleton_batches, slice_membership diff --git a/robustnessgym/tasks/__init__.py b/robustnessgym/tasks/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/robustnessgym/tasks/schema.py b/robustnessgym/tasks/schema.py new file mode 100644 index 00000000..bb4a754b --- /dev/null +++ b/robustnessgym/tasks/schema.py @@ -0,0 +1,69 @@ +from typing import Callable, Collection, Dict, OrderedDict + +from datasets.features import FeatureType + +from robustnessgym.core.tools import get_all_paths + + +class Schema: + def __init__( + self, features: OrderedDict, grounding_candidates: Dict[str, Collection] + ): + # Store the features and grounding candidates + self.features = features + self.grounding_candidates = grounding_candidates + self.reversed_grounding_candidates = { + v: k for k, values in self.grounding_candidates.items() for v in values + } + + def ground(self, features: Dict[str, FeatureType]): + """ + + Args: + features: given by Dataset.features + + Returns: (grounding, reversed_grounding) + + """ + # For features, get the path to the leaves in the (potentially nested) + # features dictionary + flat_columns = get_all_paths(features) + flat_columns = { + tuple(path) if len(path) > 1 else path[0] for path in flat_columns + } + + # Figure out the (reversed) grounding: map columns in the dataset to keys in + # the schema + reversed_grounding = {} + for k in self.reversed_grounding_candidates: + if ((isinstance(k, tuple) or isinstance(k, str)) and k in flat_columns) or ( + isinstance(k, Callable) + ): + reversed_grounding[k] = self.reversed_grounding_candidates[k] + + # Construct the grounding by reversing + grounding = {v: k for k, v in reversed_grounding.items()} + + # Assert that the grounding covers the entire schema + assert len(self.features) == len(grounding), "Grounding failed." 
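+
+        # Illustrative example (hypothetical column names): with features
+        # {"premise", "hypothesis"} and grounding_candidates
+        # {"premise": {"premise", "sentence1"}, "hypothesis": {"hypothesis",
+        # "sentence2"}}, a dataset whose columns are ("sentence1", "sentence2",
+        # "label") grounds as
+        #     grounding          = {"premise": "sentence1", "hypothesis": "sentence2"}
+        #     reversed_grounding = {"sentence1": "premise", "sentence2": "hypothesis"}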
+ + # Assert that the grounded schema has the right types + # FIXME(karan): Value == ClassLabel should be allowed: shouldn't break this + # TODO(karan): if not, add code to automatically rejig the dataset in map_fn + # for key in self.features: + # if isinstance(grounding[key], str): + # assert self.features[key] == features[grounding[key]] + # elif isinstance(grounding[key], tuple): + # assert self.features[key] == tz.get_in(grounding[key], features) + + return grounding, reversed_grounding + + def __repr__(self): + features = "\n\t".join([f"{k}: {v}" for k, v in self.features.items()]) + return f"Schema(\n\t{features}\n)" + + def __len__(self): + return len(self.features) + + def keys(self): + return list(self.features.keys()) diff --git a/robustnessgym/tasks/task.py b/robustnessgym/tasks/task.py new file mode 100644 index 00000000..2c585612 --- /dev/null +++ b/robustnessgym/tasks/task.py @@ -0,0 +1,376 @@ +from collections import OrderedDict +from typing import List + +import cytoolz as tz +from datasets.features import ClassLabel, Sequence, Value + +from robustnessgym.core.dataset import Dataset +from robustnessgym.tasks.schema import Schema + + +class Task: + dataset_to_task = {} + + def __init__( + self, + identifier, + input_schema: Schema, + output_schema: Schema, + metrics: List[str], + *args, + **kwargs, + ): + self.identifier = identifier + self.input_schema = input_schema + self.output_schema = output_schema + self.metrics = metrics + + @classmethod + def lookup(cls, dataset: str): + return cls.dataset_to_task[dataset] + + @classmethod + def list_datasets(cls): + return [] + + # @classmethod + # def from_identifier(cls, identifier): + # return getattr() + + @classmethod + def create(cls, task: str): + # TODO(karan): figure out how to getattr this + if task == "TernaryNaturalLanguageInference": + return TernaryNaturalLanguageInference() + else: + raise NotImplementedError + + def remap_schema(self, dataset: Dataset): + # Ground the schema to the dataset + input_grounding, reversed_input_grounding = self.input_schema.ground( + dataset.features + ) + output_grounding, reversed_output_grounding = self.output_schema.ground( + dataset.features + ) + + # Construct a map_fn that remaps the dataset schema + def map_fn(example): + return tz.merge( + {k: example[input_grounding[k]] for k in self.input_schema.features}, + {k: example[output_grounding[k]] for k in self.output_schema.features}, + ) + + return dataset.map( + map_fn, + remove_columns=list(reversed_input_grounding.keys()) + + list(reversed_output_grounding.keys()), + ) + + def classification(self): + # TODO(karan): improve the schema inference + # Check that the only output is a ClassLabel output + if len(self.output_schema) == 1 and isinstance( + self.output_schema.features[self.output_schema.keys()[0]], ClassLabel + ): + return True + return False + + def __repr__(self): + return ( + f"task: {self.identifier}\n\nInput{str(self.input_schema)}\n\nOutput" + f"{str(self.output_schema)}" + ) + + +# class ClassificationMixin: +# +# def __init__(self, +# num_classes: int = None, +# *args, +# **kwargs): +# super(ClassificationMixin, self).__init__(*args, **kwargs) +# +# self.output_schema = None + + +class Sentiment(Task): + def __init__(self, identifier, input_schema, output_schema, *args, **kwargs): + super(Sentiment, self).__init__( + identifier=identifier, + input_schema=input_schema, + output_schema=output_schema, + metrics=[ + "accuracy", + "f1", + "class_dist", + "pred_dist" + # TODO(karan): calibration, other metrics + ], + 
*args, + **kwargs, + ) + + +class BinarySentiment(Sentiment): + def __init__(self): + super(BinarySentiment, self).__init__( + num_classes=2, + input_schema=Schema( + features=OrderedDict( + [ + ("text", Value(dtype="string")), + ] + ), + grounding_candidates={ + "text": {"text", "sentence"}, + }, + ), + output_schema=Schema( + features=OrderedDict( + [ + ("label", ClassLabel(names=["negative", "positive"])), + ] + ), + grounding_candidates={ + "label": {"label"}, + }, + ), + identifier=self.__class__.__name__, + ) + + @classmethod + def list_datasets(cls): + return [ + "imdb", + ] + + +class Summarization(Task): + def __init__(self): + super(Summarization, self).__init__( + identifier=self.__class__.__name__, + input_schema=Schema( + features=OrderedDict([("text", Value(dtype="string"))]), + grounding_candidates={ + "text": {"article", "document"}, + }, + ), + output_schema=Schema( + features=OrderedDict([("summary", Value(dtype="string"))]), + grounding_candidates={ + "summary": {"highlights", "summary"}, + }, + ), + metrics=[ + # blah, + # TODO(karan): calibration, other metrics + "rouge1", + "rouge2", + "rougeLsum", + ], + ) + + @classmethod + def list_datasets(cls): + return [ + "cnn_dailymail", + ] + + +class NaturalLanguageInference(Task): + def __init__(self, identifier, input_schema, output_schema, *args, **kwargs): + super(NaturalLanguageInference, self).__init__( + identifier=identifier, + input_schema=input_schema, + output_schema=output_schema, + metrics=[ + "accuracy", + "f1", + "class_dist", + "pred_dist" + # TODO(karan): calibration, other metrics + ], + *args, + **kwargs, + ) + + +class BinaryNaturalLanguageInference(NaturalLanguageInference): + def __init__(self): + super(BinaryNaturalLanguageInference, self).__init__( + num_classes=2, + input_schema=Schema( + features=OrderedDict( + [ + ("premise", Value(dtype="string")), + ("hypothesis", Value(dtype="string")), + ] + ), + grounding_candidates={ + "premise": {"premise", "sentence1"}, + "hypothesis": {"hypothesis", "sentence2"}, + }, + ), + output_schema=Schema( + features=OrderedDict( + [ + ("label", ClassLabel(names=["entailment", "non entailment"])), + ] + ), + grounding_candidates={ + "label": {"label"}, + }, + ), + identifier=self.__class__.__name__, + ) + + @classmethod + def list_datasets(cls): + return [] + + +class TernaryNaturalLanguageInference(NaturalLanguageInference): + def __init__(self): + super(TernaryNaturalLanguageInference, self).__init__( + num_classes=3, + input_schema=Schema( + features=OrderedDict( + [ + ("premise", Value(dtype="string")), + ("hypothesis", Value(dtype="string")), + ] + ), + grounding_candidates={ + "premise": {"premise", "sentence1"}, + "hypothesis": {"hypothesis", "sentence2"}, + }, + ), + output_schema=Schema( + features=OrderedDict( + [ + ( + "label", + ClassLabel( + names=["entailment", "neutral", "contradiction"] + ), + ), + ] + ), + grounding_candidates={ + "label": {"label"}, + }, + ), + identifier=self.__class__.__name__, + ) + + def datasets(self): + return { + "snli", + } + + +class QuestionAnswering(Task): + def __init__(self, identifier, input_schema, output_schema, *args, **kwargs): + super(QuestionAnswering, self).__init__( + identifier=identifier, + input_schema=input_schema, + output_schema=output_schema, + metrics=[ + "em", + "f1", + # TODO(karan): calibration, other metrics + ], + *args, + **kwargs, + ) + + +class ExtractiveQuestionAnswering(Task): + def __init__(self): + super(ExtractiveQuestionAnswering, self).__init__( + input_schema=Schema( + 
features=OrderedDict( + [ + ("context", Value(dtype="string")), + ("question", Value(dtype="string")), + ] + ), + grounding_candidates={ + "context": {"context"}, + "question": {"question"}, + }, + ), + output_schema=Schema( + features=OrderedDict( + [ + ( + "answers", + Sequence( + feature={ + "text": Value(dtype="string", id=None), + "answer_start": Value(dtype="int32", id=None), + }, + length=-1, + ), + ), + ] + ), + grounding_candidates={ + "answers": { + "answers", + }, + }, + ), + metrics=[ + "em", + "f1", + ], + identifier=self.__class__.__name__, + ) + + +# class ExtractiveQuestionAnswering(Task): +# +# def __init__(self): +# super(ExtractiveQuestionAnswering, self).__init__( +# input_schema=Schema( +# features=OrderedDict([ +# ('context', Value(dtype='string')), +# ('question', Value(dtype='string')), +# ]), +# grounding_candidates={ +# 'context': {'context'}, +# 'question': {'question'}, +# }, +# ), +# output_schema=Schema( +# features=OrderedDict([ +# ('answer', Sequence(Value(dtype='string'), length=-1)), +# ('start', Sequence(Value(dtype='int64'), length=-1)), +# ('end', Sequence(Value(dtype='int64'), length=-1)), +# ]), +# grounding_candidates={ +# 'answer': { +# ('answers', 'text'), +# }, +# 'start': { +# ('answers', 'answer_start') +# }, +# 'end': { +# lambda answer, start: [idx + len(answer) for idx in start], +# }, +# } +# ), +# metrics=[ +# 'em', +# 'f1', +# ], +# identifier=self.__class__.__name__, +# ) + +# Evaluation Hierarchy +# -------------------- +# (generic task, model) ### QuestionAnswering/NLI +# (narrow task, model) ### MultiHopQuestionAnswering/BinaryNLI +# (dataset, model) ### Particular Dataset/QNLI diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/cached_ops/__init__.py b/tests/cached_ops/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/cached_ops/test_spacy.py b/tests/cached_ops/test_spacy.py new file mode 100644 index 00000000..22825e6d --- /dev/null +++ b/tests/cached_ops/test_spacy.py @@ -0,0 +1,53 @@ +from unittest import TestCase + +from robustnessgym.cachedops import Spacy +from tests.testbeds import MockTestBedv0 + + +class TestSpacy(TestCase): + def setUp(self): + self.testbed = MockTestBedv0() + + def test_apply(self): + # Create the Spacy cached operation + spacy = Spacy() + + # Apply it + dataset = spacy(self.testbed.dataset, ["text"]) + + # Retrieve information to test + sentences = spacy.retrieve(dataset[:], ["text"], proc_fns=spacy.sentences) + tokens = spacy.retrieve(dataset[:], ["text"], proc_fns=spacy.tokens) + entities = spacy.retrieve(dataset[:], ["text"], proc_fns=spacy.entities) + num_tokens = spacy.retrieve(dataset[:], ["text"], proc_fns=spacy.num_tokens) + + self.assertEqual( + sentences, + { + "text": [ + ["The man is walking."], + ["The man is running."], + ["The woman is sprinting."], + ["The woman is resting."], + ["The hobbit is flying."], + ["The hobbit is swimming."], + ] + }, + ) + + self.assertEqual( + tokens, + { + "text": [ + ["The", "man", "is", "walking", "."], + ["The", "man", "is", "running", "."], + ["The", "woman", "is", "sprinting", "."], + ["The", "woman", "is", "resting", "."], + ["The", "hobbit", "is", "flying", "."], + ["The", "hobbit", "is", "swimming", "."], + ] + }, + ) + + self.assertEqual(entities, {"text": [[], [], [], [], [], []]}) + self.assertEqual(num_tokens, {"text": [5, 5, 5, 5, 5, 5]}) diff --git a/tests/cached_ops/test_stanza.py b/tests/cached_ops/test_stanza.py new file mode 100644 index 
00000000..cf9744d5 --- /dev/null +++ b/tests/cached_ops/test_stanza.py @@ -0,0 +1,35 @@ +from unittest import TestCase + +from robustnessgym.cachedops.stanza import Stanza +from tests.testbeds import MockTestBedv0 + + +class TestStanza(TestCase): + def setUp(self): + self.testbed = MockTestBedv0() + + def test_apply(self): + # Create the Stanza cached operation + stanza = Stanza() + dataset = stanza(self.testbed.dataset, columns=["text"]) + + # Make sure things match up + self.assertEqual( + stanza.retrieve( + dataset[:], + ["text"], + proc_fns=lambda decoded_batch: [ + doc.get("lemma") for doc in decoded_batch + ], + ), + { + "text": [ + ["the", "man", "be", "walk", "."], + ["the", "man", "be", "run", "."], + ["the", "woman", "be", "sprint", "."], + ["the", "woman", "be", "rest", "."], + ["the", "hobbit", "be", "fly", "."], + ["the", "hobbit", "be", "swim", "."], + ] + }, + ) diff --git a/tests/core/__init__.py b/tests/core/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/core/test_cachedops.py b/tests/core/test_cachedops.py new file mode 100644 index 00000000..647a3a72 --- /dev/null +++ b/tests/core/test_cachedops.py @@ -0,0 +1,183 @@ +"""Unittests for CachedOperations.""" + +from unittest import TestCase + +from robustnessgym.core.cachedops import CachedOperation +from robustnessgym.core.identifier import Identifier +from robustnessgym.core.tools import strings_as_json +from tests.testbeds import MockTestBedv0 + + +def a_single_column_apply_fn(batch, columns): + assert len(columns) == 1 and type(batch[columns[0]][0]) == int + return [e * 7 + 3.14 for e in batch[columns[0]]] + + +def a_multi_column_apply_fn(batch, columns): + assert len(columns) == 2 + return [e[0] * 0.1 + e[1] * 0.3 for e in zip(batch[columns[0]], batch[columns[1]])] + + +class TestCachedOperation(TestCase): + def setUp(self): + # Arrange + self.cachedop = CachedOperation( + apply_fn=a_single_column_apply_fn, + identifier=Identifier(_name="TestCachedOperation"), + ) + + self.testbed = MockTestBedv0() + + self.multicol_cachedop = CachedOperation( + apply_fn=a_multi_column_apply_fn, + identifier=Identifier(_name="TestCachedOperation", columns="multiple"), + ) + + def test_repr(self): + self.assertEqual(str(self.cachedop), "TestCachedOperation") + + def test_endtoend(self): + # Apply to the dataset + self.cachedop(self.testbed.dataset, columns=["label"]) + + # Check that the dataset remains the same + self.assertEqual( + self.testbed.dataset.features, self.testbed.original_dataset.features + ) + + # Apply and store + self.testbed.dataset = self.cachedop(self.testbed.dataset, columns=["label"]) + + # The dataset should have changed + self.assertNotEqual( + self.testbed.dataset.features, self.testbed.original_dataset.features + ) + + # It should contain the special cache key + self.assertTrue("cache" in self.testbed.dataset.features) + + # The interaction tape should contain the history of this operation + self.assertTrue( + self.testbed.dataset.fetch_tape(path=["cachedoperations"]).history + == {(self.cachedop.identifier, "label"): 0} + ) + + # Retrieve the information that was stored using the instance + self.assertEqual( + self.cachedop.retrieve(self.testbed.dataset[:], columns=["label"]), + {"label": [3.14, 3.14, 10.14, 10.14, 3.14, 3.14]}, + ) + + # Retrieve the information that was stored using the CachedOperation class, + # and an identifier + self.assertEqual( + CachedOperation.retrieve( + self.testbed.dataset[:], + columns=["label"], + identifier=self.cachedop.identifier, + ), + {"label": 
[3.14, 3.14, 10.14, 10.14, 3.14, 3.14]}, + ) + + # Retrieve the information that was stored using the CachedOperation class: + # fails without the identifier + with self.assertRaises(ValueError): + CachedOperation.retrieve(self.testbed.dataset[:], columns=["label"]) + + # Retrieve the information that was stored, and process it with a function + self.assertEqual( + self.cachedop.retrieve( + self.testbed.dataset[:], + columns=["label"], + proc_fns=lambda decoded_batch: [x + 0.01 for x in decoded_batch], + ), + {"label": [3.15, 3.15, 10.15, 10.15, 3.15, 3.15]}, + ) + + def test_multiple_calls(self): + # Apply to multiple columns of the dataset directly: fails since the function + # requires a single column + with self.assertRaises(AssertionError): + self.cachedop(self.testbed.dataset, columns=["label", "fast"]) + + # Create an additional integer column in the dataset + dataset = self.testbed.dataset.map(lambda x: {"otherlabel": x["label"] + 1}) + + # Apply to multiple columns of the dataset in sequence + dataset_0_0 = self.cachedop(dataset, columns=["label"]) + dataset_0_1 = self.cachedop(dataset_0_0, columns=["z"]) + + # Check that the cache is populated with the processed columns + self.assertTrue( + "label" in dataset_0_0.features["cache"][str(self.cachedop.identifier)] + and "z" not in dataset_0_0.features["cache"][str(self.cachedop.identifier)] + ) + self.assertTrue( + "label" in dataset_0_1.features["cache"][str(self.cachedop.identifier)] + and "z" in dataset_0_1.features["cache"][str(self.cachedop.identifier)] + ) + + # Apply to multiple columns of the dataset, in reverse order + dataset_1_0 = self.cachedop(dataset, columns=["z"]) + dataset_1_1 = self.cachedop(dataset_1_0, columns=["label"]) + + # Check that the cache is populated with the processed columns + self.assertTrue( + "label" not in dataset_1_0.features["cache"][str(self.cachedop.identifier)] + and "z" in dataset_1_0.features["cache"][str(self.cachedop.identifier)] + ) + self.assertTrue( + "label" in dataset_1_1.features["cache"][str(self.cachedop.identifier)] + and "z" in dataset_1_1.features["cache"][str(self.cachedop.identifier)] + ) + + # Retrieving information fails if the columns are passed together in a single + # list + with self.assertRaises(KeyError) as context: + self.cachedop.retrieve(dataset_1_1[:], columns=["label", "z"]) + print("Fails:", str(context.exception)) + + # Retrieving information succeeds when the columns are passed separately + self.assertEqual( + self.cachedop.retrieve(dataset_1_1[:], columns=[["label"], ["z"]]), + { + "label": [3.14, 3.14, 10.14, 10.14, 3.14, 3.14], + "z": [10.14, 3.14, 10.14, 3.14, 10.14, 3.14], + }, + ) + + def test_multicolumn(self): + # Apply the multi-column cached operation + dataset = self.multicol_cachedop(self.testbed.dataset, columns=["label", "z"]) + + # Check that caching happens and that the cached values are correct + self.assertTrue( + strings_as_json(["label", "z"]) + in dataset.features["cache"][str(self.multicol_cachedop.identifier)] + ) + self.assertEqual( + self.multicol_cachedop.retrieve(dataset[:], columns=["label", "z"]), + {'["label", "z"]': [0.3, 0.0, 0.4, 0.1, 0.3, 0.0]}, + ) + + # Apply the single-column cached operation + dataset = self.cachedop(dataset, columns=["label"]) + dataset = self.cachedop(dataset, columns=["z"]) + + # Now recheck that everything can be retrieved correctly + self.assertTrue( + strings_as_json(["label", "z"]) + in dataset.features["cache"][str(self.multicol_cachedop.identifier)] + ) + self.assertEqual( +
self.multicol_cachedop.retrieve(dataset[:], columns=["label", "z"]), + {'["label", "z"]': [0.3, 0.0, 0.4, 0.1, 0.3, 0.0]}, + ) + self.assertEqual( + self.cachedop.retrieve(dataset[:], columns=["label"]), + {"label": [3.14, 3.14, 10.14, 10.14, 3.14, 3.14]}, + ) + self.assertEqual( + self.cachedop.retrieve(dataset[:], columns=["z"]), + {"z": [10.14, 3.14, 10.14, 3.14, 10.14, 3.14]}, + ) diff --git a/tests/core/test_dataset.py b/tests/core/test_dataset.py new file mode 100644 index 00000000..074886d1 --- /dev/null +++ b/tests/core/test_dataset.py @@ -0,0 +1,171 @@ +"""Unittests for Datasets.""" +import os +import shutil +from unittest import TestCase + +import jsonlines + +from robustnessgym.core.dataset import Dataset, transpose_batch +from robustnessgym.core.identifier import Identifier +from tests.testbeds import MockTestBedv0 + + +class TestDataset(TestCase): + def setUp(self): + # Arrange + self.testbed = MockTestBedv0() + + def test_from_batch(self): + # Build a dataset from a batch + dataset = Dataset.from_batch( + { + "a": [1, 2, 3], + "b": [True, False, True], + "c": ["x", "y", "z"], + "d": [{"e": 2}, {"e": 3}, {"e": 4}], + }, + identifier=Identifier(_name="MyDataset"), + ) + + self.assertEqual(set(dataset.column_names), {"a", "b", "c", "d", "index"}) + self.assertEqual(len(dataset), 3) + + def test_from_batches(self): + # Build a dataset from multiple batches + dataset = Dataset.from_batches( + [ + { + "a": [1, 2, 3], + "b": [True, False, True], + "c": ["x", "y", "z"], + "d": [{"e": 2}, {"e": 3}, {"e": 4}], + } + ] + * 3, + identifier=Identifier(_name="MyDataset"), + ) + + self.assertEqual(set(dataset.column_names), {"a", "b", "c", "d", "index"}) + self.assertEqual(len(dataset), 9) + + def test_from_json(self): + # Create a temporary directory + os.mkdir("tmp") + + # Create a json file with data + with jsonlines.open("tmp/data.jsonl", "w") as writer: + writer.write_all( + transpose_batch( + { + "a": [1, 2, 3], + "b": [True, False, True], + "c": ["x", "y", "z"], + "d": [{"e": 2}, {"e": 3}, {"e": 4}], + } + ) + ) + + # Load the dataset + dataset = Dataset.from_json( + json_path="tmp/data.jsonl", + identifier=Identifier(_name="MockJSONDataset"), + ) + + self.assertEqual(set(dataset.column_names), {"a", "b", "c", "d", "index"}) + self.assertEqual(len(dataset), 3) + + # Remove the temporary directory + shutil.rmtree("tmp") + + def test_save_load(self): + # Create a temporary directory + os.mkdir("tmp") + + # Save the dataset to disk + self.testbed.dataset.save(path="tmp") + + # Load the dataset from disk + dataset = Dataset.load(path="tmp") + + # Remove the temporary directory + shutil.rmtree("tmp") + + self.assertEqual(dataset.features, self.testbed.dataset.features) + + def test_map(self): + # Map over the dataset + dataset = self.testbed.dataset.map(lambda x: {"otherlabel": x["label"] + 1}) + self.assertTrue("otherlabel" in dataset.column_names) + self.assertEqual(dataset["otherlabel"], [1, 1, 2, 2, 1, 1]) + + def test_batch(self): + # Check that we can make batches of different sizes + self.assertEqual(len(list(self.testbed.dataset.batch(4))), 2) + self.assertEqual(len(list(self.testbed.dataset.batch(3))), 2) + self.assertEqual(len(list(self.testbed.dataset.batch(2))), 3) + self.assertEqual(len(list(self.testbed.dataset.batch(1))), 6) + + # Check that batches of 2 are correct + self.assertEqual( + list(self.testbed.dataset.batch(2)), + [ + { + "fast": [False, True], + "index": ["0", "1"], + "label": [0, 0], + "metadata": [{"source": "real"}, {"source": "real"}], + "text": ["The man 
is walking.", "The man is running."], + "z": [1, 0], + }, + { + "fast": [True, False], + "index": ["2", "3"], + "label": [1, 1], + "metadata": [{"source": "real"}, {"source": "real"}], + "text": ["The woman is sprinting.", "The woman is resting."], + "z": [1, 0], + }, + { + "fast": [False, False], + "index": ["4", "5"], + "label": [0, 0], + "metadata": [{"source": "fictional"}, {"source": "fictional"}], + "text": ["The hobbit is flying.", "The hobbit is swimming."], + "z": [1, 0], + }, + ], + ) + + def test_chain(self): + # Chain the dataset with itself + dataset = Dataset.chain( + [self.testbed.dataset, self.testbed.dataset], + identifier=Identifier(_name="MockChainedDataset"), + ) + + # Check that the elements match up + for i, x in enumerate(dataset): + self.assertEqual(x, self.testbed.dataset[i % len(self.testbed.dataset)]) + + self.assertEqual(len(dataset), len(self.testbed.dataset) * 2) + + def test_interleave(self): + # Interleave the dataset with itself + dataset = Dataset.interleave( + [self.testbed.dataset, self.testbed.dataset], + identifier=Identifier(_name="MockInterleavedDataset"), + ) + + # Check that the elements match up + for i, x in enumerate(dataset): + self.assertEqual(x, self.testbed.dataset[i // 2]) + + self.assertEqual(len(dataset), len(self.testbed.dataset) * 2) + + def test_load_dataset(self): + # Load the first 20 examples of the boolq dataset + dataset = Dataset.load_dataset("boolq", split="train[:20]") + + # Check that we got 20 examples + self.assertTrue(isinstance(dataset, Dataset)) + self.assertEqual(len(dataset), 20) diff --git a/tests/core/test_decorators.py b/tests/core/test_decorators.py new file mode 100644 index 00000000..5cf76ce7 --- /dev/null +++ b/tests/core/test_decorators.py @@ -0,0 +1,15 @@ +"""Unittests for decorators.""" +from unittest import TestCase + +from robustnessgym.core.decorators import singlecolumn + + +class TestDecorators(TestCase): + def test_singlecolumn(self): + @singlecolumn + def apply(self, batch, columns): + print(columns) + + apply(None, None, ["abc"]) + with self.assertRaises(AssertionError): + apply(None, None, ["abc", "bcd"]) diff --git a/tests/core/test_identifier.py b/tests/core/test_identifier.py new file mode 100644 index 00000000..3c8dee65 --- /dev/null +++ b/tests/core/test_identifier.py @@ -0,0 +1,75 @@ +"""Unittests for Identifiers.""" +from unittest import TestCase + +from robustnessgym.core.identifier import Identifier + + +class TestIdentifier(TestCase): + def setUp(self): + self.min_identifier = Identifier(_name="MyIdentifier") + self.identifier = Identifier( + _name="MyIdentifier", _index=1, param="a", param_2="b" + ) + + def test_init(self): + # Create a simple identifier with a name + identifier = Identifier(_name="MyIdentifier") + self.assertEqual(str(identifier), "MyIdentifier") + + # Create an identifier with a string index + identifier = Identifier(_name="MyIdentifier", _index="abc") + self.assertEqual(str(identifier), "MyIdentifier-abc") + + # Create an identifier with an integer index + identifier = Identifier(_name="MyIdentifier", _index=1) + self.assertEqual(str(identifier), "MyIdentifier-1") + + # Create an identifier with an integer index and two parameters + identifier = Identifier(_name="MyIdentifier", _index=1, param="a", param_2="b") + self.assertEqual(str(identifier), "MyIdentifier-1(param=a, param_2=b)") + + def test_name(self): + # Check the name of the identifier + self.assertEqual(self.identifier.name, "MyIdentifier") + self.assertEqual(self.min_identifier.name, "MyIdentifier") + + def 
test_index(self): + # Check the index of the identifier + self.assertEqual(self.identifier.index, "1") + self.assertEqual(self.min_identifier.index, None) + + def test_parameters(self): + # Check the parameters of the identifier + self.assertEqual(self.identifier.parameters, {"param": "a", "param_2": "b"}) + self.assertEqual(self.min_identifier.parameters, {}) + + def test_range(self): + # Use the range function to create multiple identifiers + identifiers = Identifier.range(3, _name="MyIdentifier", param="a", param_2="b") + for i, identifier in enumerate(identifiers): + self.assertEqual(identifier, f"MyIdentifier-{i + 1}(param=a, param_2=b)") + + def test_eq(self): + # Two identifiers created with the same arguments should be equal + identifier = Identifier(_name="MyIdentifier", _index=1, param="a", param_2="b") + self.assertEqual(self.identifier, identifier) + self.assertNotEqual(self.min_identifier, identifier) + + # But not two identifiers created with different arguments + identifier = Identifier(_name="MyIdentifier", _index=2, param="a", param_2="b") + self.assertNotEqual(self.identifier, identifier) + self.assertNotEqual(self.min_identifier, identifier) + + def test_dumps(self): + # Dump the identifier to a json + self.assertEqual( + self.identifier.dumps(), + '{"_name": "MyIdentifier", "_index": "1", "_parameters": {"param": "a", ' + '"param_2": "b"}}', + ) + + def test_loads(self): + # Dump the identifier to a json string and load it back + s = self.identifier.dumps() + identifier = Identifier.loads(s) + self.assertEqual(identifier, self.identifier) diff --git a/tests/core/test_report.py b/tests/core/test_report.py new file mode 100644 index 00000000..486477ab --- /dev/null +++ b/tests/core/test_report.py @@ -0,0 +1,314 @@ +"""Unittests for Report.""" +from unittest import TestCase, skip + +import pandas as pd + +from robustnessgym.core.report import ( + ClassDistributionColumn, + NumericColumn, + Report, + ScoreColumn, +) + + +class TestReport(TestCase): + def setUp(self): + self.cols = [ + ScoreColumn("f1", 0, 1, is_0_to_1=True), + ScoreColumn("perplexity", 0, 50), + ClassDistributionColumn("Class Dist", ["e", "n", "c"]), + NumericColumn("Size"), + ] + self.data = pd.DataFrame( + [ + ["Cat A", "Slice C", 0.1, 5, [0.1, 0.2, 0.7], 300], + ["Cat C", "Slice A", 0.2, 10, [0.4, 0.2, 0.4], 3], + ["Cat A", "Slice A", 0.3, 15, [0.1, 0, 0.9], 5000], + ["Cat B", "Slice B", 0.4, 20, [0.5, 0.4, 0.1], 812], + ["Cat B", "Slice D", 0.5, 25, [0.3, 0.2, 0.5], 13312], + ] + ) + self.model_name = "BERT" + self.dataset_name = "SNLI" + + def test_init(self): + # Create a basic report + report = Report( + self.data, + self.cols, + model_name=self.model_name, + dataset_name=self.dataset_name, + ) + self.assertTrue(self.data.equals(report.data)) + + # Pass config params + custom_color_scheme = ["#000000"] + report = Report( + self.data, + self.cols, + model_name=self.model_name, + dataset_name=self.dataset_name, + color_scheme=custom_color_scheme, + ) + self.assertEqual(custom_color_scheme, report.config["color_scheme"]) + + def test_sort(self): + # Sort alphabetically + report = Report( + self.data, + self.cols, + model_name=self.model_name, + dataset_name=self.dataset_name, + ) + report.sort() + actual = report.data + expected = pd.DataFrame( + [ + ["Cat A", "Slice A", 0.3, 15, [0.1, 0, 0.9], 5000], + ["Cat A", "Slice C", 0.1, 5, [0.1, 0.2, 0.7], 300], + ["Cat B", "Slice B", 0.4, 20, [0.5, 0.4, 0.1], 812], + ["Cat B", "Slice D", 0.5, 25, [0.3, 0.2, 0.5], 13312], + ["Cat C", "Slice A", 0.2, 10, [0.4, 
0.2, 0.4], 3], + ] + ) + self.assertTrue(actual.equals(expected)) + + # Sort by specified category order + report = Report( + self.data, + self.cols, + model_name=self.model_name, + dataset_name=self.dataset_name, + ) + report.sort( + category_order={ + "Cat B": 0, + "Cat C": 2, + "Cat A": 1, + } + ) + actual = report.data + expected = pd.DataFrame( + [ + ["Cat B", "Slice B", 0.4, 20, [0.5, 0.4, 0.1], 812], + ["Cat B", "Slice D", 0.5, 25, [0.3, 0.2, 0.5], 13312], + ["Cat A", "Slice A", 0.3, 15, [0.1, 0, 0.9], 5000], + ["Cat A", "Slice C", 0.1, 5, [0.1, 0.2, 0.7], 300], + ["Cat C", "Slice A", 0.2, 10, [0.4, 0.2, 0.4], 3], + ] + ) + self.assertTrue(actual.equals(expected)) + + # Sort by specified slice order + report = Report( + self.data, + self.cols, + model_name=self.model_name, + dataset_name=self.dataset_name, + ) + report.sort( + slice_order={"Slice D": 0, "Slice C": 1, "Slice B": 2, "Slice A": 3} + ) + actual = report.data + expected = pd.DataFrame( + [ + ["Cat A", "Slice C", 0.1, 5, [0.1, 0.2, 0.7], 300], + ["Cat A", "Slice A", 0.3, 15, [0.1, 0, 0.9], 5000], + ["Cat B", "Slice D", 0.5, 25, [0.3, 0.2, 0.5], 13312], + ["Cat B", "Slice B", 0.4, 20, [0.5, 0.4, 0.1], 812], + ["Cat C", "Slice A", 0.2, 10, [0.4, 0.2, 0.4], 3], + ] + ) + self.assertTrue(actual.equals(expected)) + + # Sort by specified category order and slice order + report = Report( + self.data, + self.cols, + model_name=self.model_name, + dataset_name=self.dataset_name, + ) + report.sort( + category_order={ + "Cat B": 0, + "Cat C": 2, + "Cat A": 1, + }, + slice_order={"Slice D": 0, "Slice C": 1, "Slice B": 2, "Slice A": 3}, + ) + actual = report.data + expected = pd.DataFrame( + [ + ["Cat B", "Slice D", 0.5, 25, [0.3, 0.2, 0.5], 13312], + ["Cat B", "Slice B", 0.4, 20, [0.5, 0.4, 0.1], 812], + ["Cat A", "Slice C", 0.1, 5, [0.1, 0.2, 0.7], 300], + ["Cat A", "Slice A", 0.3, 15, [0.1, 0, 0.9], 5000], + ["Cat C", "Slice A", 0.2, 10, [0.4, 0.2, 0.4], 3], + ] + ) + self.assertTrue(actual.equals(expected)) + + def test_filter(self): + # Filter by category + report = Report( + self.data, + self.cols, + model_name=self.model_name, + dataset_name=self.dataset_name, + ) + report.filter(categories=["Cat B"]) + actual = report.data + expected = pd.DataFrame( + [ + ["Cat B", "Slice B", 0.4, 20, [0.5, 0.4, 0.1], 812], + ["Cat B", "Slice D", 0.5, 25, [0.3, 0.2, 0.5], 13312], + ] + ) + self.assertTrue(actual.equals(expected)) + + # Filter by slice + report = Report( + self.data, + self.cols, + model_name=self.model_name, + dataset_name=self.dataset_name, + ) + report.filter(slices=["Slice A", "Slice C"]) + actual = report.data + expected = pd.DataFrame( + [ + ["Cat A", "Slice C", 0.1, 5, [0.1, 0.2, 0.7], 300], + ["Cat C", "Slice A", 0.2, 10, [0.4, 0.2, 0.4], 3], + ["Cat A", "Slice A", 0.3, 15, [0.1, 0, 0.9], 5000], + ] + ) + self.assertTrue(actual.equals(expected)) + + def test_rename(self): + report = Report( + self.data, + self.cols, + model_name=self.model_name, + dataset_name=self.dataset_name, + ) + category_map = {"Cat C": "Cat D"} + slice_map = {"Slice A": "Slice D"} + report.rename(category_map=category_map, slice_map=slice_map) + actual = report.data + expected = pd.DataFrame( + [ + ["Cat A", "Slice C", 0.1, 5, [0.1, 0.2, 0.7], 300], + ["Cat D", "Slice D", 0.2, 10, [0.4, 0.2, 0.4], 3], + ["Cat A", "Slice D", 0.3, 15, [0.1, 0, 0.9], 5000], + ["Cat B", "Slice B", 0.4, 20, [0.5, 0.4, 0.1], 812], + ["Cat B", "Slice D", 0.5, 25, [0.3, 0.2, 0.5], 13312], + ] + ) + self.assertTrue(actual.equals(expected)) + + def 
test_set_class_codes(self): + report = Report( + self.data, + self.cols, + model_name=self.model_name, + dataset_name=self.dataset_name, + ) + custom_class_codes = ["A", "B", "C"] + report.set_class_codes(custom_class_codes) + for col in report.columns: + if isinstance(col, ClassDistributionColumn): + self.assertEqual(col.class_codes, custom_class_codes) + + def test_set_range(self): + report = Report( + self.data, + self.cols, + model_name=self.model_name, + dataset_name=self.dataset_name, + ) + report.set_range("f1", 0.1, 0.3) + for col in report.columns: + if col.title == "f1": + self.assertEqual((col.min_val, col.max_val), (0.1, 0.3)) + + def test_figure(self): + report = Report( + self.data, + self.cols, + model_name=self.model_name, + dataset_name=self.dataset_name, + ) + + # Original unsorted data should cause an error + self.assertRaises(ValueError, report.figure) + + # Sort should resolve that error + report.sort() + try: + report.figure() + except ValueError: + self.fail("report.figure() raised ValueError unexpectedly!") + + @skip("Manual test") + def test_display(self): + report = Report( + self.data, + self.cols, + model_name=self.model_name, + dataset_name=self.dataset_name, + ) + + report.sort() + figure = report.figure() + figure.show() + + report.sort(category_order={"Cat C": 1, "Cat A": 2, "Cat B": 3}) + report.rename(slice_map={"Slice A": "A"}, category_map={"Cat B": "B"}) + report.filter(slices=["A", "Slice B", "Slice C"]) + report.set_range("f1", 0.05, 0.45) + report.update_config(font_size_heading=16) + figure = report.figure(show_title=True) + figure.show() + + @skip("Manual test") + def test_display_2(self): + data = pd.DataFrame( + [ + [ + "Eval", + "snli1", + 0.8799999952316284, + 0.876409113407135, + [0.368, 0.304, 0.328], + [0.344, 0.288, 0.368], + 125, + ], + [ + "Eval", + "snli2", + 0.8799999952316284, + 0.876409113407135, + [0.368, 0.304, 0.328], + [0.344, 0.288, 0.368], + 125, + ], + [ + "Eval", + "snli3", + 0.8799999952316284, + 0.876409113407135, + [0.368, 0.304, 0.328], + [0.344, 0.288, 0.368], + 125, + ], + ] + ) + cols = [ + ScoreColumn("F1", min_val=0, max_val=1, is_0_to_1=True), + ScoreColumn("Accuracy", min_val=0, max_val=1, is_0_to_1=True), + ClassDistributionColumn("Class Dist", ["e", "n", "c"]), + ClassDistributionColumn("Pred Dist", ["e", "n", "c"]), + NumericColumn("Size"), + ] + report = Report(data, cols) + report.figure().show() diff --git a/tests/core/test_slice.py b/tests/core/test_slice.py new file mode 100644 index 00000000..dad384b1 --- /dev/null +++ b/tests/core/test_slice.py @@ -0,0 +1,22 @@ +"""Unittests for Slices.""" +from unittest import TestCase + +from robustnessgym.core.slice import Slice +from tests.testbeds import MockTestBedv0 + + +class TestSlice(TestCase): + def setUp(self): + self.testbed = MockTestBedv0() + + def test_from_dataset(self): + # Create a slice + sl = Slice.from_dataset(self.testbed.dataset) + # Compare the slice identifier + self.assertEqual( + str(sl), "Slice[category: curated, num_rows: 6](MockDataset(version=1.0))" + ) + # Length of the slice + self.assertEqual(len(sl), 6) + # Lineage of the slice + self.assertEqual(sl.lineage, [("Dataset", "MockDataset(version=1.0)")]) diff --git a/tests/core/test_testbench.py b/tests/core/test_testbench.py new file mode 100644 index 00000000..b654f778 --- /dev/null +++ b/tests/core/test_testbench.py @@ -0,0 +1,188 @@ +"""Unittests for TestBench.""" + +import functools +from unittest import TestCase, skip + +import torch + +from robustnessgym import Dataset, Slice, Task, 
TestBench +from robustnessgym.core.model import Model + + +class TestTestbench(TestCase): + # TODO add NLG test cases + + @skip("Long-running test") + def test_evaluate(self): + + # Test evaluate with a task + task = self._get_task() + testbench = self._get_testbench(task) + model = self._get_model(task=task) + testbench.evaluate( + model=model, + coerce_fn=functools.partial(Model.remap_labels, label_map=[1, 2, 0]), + ) + self.assertIn(model.identifier, testbench.metrics) + self.assertSetEqual( + set(testbench.metrics[model.identifier].keys()), + set(sl.identifier for sl in testbench.slices), + ) + for sl in testbench.slices: + eval_dict = testbench.metrics[model.identifier][sl.identifier] + self.assertSetEqual(set(eval_dict.keys()), set(testbench.task.metrics)) + for value in eval_dict.values(): + self.assertIsNotNone(value) + + # Test evaluate without a task + + testbench = self._get_testbench() + model = self._get_model(is_classifier=True) + # Check that it raises exception if input_columns, + # output_columns not specified in absence of task + self.assertRaises( + ValueError, + testbench.evaluate, + model=model, + coerce_fn=functools.partial(Model.remap_labels, label_map=[1, 2, 0]), + ) + + task = self._get_task() + testbench = self._get_testbench(task=task) + model = self._get_model(task=task) + testbench.evaluate( + model=model, + coerce_fn=functools.partial(Model.remap_labels, label_map=[1, 2, 0]), + input_columns=["sentence1", "sentence2"], + output_columns=["label"], + ) + self.assertIn(model.identifier, testbench.metrics) + self.assertSetEqual( + set(testbench.metrics[model.identifier].keys()), + set(sl.identifier for sl in testbench.slices), + ) + for sl in testbench.slices: + eval_dict = testbench.metrics[model.identifier][sl.identifier] + self.assertSetEqual(set(eval_dict.keys()), set(testbench.task.metrics)) + for value in eval_dict.values(): + self.assertIsNotNone(value) + + @skip("Long-running test") + def test_add_metrics(self): + testbench = self._get_testbench() + metrics = {"snli_1": {"f1": 0.1, "accuracy": 0.3}} + testbench.add_metrics("bert", metrics) + self.assertEqual(testbench.metrics["bert"], metrics) + + @skip("Long-running test") + def test_add_predictions(self): + model = "bert-base" + task = self._get_task() + testbench = self._get_testbench(task) + torch.manual_seed(1) + predictions = {} + for sl in testbench.slices: + predictions[sl.identifier] = torch.randint(high=3, size=(len(sl),)) + + testbench.add_predictions(model=model, predictions=predictions) + + self.assertIn(model, testbench.metrics) + self.assertSetEqual( + set(testbench.metrics[model].keys()), + set(sl.identifier for sl in testbench.slices), + ) + for sl in testbench.slices: + eval_dict = testbench.metrics[model][sl.identifier] + self.assertSetEqual(set(eval_dict.keys()), set(testbench.task.metrics)) + for value in eval_dict.values(): + self.assertIsNotNone(value) + + @skip("Long-running test") + def test_create_report(self): + task = self._get_task() + testbench = self._get_testbench(task=task) + model = self._get_model(task=task) + testbench.evaluate( + model=model, + coerce_fn=functools.partial(Model.remap_labels, label_map=[1, 2, 0]), + ) + report = testbench.create_report(model) + fig = report.figure() + self.assertIsNotNone(fig) + + @skip("Manual test") + def test_display_report(self): + + # # Create report using 'evaluate' + task = self._get_task() + testbench = self._get_testbench(task=task) + model = self._get_model(task=task) + testbench.evaluate( + model=model, + 
coerce_fn=functools.partial(Model.remap_labels, label_map=[1, 2, 0]), + ) + report = testbench.create_report(model) + fig = report.figure() + fig.show() + + # Create report using add_predictions + task = self._get_task() + testbench = self._get_testbench(task) + torch.manual_seed(1) + predictions = {} + for sl in testbench.slices: + predictions[sl.identifier] = torch.randint(high=3, size=(len(sl),)) + testbench.add_predictions(model="bert-base", predictions=predictions) + report = testbench.create_report("bert-base") + fig = report.figure() + fig.show() + + # Create report using add_metrics + testbench = self._get_testbench() + metrics = { + "snli_1": {"f1": 0.1, "accuracy": 0.1}, + "snli_2": {"f1": 0.5, "accuracy": 0.5}, + "snli_3": {"f1": 0.9, "accuracy": 0.4}, + } + testbench.add_metrics(model, metrics) + report = testbench.create_report(model, metric_ids=["f1", "accuracy"]) + fig = report.figure() + fig.show() + + def _get_task(self): + # Create task + task_identifier = "TernaryNaturalLanguageInference" + task = Task.create(task=task_identifier) + return task + + def _get_model(self, **kwargs): + # TODO have a proper mock model + # Create model + model_identifier = "textattack/bert-base-uncased-snli" + model = Model.huggingface(identifier=model_identifier, **kwargs) + return model + + def _get_testbench(self, task=None): + # TODO Have a proper mock testbench + # Create test bench + testbench_identifier = "test-testbench" + testbench = TestBench( + identifier=testbench_identifier, + task=task, + slices=[ + Slice( + dataset=Dataset.load_dataset("snli", split="train[:128]"), + identifier="snli_1", + ).filter(lambda example: example["label"] != -1), + Slice( + dataset=Dataset.load_dataset("snli", split="validation[:128]"), + identifier="snli_2", + ).filter(lambda example: example["label"] != -1), + Slice( + dataset=Dataset.load_dataset("snli", split="test[:128]"), + identifier="snli_3", + ).filter(lambda example: example["label"] != -1), + ], + dataset_id="snli", + ) + return testbench diff --git a/tests/core/test_tools.py b/tests/core/test_tools.py new file mode 100644 index 00000000..a462a12f --- /dev/null +++ b/tests/core/test_tools.py @@ -0,0 +1,31 @@ +from unittest import TestCase + +from robustnessgym.core.tools import recmerge + + +class TestTools(TestCase): + def test_recmerge(self): + output = recmerge( + {"a": 2, "b": 3, "d": {"e": [1, 2, 3], "f": [3, 4, 5]}, "g": 17}, + {"b": 12, "d": {"e": [1, 2, 3], "f": [3, 4]}}, + {"a": 4, "d": {"f": [3]}}, + ) + self.assertEqual( + output, {"a": 4, "b": 12, "d": {"e": [1, 2, 3], "f": [3]}, "g": 17} + ) + + output = recmerge( + {"a": 2, "b": 3, "d": {"e": [1, 2, 3], "f": [3, 4, 5]}, "g": 17}, + {"b": 12, "d": {"e": [1, 2, 3], "f": [3, 4]}}, + {"a": 4, "d": {"f": [3]}}, + merge_sequences=True, + ) + self.assertEqual( + output, + { + "a": 4, + "b": 12, + "d": {"e": [1, 2, 3, 1, 2, 3], "f": [3, 4, 5, 3, 4, 3]}, + "g": 17, + }, + ) diff --git a/tests/slicebuilders/__init__.py b/tests/slicebuilders/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/slicebuilders/subpopulations/__init__.py b/tests/slicebuilders/subpopulations/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/slicebuilders/subpopulations/test_constituency_overlap.py b/tests/slicebuilders/subpopulations/test_constituency_overlap.py new file mode 100644 index 00000000..eb0b154f --- /dev/null +++ b/tests/slicebuilders/subpopulations/test_constituency_overlap.py @@ -0,0 +1,52 @@ +from unittest import TestCase + +import numpy as np + 
+from robustnessgym.cachedops.allen.constituency_parser import AllenConstituencyParser +from robustnessgym.slicebuilders.subpopulations.constituency_overlap import ( + ConstituencyOverlapSubpopulation, + ConstituencySubtreeSubpopulation, + FuzzyConstituencySubtreeSubpopulation, +) +from tests.testbeds import MockTestBedv1 + + +class TestConstituencyOverlap(TestCase): + testbed = MockTestBedv1() + acp = AllenConstituencyParser() + testbed.dataset = acp(testbed.dataset, columns=["text_a"]) + testbed.dataset = acp(testbed.dataset, columns=["text_b"]) + + def test_has_constituency_overlap(self): + # Create the constituency overlap subpopulation + cos = ConstituencyOverlapSubpopulation( + intervals=[(0, 20), (20, 40), (40, 60), (60, 80), (80, 100)] + ) + self.assertTrue( + np.allclose( + cos.score(self.testbed.dataset[:], columns=["text_a", "text_b"]), + [100] * 4, + ) + ) + + def test_has_constituency_subtree(self): + # Create the constituency subtree subpopulation + css = ConstituencySubtreeSubpopulation() + self.assertTrue( + np.allclose( + css.score(self.testbed.dataset[:], columns=["text_a", "text_b"]), + [1] * 4, + ) + ) + + def test_has_fuzzy_constituency_subtree(self): + # Create the fuzzy constituency subtree subpopulation + fcss = FuzzyConstituencySubtreeSubpopulation( + intervals=[(0, 20), (20, 40), (40, 60), (60, 80), (80, 100)] + ) + self.assertTrue( + np.allclose( + fcss.score(self.testbed.dataset[:], columns=["text_a", "text_b"]), + [100] * 4, + ) + ) diff --git a/tests/slicebuilders/subpopulations/test_length.py b/tests/slicebuilders/subpopulations/test_length.py new file mode 100644 index 00000000..af25268d --- /dev/null +++ b/tests/slicebuilders/subpopulations/test_length.py @@ -0,0 +1,27 @@ +from unittest import TestCase + +import numpy as np + +from robustnessgym.cachedops.spacy import Spacy +from robustnessgym.slicebuilders.subpopulations.length import LengthSubpopulation +from tests.testbeds import MockTestBedv0 + + +class TestLengthSubpopulation(TestCase): + def setUp(self): + self.testbed = MockTestBedv0() + self.testbed.dataset = Spacy()(self.testbed.dataset, columns=["text"]) + + def test_score(self): + # Create the length subpopulation + length = LengthSubpopulation(intervals=[(1, 3), (4, 5)]) + + # Compute scores + scores = length.score(self.testbed.dataset[:], columns=["text"]) + self.assertTrue(np.allclose(scores, np.array([5, 5, 5, 5, 5, 5]))) + + # Apply the subpopulation + dataset, slices, slice_matrix = length(self.testbed.dataset, columns=["text"]) + + # Check that the slice membership lines up + self.assertTrue(np.allclose(slice_matrix, np.array([[0, 1]] * 6))) diff --git a/tests/slicebuilders/transformations/__init__.py b/tests/slicebuilders/transformations/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/slicebuilders/transformations/test_eda.py b/tests/slicebuilders/transformations/test_eda.py new file mode 100644 index 00000000..641686cb --- /dev/null +++ b/tests/slicebuilders/transformations/test_eda.py @@ -0,0 +1,60 @@ +"""Unittests for the EasyDataAugmentation class.""" +import random +import unittest +from unittest import TestCase + +import numpy as np + +from robustnessgym.slicebuilders.transformations.eda import EasyDataAugmentation +from tests.testbeds import MockTestBedv0 + + +@unittest.skip("Unable to control EDA randomness.") +class TestEasyDataAugmentation(TestCase): + def setUp(self): + self.testbed = MockTestBedv0() + + def test_apply(self): + # Create the EDA SliceBuilder + eda = 
EasyDataAugmentation(num_transformed=3) + + # Set the seed + random.seed(0) + np.random.seed(0) + + for i, identifier in enumerate(eda.identifiers): + self.assertEqual( + str(identifier), + f"EasyDataAugmentation-{i + 1}(alpha_sr=0.1, alpha_ri=0.1, " + f"alpha_rs=0.1, p_rd=0.1)", + ) + + # Apply it + dataset, slices, slice_membership = eda(self.testbed.dataset, columns=["text"]) + + # All the sizes match up + self.assertEqual(len(dataset), len(self.testbed.dataset)) + for sl in slices: + self.assertEqual(len(sl), len(self.testbed.dataset)) + self.assertEqual(slice_membership.shape, (6, 3)) + + # Everything was transformed + self.assertTrue(np.all(slice_membership)) + + # Checking that the transformed text matches + self.assertEqual( + slices[0]["text"], + [ + "the man is military man walking", + "the constitute man is running", + "the woman is sprinting", + "the woman is", + "the hobbit is flying", + "the hobbit is swimming", + ], + ) + + # Dataset interaction history updated correctly + self.assertEqual( + len(dataset.fetch_tape(["slicebuilders", "transformation"]).history), 3 + ) diff --git a/tests/slicebuilders/transformations/test_fairseq.py b/tests/slicebuilders/transformations/test_fairseq.py new file mode 100644 index 00000000..e946dd0b --- /dev/null +++ b/tests/slicebuilders/transformations/test_fairseq.py @@ -0,0 +1,45 @@ +"""Unittests for the FairseqBacktranslation class.""" +import unittest +from unittest import TestCase + +import numpy as np +import torch + +from robustnessgym.slicebuilders.transformations.fairseq import FairseqBacktranslation +from tests.testbeds import MockTestBedv0 + + +@unittest.skip("Downloads fairseq models during CI, which is slow.") +class TestFairseqBacktranslation(TestCase): + def setUp(self): + self.testbed = MockTestBedv0() + + def test_apply(self): + # Set the seed + np.random.seed(0) + torch.random.manual_seed(0) + + # Create the backtranslation transformation + self.backtranslation = FairseqBacktranslation( + n_src2tgt=2, + n_tgt2src=2, + device="cpu", + ) + + # Apply it + dataset, slices, slice_membership = self.backtranslation( + self.testbed.dataset, columns=["text"] + ) + + # Checking that the transformed text matches + self.assertEqual( + slices[0]["text"], + [ + "The man leaves.", + "The man runs.", + "The woman sprints.", + "The Lady rests.", + "The Hobbit is flying.", + "The Hobbit floats.", + ], + ) diff --git a/tests/slicebuilders/transformations/test_nlpaug.py b/tests/slicebuilders/transformations/test_nlpaug.py new file mode 100644 index 00000000..42f8ea0b --- /dev/null +++ b/tests/slicebuilders/transformations/test_nlpaug.py @@ -0,0 +1,65 @@ +"""Unittests for the NlpAugTransformation class.""" +import random +from unittest import TestCase + +import numpy as np +from nlpaug.augmenter.word import SynonymAug +from nlpaug.flow import Sequential + +from robustnessgym.slicebuilders.transformations.nlpaug import NlpAugTransformation +from tests.testbeds import MockTestBedv0 + + +class TestNlpAugTransformation(TestCase): + def setUp(self): + self.testbed = MockTestBedv0() + + def test_apply(self): + # Set the seed + random.seed(0) + np.random.seed(0) + + # Create the nlpaug transformation + nlpaug_transformation = NlpAugTransformation( + pipeline=Sequential(flow=[SynonymAug()]), + num_transformed=3, + ) + + for i, identifier in enumerate(nlpaug_transformation.identifiers): + self.assertEqual( + str(identifier), + f"NlpAugTransformation-{i + 1}(pipeline=[Synonym_Aug(src=wordnet, " + f"action=substitute, method=word)])", + ) + + # Apply it + 
dataset, slices, slice_membership = nlpaug_transformation( + self.testbed.dataset, columns=["text"] + ) + + # All the sizes match up + self.assertEqual(len(dataset), len(self.testbed.dataset)) + for sl in slices: + self.assertEqual(len(sl), len(self.testbed.dataset)) + self.assertEqual(slice_membership.shape, (6, 3)) + + # Everything was transformed + self.assertTrue(np.all(slice_membership)) + + # Dataset interaction history updated correctly + self.assertEqual( + len(dataset.fetch_tape(["slicebuilders", "transformation"]).history), 3 + ) + + # Checking that the transformed text matches + self.assertEqual( + slices[0]["text"], + [ + "The man is walk.", + "The man be running.", + "The cleaning lady is sprinting.", + "The woman personify resting.", + "The hobbit is fly.", + "The hobbit is swimming.", + ], + ) diff --git a/tests/testbeds.py b/tests/testbeds.py new file mode 100644 index 00000000..c7e5e52e --- /dev/null +++ b/tests/testbeds.py @@ -0,0 +1,87 @@ +"""A collection of simple testbeds to build test cases.""" +from copy import deepcopy + +from robustnessgym.core.dataset import Dataset +from robustnessgym.core.identifier import Identifier + + +class MockTestBedv0: + """Simple mock dataset with 6 examples.""" + + def __init__(self): + # Create a fake dataset + self.dataset = Dataset.from_batch( + { + "text": [ + "The man is walking.", + "The man is running.", + "The woman is sprinting.", + "The woman is resting.", + "The hobbit is flying.", + "The hobbit is swimming.", + ], + "label": [0, 0, 1, 1, 0, 0], + "z": [1, 0, 1, 0, 1, 0], + "fast": [False, True, True, False, False, False], + "metadata": [ + {"source": "real"}, + {"source": "real"}, + {"source": "real"}, + {"source": "real"}, + {"source": "fictional"}, + {"source": "fictional"}, + ], + }, + identifier=Identifier(_name="MockDataset", version="1.0"), + ) + + # Keep a copy of the original + self.original_dataset = deepcopy(self.dataset) + + assert len(self.dataset) == 6 + + def test_attributes(self): + # Both datasets use the same cache files for backing + print(self.dataset.cache_files) + print(self.original_dataset.cache_files) + print(self.dataset.identifier) + + def problems(self): + # FIXME(karan): this shouldn't be happening: why is otherlabel disappearing here + with self.assertRaises(AssertionError): + # Create an additional integer column in the dataset + dataset = self.testbed.dataset.map(lambda x: {"otherlabel": x["label"] + 1}) + dataset_0_0 = self.cachedop(dataset, columns=["label"]) + self.assertTrue("otherlabel" in dataset_0_0.column_names) + + +class MockTestBedv1: + """Simple mock dataset with 4 examples containing pairs of sentences.""" + + def __init__(self): + # Create a fake dataset + self.dataset = Dataset.from_batch( + { + "text_a": [ + "Before the actor slept, the senator ran.", + "The lawyer knew that the judges shouted.", + "If the actor slept, the judge saw the artist.", + "The lawyers resigned, or the artist slept.", + ], + "text_b": [ + "The actor slept.", + "The judges shouted.", + "The actor slept.", + "The artist slept.", + ], + "label": [0, 0, 1, 1], + "z": [1, 0, 1, 0], + "fast": [False, True, True, False], + }, + identifier=Identifier(_name="MockDataset", version="2.0"), + ) + + # Keep a copy of the original + self.original_dataset = deepcopy(self.dataset) + + assert len(self.dataset) == 4
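A minimal sketch (hypothetical, not part of the commit above) of how MockTestBedv1 could be consumed by an additional unit test; it assumes that Dataset.from_batch adds the auto-generated "index" column, as exercised in tests/core/test_dataset.py, and that the package is importable as in the tests above:

from unittest import TestCase

from tests.testbeds import MockTestBedv1


class TestMockTestBedv1(TestCase):
    def setUp(self):
        # Four premise/hypothesis-style sentence pairs
        self.testbed = MockTestBedv1()

    def test_columns(self):
        # Sizes and column names follow the batch used to build the testbed,
        # plus the "index" column added by Dataset.from_batch
        self.assertEqual(len(self.testbed.dataset), 4)
        self.assertEqual(
            set(self.testbed.dataset.column_names),
            {"text_a", "text_b", "label", "z", "fast", "index"},
        )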