diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 0000000..be2d7fe --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,18 @@ +{ // https://aka.ms/devcontainer.json + /* We want to develop on the lowest version of Python this project supports, since + * backwards compatibility issues come up more often than forward compatibility. */ + // https://github.com/devcontainers/images/tree/main/src/python + "image": "mcr.microsoft.com/devcontainers/python:3.9-bookworm", + "features": { + /* `features/common-utils` is already in the python image; it's missing non-free + * packages, but that's only manpages-posix and manpages-posix-dev. */ + // Only `nvm` is installed in the python image, so install node: + "ghcr.io/devcontainers/features/node:1": {}, + // "ghcr.io/devcontainers/features/docker-in-docker:2": {}, + // "ghcr.io/haukex/devcontainer-features/perl:1": {}, + "ghcr.io/lukewiwa/features/shellcheck:0": {}, + "ghcr.io/guiyomh/features/vim:0": {} + }, + "postCreateCommand": ".devcontainer/initialize.sh" +} +/* vim: set filetype=javascript ts=4 sw=4 expandtab : */ diff --git a/.devcontainer/initialize.sh b/.devcontainer/initialize.sh new file mode 100644 index 0000000..ea926f1 --- /dev/null +++ b/.devcontainer/initialize.sh @@ -0,0 +1,16 @@ +#!/bin/bash +set -euxo pipefail +cd -- "$( dirname -- "${BASH_SOURCE[0]}" )"/.. + +# set up venv in $HOME (the /workspaces mount can be slow in some containers) +python_version="$( python -c 'import sys; print(".".join(map(str,sys.version_info[:2])))' )" +venv_dir="$HOME/.venvs/$( basename -- "$PWD" )/.venv$python_version" +python -m venv "$venv_dir" +# shellcheck source=/dev/null +source "$venv_dir/bin/activate" + +make installdeps + +# make sure all files are owned by us (but only if we already own this directory) - sometimes needed on e.g. DevPod +[[ "$(stat --printf="%u" .)" -eq "$(id -u)" ]] && sudo -n chown -Rc "$(id -u)" . +simple-perms -mr . 
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 0000000..54636aa --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,55 @@ +# https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions +name: Full Python Tests, Lint, and Coverage (all versions and OSes) +on: + push: + # only on commits, not on tags + branches: + - '**' + pull_request: +jobs: + tests: + name: CPython ${{ matrix.python-version }} on ${{ matrix.os }} + # Reminder: Keep in sync with dev/local-actions.sh + strategy: + fail-fast: false + matrix: + os: [Ubuntu, Windows, macOS] + # Remember that some tests below only run on one version, so keep that up-to-date. + python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] + runs-on: ${{ matrix.os }}-latest + steps: + - name: Disable autocrlf on Windows + if: ${{ matrix.os == 'Windows' }} + # https://github.com/actions/checkout/issues/135 + run: git config --global core.autocrlf false + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: 20 + - name: Install pyright + run: npm install --global pyright + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + allow-prereleases: true + # https://github.com/actions/setup-python#caching-packages-dependencies + cache: pip + # remember to keep in sync with Makefile: + cache-dependency-path: | + requirements.txt + dev/requirements.txt + - name: Install dependencies + run: make installdeps + - name: Run checks and lint + run: make smoke-checks ver-checks + - name: Run version-independent checks + if: ${{ matrix.python-version == '3.12' }} + run: make other-checks + - name: Run nix-checks and shellcheck on Linux + if: ${{ matrix.os == 'Ubuntu' }} + # Only run nix-checks on Ubuntu because it doesn't work on Windows and bash is too old on macOS. + # Only run shellcheck on Ubuntu because it's only installed there by default. 
+ run: make nix-checks shellcheck + - name: Tests and Coverage + run: make coverage diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..eb39a10 --- /dev/null +++ b/.gitignore @@ -0,0 +1,9 @@ +/.devcontainer/.devpod-internal/ +.venv*/ +__pycache__/ +.mypy_cache/ +.coverage +coverage.xml +htmlcov/ +/dist/ +/*.egg-info/ \ No newline at end of file diff --git a/.vscode/extensions.json b/.vscode/extensions.json new file mode 100644 index 0000000..8299fa3 --- /dev/null +++ b/.vscode/extensions.json @@ -0,0 +1,18 @@ +{ + "recommendations": [ + "ms-vscode.makefile-tools", + "github.vscode-github-actions", + "ms-python.python", + "ms-python.vscode-pylance", + "ms-python.mypy-type-checker", + "ms-python.pylint", + "ms-python.flake8", + "mads-hartmann.bash-ide-vscode", + "timonwong.shellcheck", + "ryanluker.vscode-coverage-gutters", + "oderwat.indent-rainbow", + "tamasfe.even-better-toml", + "streetsidesoftware.code-spell-checker", + ] +} +/* vim: set filetype=javascript ts=4 sw=4 expandtab : */ diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..bb4a394 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,37 @@ +{ + "files.eol": "\n", + "files.trimTrailingWhitespace": true, + "editor.rulers": [ 150 ], // keep in sync with pyproject.toml + "[markdown]": { + "editor.rulers": [ 100 ], + }, + "cSpell.language": "en,en-US", + "python.testing.pytestEnabled": false, + "python.testing.unittestEnabled": true, + "python.testing.unittestArgs": [ "-v", "-s", "${workspaceFolder}", ], + "pylint.importStrategy": "fromEnvironment", + "pylint.args": [ "--rcfile=${workspaceFolder}/pyproject.toml", ], + "pylint.severity": { + // raised the default severity so they are easier to find + "convention": "Warning", + "refactor": "Warning", + "info": "Warning" + }, + "flake8.importStrategy": "fromEnvironment", + "flake8.args": [ "--toml-config=${workspaceFolder}/pyproject.toml", ], + "mypy-type-checker.importStrategy": "fromEnvironment", 
+ "mypy-type-checker.reportingScope": "workspace", + "mypy-type-checker.args": [ "--config-file", "${workspaceFolder}/pyproject.toml", ], + "indentRainbow.ignoreErrorLanguages": [ + "python", + "markdown" + ], + "cSpell.ignoreWords": [ + "Pushd", + "hexversion", + "igbpyutils", + "openhook", + "unitless" + ], +} +/* vim: set filetype=javascript ts=4 sw=4 expandtab : */ diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..4bcb543 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,7 @@ +Changelog for PyTOA5 +==================== + +0.9.0 - *not yet released* +-------------------------- + +- Initial release diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..0a04128 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,165 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. 
The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. + + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. 
You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. + + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. 
A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. 
+ + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..fd07394 --- /dev/null +++ b/Makefile @@ -0,0 +1,108 @@ +## To get help on this makefile, run `make help`. 
+# https://www.gnu.org/software/make/manual/make.html + +# Adapt these variables for this project: +py_code_locs = toa5 tests +# Hint: $(filter-out whatever,$(py_code_locs)) +# Remember to keep in sync with GitHub Actions workflows: +requirement_txts = requirements.txt dev/requirements.txt +perm_checks = ./* .gitignore .vscode .github + +# The user can change the following on the command line: +PYTHON3BIN = python + +.PHONY: help tasklist installdeps test build-check +.PHONY: smoke-checks nix-checks shellcheck ver-checks other-checks coverage unittest +test: smoke-checks nix-checks shellcheck ver-checks other-checks coverage ## Run all tests +# Reminder: If the `test` target changes, make the appropriate changes to .github/workflows/tests.yml + +SHELL = /bin/bash +.ONESHELL: # each recipe is executed as a single script + +build-check: smoke-checks + @set -euxo pipefail + [[ "$$OSTYPE" =~ linux.* ]] + $(PYTHON3BIN) -m build --sdist + dist_files=(dist/*.tar.gz) + $(PYTHON3BIN) -m twine check "$${dist_files[@]}" + if [[ $${#dist_files[@]} -ne 1 ]]; then echo "More than one dist file:" "$${dist_files[@]}"; exit 1; fi + PYTHON3BIN="$(PYTHON3BIN)" dev/isolated-dist-test.sh "$${dist_files[0]}" + echo "$${dist_files[@]}" + +tasklist: ## List open tasks. + @grep --color=auto \ + --exclude-dir=.git --exclude-dir=__pycache__ --exclude-dir=.ipynb_checkpoints --exclude-dir='.venv*' \ + --exclude-dir='.*cache' --exclude-dir=node_modules --exclude='LICENSE*' --exclude='.*.swp' \ + -Eri '\bto.?do\b' + true # ignore nonzero exit code from grep + +installdeps: ## Install project dependencies + @set -euxo pipefail + $(PYTHON3BIN) -m pip install --upgrade --upgrade-strategy=eager --no-warn-script-location pip wheel + $(PYTHON3BIN) -m pip install --upgrade --upgrade-strategy=eager --no-warn-script-location $(foreach x,$(requirement_txts),-r $(x)) + # $(PYTHON3BIN) -m pip install --editable . 
# for modules/packages + # other examples: git lfs install / npm ci + +smoke-checks: ## Basic smoke tests + @set -euxo pipefail + # example: [[ "$$OSTYPE" =~ linux.* ]] # this project only runs on Linux + $(PYTHON3BIN) -c 'import sys; sys.exit(0 if sys.version_info.major==3 else 1)' # make sure we're on Python 3 + +nix-checks: ## Checks that depend on a *NIX OS/FS + @set -euo pipefail + unreliable_perms="yes" + if [ "$$OSTYPE" == "msys" ]; then # e.g. Git bash on Windows + echo "- Assuming unreliable permission bits because Windows" + set -x + else + fstype="$$( findmnt --all --first --noheadings --list --output FSTYPE --notruncate --target . )" + if [[ "$$fstype" =~ ^(vfat|vboxsf|9p)$$ ]]; then + echo "- Assuming unreliable permission bits because fstype=$$fstype" + set -x + else # we can probably depend on permission bits being correct + unreliable_perms="" + set -x + $(PYTHON3BIN) -m simple_perms -r $(perm_checks) # if this errors, run `simple-perms -m ...` for auto fix + test -z "$$( find . \( -type d -name '.venv*' -prune \) -o \( -iname '*.sh' ! -executable -print \) )" + fi + fi + $(PYTHON3BIN) -m igbpyutils.dev.script_vs_lib $${unreliable_perms:+"--exec-git"} --notice $(py_code_locs) + # exclusions to the above can be done via: + # find $(py_code_locs) -path '*/exclude/me.py' -o -type f -iname '*.py' -exec py-check-script-vs-lib --notice '{}' + + +shellcheck: ## Run shellcheck + @set -euxo pipefail + # https://www.gnu.org/software/findutils/manual/html_mono/find.html + find . 
\( -type d \( -name '.venv*' -o -name '.devpod-internal' \) -prune \) -o \( -iname '*.sh' -exec shellcheck '{}' + \) + +ver-checks: ## Checks that depend on the Python version + @set -euxo pipefail + # https://microsoft.github.io/pyright/#/command-line + npx pyright --project pyproject.toml --pythonpath "$$( $(PYTHON3BIN) -c 'import sys; print(sys.executable)' )" $(py_code_locs) + $(PYTHON3BIN) -m mypy --config-file pyproject.toml $(py_code_locs) + $(PYTHON3BIN) -m flake8 --toml-config=pyproject.toml $(py_code_locs) + $(PYTHON3BIN) -m pylint --rcfile=pyproject.toml --recursive=y $(py_code_locs) + +other-checks: ## Checks not depending on the Python version + @set -euxo pipefail + # note the following is on one line b/c GitHub macOS Action Runners are running bash 3.2 and the multiline version didn't work there... + for REQ in $(requirement_txts); do $(PYTHON3BIN) -m pur --skip-gt --dry-run-changed --nonzero-exit-code -r "$$REQ"; done + +unittest: ## Run unit tests + @PYTHONDEVMODE=1 PYTHONWARNINGS=error PYTHONWARNDEFAULTENCODING=1 $(PYTHON3BIN) -m unittest -v + +coverage: ## Run unit tests with coverage + @set -euxo pipefail + # Note: Don't add command-line arguments here, put them in the rcfile + # We also don't use --fail_under=100 because then the report won't be written. 
+ PYTHONDEVMODE=1 PYTHONWARNINGS=error PYTHONWARNDEFAULTENCODING=1 $(PYTHON3BIN) -m coverage run --rcfile=pyproject.toml + $(PYTHON3BIN) -m coverage report --rcfile=pyproject.toml + # $(PYTHON3BIN) -m coverage html --rcfile=pyproject.toml + $(PYTHON3BIN) -m coverage xml --rcfile=pyproject.toml + $(PYTHON3BIN) -m coverage json --rcfile=pyproject.toml -o- \ + | perl -wM5.014 -MJSON::PP=decode_json -MTerm::ANSIColor=colored -0777 -ne \ + '$$_=decode_json($$_)->{totals}{percent_covered};print"=> ",colored([$$_==100?"green":"red"],"$$_% Coverage")," <=\n";exit($$_==100?0:1)' + +# https://stackoverflow.com/q/8889035 +help: ## Show this help + @sed -ne 's/^\([^[:space:]]*\):.*##/\1:\t/p' $(MAKEFILE_LIST) | column -t -s $$'\t' diff --git a/README.md b/README.md new file mode 100644 index 0000000..94798f0 --- /dev/null +++ b/README.md @@ -0,0 +1,22 @@ + +TODO: Document + +Author, Copyright, and License +------------------------------ + +Copyright (c) 2023-2024 Hauke Dämpfling (haukex@zero-g.net) +at the Leibniz Institute of Freshwater Ecology and Inland Fisheries (IGB), +Berlin, Germany, + +This library is free software: you can redistribute it and/or modify it under +the terms of the GNU Lesser General Public License as published by the Free +Software Foundation, either version 3 of the License, or (at your option) any +later version. + +This library is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +details. + +You should have received a copy of the GNU Lesser General Public License +along with this program. 
If not, see diff --git a/dev/isolated-dist-test.sh b/dev/isolated-dist-test.sh new file mode 100644 index 0000000..a83f044 --- /dev/null +++ b/dev/isolated-dist-test.sh @@ -0,0 +1,27 @@ +#!/bin/bash +set -euxo pipefail + +##### Test distribution in an isolated environment +# This test takes a built .tar.gz distribution (must be passed as first argument) +# and runs the test suite on it in an isolated venv. +### + +python3bin="${PYTHON3BIN:-python}" + +usage() { echo "Usage: $0 DIST_FILE" 1>&2; exit 1; } +[[ $# -eq 1 ]] || usage +dist_file="$(realpath "$1")" +test -f "$dist_file" || usage + +cd -- "$( dirname -- "${BASH_SOURCE[0]}" )"/.. + +temp_dir="$( mktemp --directory )" +trap 'set +e; popd; rm -rf "$temp_dir"' EXIT + +rsync -a tests "$temp_dir" --exclude=__pycache__ + +pushd "$temp_dir" +$python3bin -m venv .venv +.venv/bin/python -m pip -q install --upgrade pip +.venv/bin/python -m pip install "$dist_file" +.venv/bin/python -Im unittest -v diff --git a/dev/local-actions.sh b/dev/local-actions.sh new file mode 100644 index 0000000..d780b3d --- /dev/null +++ b/dev/local-actions.sh @@ -0,0 +1,55 @@ +#!/bin/bash +set -euo pipefail +cd -- "$( dirname -- "${BASH_SOURCE[0]}" )"/.. + +##### Run tests locally +# The intention of this script is to allow the user to run the same actions as are run by the +# GitHub Actions in the local environment, i.e. across multiple Python versions. +# +# WARNING: This script requires that you've set up `python3.X` aliases to the various Python versions! +# See also: https://github.com/haukex/toolshed/blob/main/notes/Python.md +### +# Reminder: Keep these checks in sync with `.github/workflows/tests.yml`. + +usage() { echo "Usage: $0 VENV_PATH" 1>&2; exit 1; } +[[ $# -eq 1 ]] || usage +venv_path="$1" +test -d "$venv_path" || usage + +activate_venv () { # argument: python version (X.Y) + echo "+ . 
$venv_path/.venv$1/{Scripts,bin}/activate" + # Remember venv may only set up the `python` alias, not necessarily `python3` + if [ -e "$venv_path/.venv$1/Scripts" ]; then + # shellcheck source=/dev/null + . "$venv_path/.venv$1/Scripts/activate" + else + # shellcheck source=/dev/null + . "$venv_path/.venv$1/bin/activate" + fi + # Double-check: + python_version="$( python -c 'import sys; print(".".join(map(str,sys.version_info[:2])))' )" + if [[ "$python_version" == "$1" ]]; then + echo "# Python $python_version at $( python -c 'import sys; print(sys.executable)' )" + else + echo "ERROR: Expected python $1, got $python_version" + exit 1 + fi +} + +# Reminder: Keep version list in sync with `.github/workflows/tests.yml`. +for py_ver in 3.9 3.10 3.11 3.12 3.13; do + echo -e "\e[1;33m====================================================> Python $py_ver <====================================================\e[0m" + + if [ -e "$venv_path/.venv$py_ver" ]; then + activate_venv $py_ver + else + python$py_ver -m venv "$venv_path/.venv$py_ver" + activate_venv $py_ver + make installdeps + fi + + make test + + echo -e "\e[1;32m*** Done with Python $py_ver\e[0m" +done +echo -e "\n=====> \e[1;32mALL GOOD\e[0m <=====" diff --git a/dev/requirements.txt b/dev/requirements.txt new file mode 100644 index 0000000..4bec55b --- /dev/null +++ b/dev/requirements.txt @@ -0,0 +1,14 @@ +# requirements *for development*! 
+igbpyutils == 0.8.0 +simple-perms == 1.0.0 ; sys_platform != 'win32' +coverage[toml] == 7.6.3 +coverage-simple-excludes == 0.9.1 +pylint == 3.3.1 +mypy == 1.12.0 +flake8 == 7.1.1 +Flake8-pyproject == 1.2.3 +pur == 7.3.2 +build == 1.2.2.post1 ; sys_platform == 'linux' +twine == 5.1.1 ; sys_platform == 'linux' +# for this project: +pandas == 2.2.3 diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..68b8b6c --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,307 @@ +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "PyTOA5" +description = "Utilities for TOA5 Files" +version = "0.9.0" +authors = [ { name="Hauke D", email="haukex@zero-g.net" } ] +readme = "README.md" +requires-python = ">=3.9" +dynamic = ["dependencies"] +# https://pypi.org/classifiers/ +classifiers = [ + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "License :: OSI Approved :: GNU Lesser General Public License v3 or later (LGPLv3+)", + "Development Status :: 5 - Production/Stable", + "Operating System :: POSIX", + "Operating System :: Unix", + "Operating System :: Microsoft :: Windows", + "Operating System :: MacOS :: MacOS X", + "Topic :: File Formats", + "Topic :: Scientific/Engineering", +] + +[tool.setuptools] +packages = ["toa5", "toa5.to_csv"] + +[tool.setuptools.dynamic] +dependencies = {file = ["requirements.txt"]} + +[project.scripts] +toa5-to-csv = "toa5.to_csv:main" + +[project.urls] +"Repository" = "https://github.com/haukex/pytoa5" +"Bug Tracker" = "https://github.com/haukex/pytoa5/issues" +"Changelog" = "https://github.com/haukex/pytoa5/blob/main/CHANGELOG.md" + +# ##### ##### ##### ##### ##### Hauke's Python Type Checker / Linter Settings ##### ##### ##### ##### ##### + +# Individual lines can 
be ignored with: +# pylance: ``# pyright: ignore [settingName]`` +# pylint: ``# pylint: disable=setting-name`` +# flake8: ``# noqa: CODE`` +# mypy: ``# type: ignore[setting-name]`` +# coverage: ``# pragma: no cover``, ``# pragma: no branch``, and https://pypi.org/project/coverage-simple-excludes/ + +# ##### ##### ##### ##### ##### https://github.com/microsoft/pyright/blob/main/docs/configuration.md ##### ##### ##### ##### ##### + +[tool.pyright] +typeCheckingMode = "strict" +reportMissingTypeStubs = "none" +reportUnknownMemberType = "none" +reportUnknownArgumentType = "none" +reportUnknownVariableType = "none" +reportUnknownParameterType = "none" +reportUnknownLambdaType = "none" +reportMissingParameterType = "none" +reportMissingTypeArgument = "none" +reportGeneralTypeIssues = "none" # too bad there isn't more fine-grained control than this +reportUnnecessaryTypeIgnoreComment = "none" # this flags too many comments; we use mypy's `warn_unused_ignores` instead +reportShadowedImports = "warning" +reportImportCycles = "error" +reportPropertyTypeMismatch = "warning" + +# ##### ##### ##### ##### ##### https://coverage.readthedocs.io/en/latest/config.html ##### ##### ##### ##### ##### + +[tool.coverage.run] +plugins = ["coverage_simple_excludes"] +command_line = "-m unittest -v" +branch = true +omit = [ + "**/.venv*/**", +# "somefile*", + ] + +[tool.coverage.report] +show_missing = true +skip_covered = false +# don't do the following; it'll prevent the report from being generated (see Makefile) +#fail_under = 100 + +[tool.coverage.xml] +output = "coverage.xml" + +# ##### ##### ##### ##### ##### https://flake8.pycqa.org/en/stable/user/configuration.html ##### ##### ##### ##### ##### + +[tool.flake8] +extend-ignore = [ + # NOTE the following is commented out because otherwise `coverage`'s reports aren't always correct. 
+ #"E701", # Multiple statements on one line (colon) + "E262", # Inline comment should start with '# ' + "E265", # Block comment should start with '# ' + "E201", # Whitespace after '(' + "E202", # Whitespace before ')' + "E203", # Whitespace before ':' + "E231", # Missing whitespace after ',', ';', or ':' + "E227", # Missing whitespace around bitwise or shift operator + "E251", # Unexpected spaces around keyword / parameter equals + "E252", # Missing whitespace around parameter equals + "E221", # Multiple spaces before operator + "E225", # Missing whitespace around operator + "E128", # Continuation line under-indented for visual indent + "E301", # Expected 1 blank line, found 0 + "E302", # Expected 2 blank lines, found 0 + "E305", # Expected 2 blank lines after end of function or class + "E306", # Expected 1 blank line before a nested definition + ] +# NOTE: Keep in sync with pylint settings. +max-line-length = 150 + +# ##### ##### ##### ##### ##### https://mypy.readthedocs.io/en/stable/config_file.html ##### ##### ##### ##### ##### + +[tool.mypy] +enable_error_code = [ "ignore-without-code" ] +ignore_missing_imports = true +check_untyped_defs = true +#disallow_incomplete_defs = true # This would require a lot of `-> None` and such +warn_redundant_casts = true +warn_unused_ignores = true +warn_return_any = true +warn_unreachable = true +# in regards to the following see also https://github.com/python/mypy/issues/8046 +local_partial_types = true +implicit_reexport = false +strict_equality = true +warn_unused_configs = true + +# ##### ##### ##### ##### ##### https://pylint.readthedocs.io/en/latest/user_guide/configuration/all-options.html ##### ##### ##### ##### +# pylint --rcfile=pyproject.toml --generate-toml-config + +[tool.pylint.main] + +# Clear in-memory caches upon conclusion of linting. Useful if running pylint in +# a server-like mode. 
+clear-cache-post-run = true + +# Return non-zero exit code if any of these messages/categories are detected, +# even if score is above --fail-under value. Syntax same as enable. Messages +# specified are enabled, while categories only check already-enabled messages. +fail-on = "I" # informational, wouldn't normally fail + +# Specify a score threshold under which the program will exit with error. +fail-under = 10.0 + +# List of module names for which member attributes should not be checked (useful +# for modules/projects where namespaces are manipulated during runtime and thus +# existing member attributes cannot be deduced by static analysis). It supports +# qualified module names, as well as Unix pattern matching. +ignored-modules = ["ldap"] + +# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the +# number of processors available to use, and will cap the count on Windows to +# avoid hangs. +jobs = 0 + +# When enabled, pylint would attempt to guess common misconfiguration and emit +# user-friendly hints instead of false-positive error messages. +suggestion-mode = true + +[tool.pylint.basic] + +# Naming style matching correct class attribute names. +class-attribute-naming-style = "snake_case" + +# Good variable names which should always be accepted, separated by a comma. +good-names = ["i", "ex", "fh", "_"] + +# Include a hint for the correct naming format with invalid-name. +include-naming-hint = true + +# Naming style matching correct inline iteration names. +inlinevar-naming-style = "snake_case" + +[tool.pylint.design] + +# Maximum number of arguments for function / method. +max-args = 20 +# too-many-positional-arguments default is 5 +max-positional-arguments = 4 + +# Maximum number of attributes for a class (see R0902). +max-attributes = 10 + +# Maximum number of boolean expressions in an if statement (see R0916). +max-bool-expr = 5 + +# Maximum number of branch for function / method body. 
+max-branches = 30 + +# Maximum number of locals for function / method body. +max-locals = 15 + +# Maximum number of parents for a class (see R0901). +max-parents = 7 + +# Maximum number of public methods for a class (see R0904). +max-public-methods = 20 + +# Maximum number of return / yield for function / method body. +max-returns = 6 + +# Maximum number of statements in function / method body. +max-statements = 70 + +[tool.pylint.exceptions] + +# Exceptions that will emit a warning when caught. +overgeneral-exceptions = ["builtins.BaseException"] + +[tool.pylint.format] + +# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. +expected-line-ending-format = "LF" + +# Regexp for a line that is allowed to be longer than the limit. +ignore-long-lines = "^\\s*(# )??$" + +# Maximum number of characters on a single line. +# NOTE: Keep in sync with flake8 settings. +max-line-length = 150 + +# Maximum number of lines in a module. +max-module-lines = 500 + +# Allow the body of a class to be on the same line as the declaration if body +# contains single statement. +single-line-class-stmt = true + +# Allow the body of an if to be on the same line as the test if there is no else. +# NOTE the following is now `false` because otherwise `coverage`'s reports aren't always correct. +single-line-if-stmt = false + +[tool.pylint.imports] + +# Allow explicit reexports by alias from a package __init__. +allow-reexport-from-package = true + +# Allow wildcard imports from modules that define __all__. +allow-wildcard-with-all = true + +[tool.pylint."messages control"] + +# Disable the message, report, category or checker with the given id(s). You can +# either give multiple identifiers separated by comma (,) or put this option +# multiple times (only on the command line, not in the configuration file where +# it should appear only once). You can also use "--disable=all" to disable +# everything first and then re-enable specific checks. 
For example, if you want +# to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use "--disable=all --enable=classes +# --disable=W". +disable = [ + "locally-disabled", + "file-ignored", + "suppressed-message", + "missing-module-docstring", + "missing-function-docstring", + "missing-class-docstring", + "too-few-public-methods", + # NOTE the following is commented out because otherwise `coverage`'s reports aren't always correct. + #"multiple-statements", + "fixme"] + +# Enable the message, report, category or checker with the given id(s). You can +# either give multiple identifiers separated by comma (,) or put this option +# multiple times (only on the command line, not in the configuration file where it +# should appear only once). See also the "--disable" option for examples. +enable = [ + "raw-checker-failed", + "bad-inline-option", + "useless-suppression", + "deprecated-pragma", + "use-symbolic-message-instead", + "use-implicit-booleaness-not-comparison-to-string", + "use-implicit-booleaness-not-comparison-to-zero", + ] + +[tool.pylint.refactoring] + +# Maximum number of nested blocks for function / method body +max-nested-blocks = 5 + +# Complete name of functions that never return. When checking for inconsistent- +# return-statements if a never returning function is called then it will be +# considered as an explicit return statement and no message will be printed. +never-returning-functions = ["sys.exit", "argparse.parse_error", "argparse.exit"] + +[tool.pylint.similarities] + +# Minimum lines number of a similarity. +min-similarity-lines = 4 + +[tool.pylint.variables] + +# A regular expression matching the name of dummy variables (i.e. expected to not +# be used). +dummy-variables-rgx = "_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)" + +# Argument names that match this expression will be ignored.
+ignored-argument-names = "_.*" diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..ce7458d --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +igbpyutils >= 0.8.0 diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/doctest_wd/Example.dat b/tests/doctest_wd/Example.dat new file mode 100644 index 0000000..71ba615 --- /dev/null +++ b/tests/doctest_wd/Example.dat @@ -0,0 +1,6 @@ +"TOA5","TestLogger","CR1000X","12342","CR1000X.Std.03.02","CPU:TestLogger.CR1X","2438","Example" +"TIMESTAMP","RECORD","BattV_Min" +"TS","RN","Volts" +"","","Min" +"2021-06-19 00:00:00",0,12.99 +"2021-06-20 00:00:00",1,12.96 \ No newline at end of file diff --git a/tests/test_toa5.py b/tests/test_toa5.py new file mode 100644 index 0000000..c4f7545 --- /dev/null +++ b/tests/test_toa5.py @@ -0,0 +1,197 @@ +"""Tests for :mod:`toa5` and :mod:`toa5.to_csv`. + +Author, Copyright, and License +------------------------------ + +Copyright (c) 2023-2024 Hauke Dämpfling (haukex@zero-g.net) +at the Leibniz Institute of Freshwater Ecology and Inland Fisheries (IGB), +Berlin, Germany, https://www.igb-berlin.de/ + +This library is free software: you can redistribute it and/or modify it under +the terms of the GNU Lesser General Public License as published by the Free +Software Foundation, either version 3 of the License, or (at your option) any +later version. + +This library is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +details. + +You should have received a copy of the GNU Lesser General Public License +along with this program. 
If not, see https://www.gnu.org/licenses/ +""" +import io +import os +import sys +import csv +import doctest +import unittest +from pathlib import Path +from unittest.mock import patch +from typing import Optional, Any +from collections.abc import Callable, Sequence +from contextlib import redirect_stdout, redirect_stderr +from igbpyutils.file import Pushd, NamedTempFileDeleteLater +from pandas.testing import assert_frame_equal +import pandas +import toa5.to_csv +import toa5 + +_exp_env_daily = toa5.EnvironmentLine(station_name="TestLogger",logger_model="CR1000X",logger_serial="12342", + logger_os="CR1000X.Std.03.02",program_name="CPU:TestLogger.CR1X",program_sig="2438",table_name="Daily") +_exp_env_hourly = _exp_env_daily._replace(table_name="Hourly") + +_exp_hdr :dict[str, tuple[tuple[toa5.ColumnHeader, str, str],...]] = { + "Daily": ( + ( toa5.ColumnHeader(name="TIMESTAMP", unit="TS"), "TIMESTAMP", "timestamp" ), + ( toa5.ColumnHeader(name="RECORD", unit="RN"), "RECORD", "record" ), + ( toa5.ColumnHeader(name="BattV_Min", unit="Volts", prc="Min"), "BattV_Min[V]", "battv_min" ), + ( toa5.ColumnHeader(name="BattV_TMn", prc="TMn"), "BattV_TMn", "battv_tmn" ), + ( toa5.ColumnHeader(name="PTemp", unit="oC", prc="Smp"), "PTemp[°C]", "ptemp" ), + ( toa5.ColumnHeader(name="PTemp_C_Min", unit="Deg C", prc="Min"), "PTemp_C_Min[°C]", "ptemp_c_min" ), + ( toa5.ColumnHeader(name="PTemp_C_TMn", prc="TMn"), "PTemp_C_TMn", "ptemp_c_tmn" ), + ( toa5.ColumnHeader(name="PTemp_C_Max", unit="Deg C", prc="Max"), "PTemp_C_Max[°C]", "ptemp_c_max" ), + ( toa5.ColumnHeader(name="PTemp_C_TMx", prc="TMx"), "PTemp_C_TMx", "ptemp_c_tmx" ), + ), + "Hourly": ( + ( toa5.ColumnHeader(name="TIMESTAMP", unit="TS"), "TIMESTAMP", "timestamp" ), + ( toa5.ColumnHeader(name="RECORD", unit="RN"), "RECORD", "record" ), + ( toa5.ColumnHeader(name="BattV", unit="Volts", prc="Avg"), "BattV/Avg[V]", "battv_avg" ), + ( toa5.ColumnHeader(name="PTemp_C_Min", unit="Deg C", prc="Min"), "PTemp_C_Min[°C]", 
"ptemp_c_min" ), + ( toa5.ColumnHeader(name="PTemp_C_Max", unit="Deg C", prc="Max"), "PTemp_C_Max[°C]", "ptemp_c_max" ), + ( toa5.ColumnHeader(name="AirT_C(42)", unit="Deg C", prc="Smp"), "AirT_C(42)[°C]", "airt_c_42" ), + ( toa5.ColumnHeader(name="RelHumid_Avg(3)", unit="%", prc="Avg"), "RelHumid_Avg(3)[%]", "relhumid_avg_3" ), + ), +} + +_in_path = Path(__file__).parent/'toa5' + +def load_tests(_loader :unittest.TestLoader, tests :unittest.TestSuite, _ignore) -> unittest.TestSuite: + globs :dict = {} + def doctest_setup(_t :doctest.DocTest): + globs['_prev_dir'] = os.getcwd() + os.chdir( Path(__file__).parent/'doctest_wd' ) + def doctest_teardown(_t :doctest.DocTest): + os.chdir( globs['_prev_dir'] ) + del globs['_prev_dir'] + tests.addTests(doctest.DocTestSuite(toa5, setUp=doctest_setup, tearDown=doctest_teardown, globs=globs)) + return tests + +class Toa5TestCase(unittest.TestCase): + + def test_toa5_read_write_header(self): + with (_in_path/'TestLogger_Daily_1.dat').open(encoding='ASCII', newline='') as fh: + csv_rd = csv.reader(fh, strict=True) + env_line, columns = toa5.read_header(csv_rd) + self.assertEqual(env_line, _exp_env_daily) + self.assertEqual(columns, tuple( t[0] for t in _exp_hdr['Daily'] )) + + with (_in_path/'TestLogger_Hourly_1.dat').open(encoding='ASCII', newline='') as fh: + csv_rd = csv.reader(fh, strict=True) + env_line, columns = toa5.read_header(csv_rd) + self.assertEqual(env_line, _exp_env_hourly) + self.assertEqual(columns, tuple( t[0] for t in _exp_hdr['Hourly'] )) + + self.assertEqual( tuple( toa5.write_header(env_line, columns) ), ( + ("TOA5","TestLogger","CR1000X","12342","CR1000X.Std.03.02","CPU:TestLogger.CR1X","2438","Hourly"), + ("TIMESTAMP","RECORD","BattV","PTemp_C_Min","PTemp_C_Max","AirT_C(42)","RelHumid_Avg(3)"), + ("TS","RN","Volts","Deg C","Deg C","Deg C","%"), + ("","","Avg","Min","Max","Smp","Avg"), + ) ) + + def test_bad_toa5(self): + for fi in range(1, 13): + with 
(_in_path/f'TestLogger_Hourly_Bad{fi:02d}.dat').open(encoding='ASCII', newline='') as fh: + csv_rd = csv.reader(fh, strict=True) + with self.assertRaises(toa5.Toa5Error): + toa5.read_header(csv_rd) + + def test_col_trans(self): + for tp in _exp_hdr.values(): + for ch, cn, sq in tp: + self.assertEqual(toa5.default_col_hdr_transform(ch), cn) + self.assertEqual(toa5.sql_col_hdr_transform(ch), sq) + + def test_pandas(self): + fh = io.StringIO( + "TOA5,sn,lm,ls,os,pn,ps,tn\n" + "RECORD,BattV_Min\n" + "RN,Volts\n" + ",Min\n" + "1,12\n" + "2,11.9\n") + df = toa5.read_pandas(fh, low_memory=False) + assert_frame_equal(df, pandas.DataFrame( + index=pandas.Index(name='RECORD', data=[1,2]), + data={ 'BattV_Min[V]': [12,11.9] } ) ) + el = toa5.EnvironmentLine(station_name='sn', logger_model='lm', logger_serial='ls', logger_os='os', + program_name='pn', program_sig='ps', table_name='tn' ) + self.assertEqual( df.attrs['toa5_env_line'], el ) + fh = io.StringIO( + "TOA5,sn,lm,ls,os,pn,ps,tn\n" + "Blah,BattV_Min\n" + ",Volts\n" + ",Min\n" + "1,12\n" + "2,11.9") + df = toa5.read_pandas(fh, low_memory=False) + assert_frame_equal(df, pandas.DataFrame( + index=pandas.Index(data=[0,1]), + data={ 'Blah':[1,2], 'BattV_Min[V]':[12,11.9] } ) ) + self.assertEqual( df.attrs['toa5_env_line'], el ) + with self.assertRaises(KeyError): + toa5.read_pandas(fh, names=['x']) + + def test_to_csv_cli(self): + with Pushd(Path(__file__).parent/'doctest_wd'): + self.assertEqual( self._fake_cli(toa5.to_csv.main, argv=['-t','Example.dat']), [ + 'TIMESTAMP,RECORD,BattV_Min[V]', + '2021-06-19 00:00:00,0,12.99', + '2021-06-20 00:00:00,1,12.96', + ] ) + self.assertEqual( self._fake_cli(toa5.to_csv.main, argv=['-l-','-nt','Example.dat']), [ + 'TIMESTAMP,RECORD,BattV_Min', + '2021-06-19 00:00:00,0,12.99', + '2021-06-20 00:00:00,1,12.96', + '{', + ' "station_name": "TestLogger",', + ' "logger_model": "CR1000X",', + ' "logger_serial": "12342",', + ' "logger_os": "CR1000X.Std.03.02",', + ' "program_name": 
"CPU:TestLogger.CR1X",', + ' "program_sig": "2438",', + ' "table_name": "Example"', + '}' + ] ) + self._fake_cli(toa5.to_csv.main, argv=['-eLatin1'], stderr=None, + exit_call=(2, 'toa5.to_csv: error: Can only use --in-encoding when specifying an input file\n')) + self._fake_cli(toa5.to_csv.main, argv=['-cLatin1'], stderr=None, + exit_call=(2, 'toa5.to_csv: error: Can only use --out-encoding when specifying an output file\n')) + with NamedTempFileDeleteLater() as tf: + tf.write(b"TOA5,sn,lm,ls,os,pn,ps,tn\nRECORD,BattV_Min\nRN,Volts\n,Min\n1,12\n") + tf.close() + with self.assertRaises(ValueError): + self._fake_cli(toa5.to_csv.main, argv=['-t',tf.name]) + with NamedTempFileDeleteLater() as tf: + tf.write(b"TOA5,sn,lm,ls,os,pn,ps,tn\nTIMESTAMP,RECORD,BattV_Min\nTS,RN,Volts\n,,Min\n\"2021-06-19 00:00:00\",1") + tf.close() + with self.assertRaises(ValueError): + self._fake_cli(toa5.to_csv.main, argv=[tf.name]) + + def _fake_cli(self, target :Callable[[], None], *, argv :Sequence[str] = (), + exit_call :Sequence[Any] = (0,), stderr :Optional[str] = '' ) -> list[str]: + prev_argv = sys.argv + try: + sys.argv = [os.path.basename(target.__name__)] + list(argv) + with (redirect_stdout(io.StringIO()) as out, redirect_stderr(io.StringIO()) as err, + patch('argparse.ArgumentParser.exit', side_effect=SystemExit) as mock_exit): + try: + target() + except SystemExit: + pass + finally: + sys.argv = prev_argv + mock_exit.assert_called_once_with(*exit_call) + if stderr is not None: + self.assertEqual(err.getvalue(), stderr) + return out.getvalue().splitlines() diff --git a/tests/toa5/TestLogger_Daily_1.dat b/tests/toa5/TestLogger_Daily_1.dat new file mode 100644 index 0000000..bea4616 --- /dev/null +++ b/tests/toa5/TestLogger_Daily_1.dat @@ -0,0 +1,6 @@ +"TOA5","TestLogger","CR1000X","12342","CR1000X.Std.03.02","CPU:TestLogger.CR1X","2438","Daily" +"TIMESTAMP","RECORD","BattV_Min","BattV_TMn","PTemp","PTemp_C_Min","PTemp_C_TMn","PTemp_C_Max","PTemp_C_TMx" 
+"TS","RN","Volts","","oC","Deg C","","Deg C","" +"","","Min","TMn","Smp","Min","TMn","Max","TMx" +"2021-06-19 00:00:00",0,12.99,"2021-06-18 16:08:30",20.1,23.72,"2021-06-19 00:00:00",39.16,"2021-06-18 15:33:20" +"2021-06-20 00:00:00",1,12.96,"2021-06-19 13:13:05",30.5,21.54,"2021-06-19 03:15:00",40.91,"2021-06-19 14:04:15" diff --git a/tests/toa5/TestLogger_Hourly_1.dat b/tests/toa5/TestLogger_Hourly_1.dat new file mode 100644 index 0000000..fb582fa --- /dev/null +++ b/tests/toa5/TestLogger_Hourly_1.dat @@ -0,0 +1,7 @@ +"TOA5","TestLogger","CR1000X","12342","CR1000X.Std.03.02","CPU:TestLogger.CR1X","2438","Hourly" +"TIMESTAMP","RECORD","BattV","PTemp_C_Min","PTemp_C_Max","AirT_C(42)","RelHumid_Avg(3)" +"TS","RN","Volts","Deg C","Deg C","Deg C","%" +"","","Avg","Min","Max","Smp","Avg" +"2021-06-18 21:00:00",10,13.23,28.62,30.82,26.29,49.31 +"2021-06-18 22:00:00",11,13.31,27.14,28.62,25.14,54.85 +"2021-06-18 23:00:00",12,13.36,25.62,27.14,23.64,63.28 diff --git a/tests/toa5/TestLogger_Hourly_Bad01.dat b/tests/toa5/TestLogger_Hourly_Bad01.dat new file mode 100644 index 0000000..e69de29 diff --git a/tests/toa5/TestLogger_Hourly_Bad02.dat b/tests/toa5/TestLogger_Hourly_Bad02.dat new file mode 100644 index 0000000..865f66f --- /dev/null +++ b/tests/toa5/TestLogger_Hourly_Bad02.dat @@ -0,0 +1 @@ +"TOA5","TestLogger","CR1000X","12342","CR diff --git a/tests/toa5/TestLogger_Hourly_Bad03.dat b/tests/toa5/TestLogger_Hourly_Bad03.dat new file mode 100644 index 0000000..139597f --- /dev/null +++ b/tests/toa5/TestLogger_Hourly_Bad03.dat @@ -0,0 +1,2 @@ + + diff --git a/tests/toa5/TestLogger_Hourly_Bad04.dat b/tests/toa5/TestLogger_Hourly_Bad04.dat new file mode 100644 index 0000000..d5fa708 --- /dev/null +++ b/tests/toa5/TestLogger_Hourly_Bad04.dat @@ -0,0 +1 @@ +"TOA6","TestLogger","CR1000X","12342","CR1000X.Std.03.02","CPU:TestLogger.CR1X","2438","Hourly" diff --git a/tests/toa5/TestLogger_Hourly_Bad05.dat b/tests/toa5/TestLogger_Hourly_Bad05.dat new file mode 100644 index 
0000000..e68654a --- /dev/null +++ b/tests/toa5/TestLogger_Hourly_Bad05.dat @@ -0,0 +1 @@ +"TOA5","TestLogger","CR1000X","12342","CR1000X.Std.03.02","CPU:TestLogger.CR1X","2438","Hourly","foo" diff --git a/tests/toa5/TestLogger_Hourly_Bad06.dat b/tests/toa5/TestLogger_Hourly_Bad06.dat new file mode 100644 index 0000000..0eb0cc6 --- /dev/null +++ b/tests/toa5/TestLogger_Hourly_Bad06.dat @@ -0,0 +1,2 @@ +"TOA5","TestLogger","CR1000X","12342","CR1000X.Std.03.02","CPU:TestLogger.CR1X","2438","Hourly" +"TIMESTAMP","RECORD","BattV_Min","PTemp_C_Min","PTemp_C_Max","AirT_C(42)","RelHumid" diff --git a/tests/toa5/TestLogger_Hourly_Bad07.dat b/tests/toa5/TestLogger_Hourly_Bad07.dat new file mode 100644 index 0000000..b1521e1 --- /dev/null +++ b/tests/toa5/TestLogger_Hourly_Bad07.dat @@ -0,0 +1 @@ +"TOA5","TestLogger","CR1000X","12342","CR1000X.Std.03.02","CPU:TestLogger.CR1X","2438","Hourly" diff --git a/tests/toa5/TestLogger_Hourly_Bad08.dat b/tests/toa5/TestLogger_Hourly_Bad08.dat new file mode 100644 index 0000000..79ba052 --- /dev/null +++ b/tests/toa5/TestLogger_Hourly_Bad08.dat @@ -0,0 +1,2 @@ +"TOA5","TestLogger","CR1000X","12342","CR1000X.Std.03.02","CPU:TestLogger.CR1X","2438","Hourly" +"TIMESTAMP","RECORD","BattV_Min","PTemp_C_Min","PTemp diff --git a/tests/toa5/TestLogger_Hourly_Bad09.dat b/tests/toa5/TestLogger_Hourly_Bad09.dat new file mode 100644 index 0000000..0f8c6f0 --- /dev/null +++ b/tests/toa5/TestLogger_Hourly_Bad09.dat @@ -0,0 +1,3 @@ +"TOA5","TestLogger","CR1000X","12342","CR1000X.Std.03.02","CPU:TestLogger.CR1X","2438","Hourly" +"TIMESTAMP","RECORD","BattV_Min","PTemp_C_Min","PTemp_C_Max","AirT_C(42)","RelHumid" +"TS","RN","Volts","Deg C","De diff --git a/tests/toa5/TestLogger_Hourly_Bad10.dat b/tests/toa5/TestLogger_Hourly_Bad10.dat new file mode 100644 index 0000000..1024a88 --- /dev/null +++ b/tests/toa5/TestLogger_Hourly_Bad10.dat @@ -0,0 +1,5 @@ +"TOA5","TestLogger","CR1000X","12342","CR1000X.Std.03.02","CPU:TestLogger.CR1X","2438","Hourly" 
+"TIMESTAMP","RECORD","BattV_Min","PTemp_C_Min","PTemp_C_Max","AirT_C(42)" +"TS","RN","Volts","Deg C","Deg C","Deg C","%" +"","","Min","Min","Max","Smp","Smp" +"2021-06-18 20:00:00",9,13.14,30.82,33.51,27.96,45.66 diff --git a/tests/toa5/TestLogger_Hourly_Bad11.dat b/tests/toa5/TestLogger_Hourly_Bad11.dat new file mode 100644 index 0000000..6e6150e --- /dev/null +++ b/tests/toa5/TestLogger_Hourly_Bad11.dat @@ -0,0 +1,5 @@ +"TOA5","TestLogger","CR1000X","12342","CR1000X.Std.03.02","CPU:TestLogger.CR1X","2438","Hourly" +"TIMESTAMP","RECORD","BattV_Min","PTemp_C_Min","PTemp_C_Max","AirT_C(42)","RelHumid" +"TS","RN","Volts","Deg C","Deg C","Deg C" +"","","Min","Min","Max","Smp","Smp" +"2021-06-18 20:00:00",9,13.14,30.82,33.51,27.96,45.66 diff --git a/tests/toa5/TestLogger_Hourly_Bad12.dat b/tests/toa5/TestLogger_Hourly_Bad12.dat new file mode 100644 index 0000000..c387bc8 --- /dev/null +++ b/tests/toa5/TestLogger_Hourly_Bad12.dat @@ -0,0 +1,5 @@ +"TOA5","TestLogger","CR1000X","12342","CR1000X.Std.03.02","CPU:TestLogger.CR1X","2438","Hourly" +"TIMESTAMP","RECORD","BattV_Min","PTemp_C_Min","PTemp_C_Max","AirT_C(42)","AirT_C(42)" +"TS","RN","Volts","Deg C","Deg C","Deg C","%" +"","","Min","Min","Max","Smp","Smp" +"2021-06-18 20:00:00",9,13.14,30.82,33.51,27.96,45.66 diff --git a/toa5/__init__.py b/toa5/__init__.py new file mode 100644 index 0000000..a0fc3c8 --- /dev/null +++ b/toa5/__init__.py @@ -0,0 +1,207 @@ +""" +TODO: Document + +Author, Copyright, and License +------------------------------ + +Copyright (c) 2023-2024 Hauke Dämpfling (haukex@zero-g.net) +at the Leibniz Institute of Freshwater Ecology and Inland Fisheries (IGB), +Berlin, Germany, https://www.igb-berlin.de/ + +This library is free software: you can redistribute it and/or modify it under +the terms of the GNU Lesser General Public License as published by the Free +Software Foundation, either version 3 of the License, or (at your option) any +later version. 
+ +This library is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +details. + +You should have received a copy of the GNU Lesser General Public License +along with this program. If not, see https://www.gnu.org/licenses/ +""" +import re +import csv +import importlib +from typing import NamedTuple +from collections.abc import Iterator, Sequence, Generator, Callable +from igbpyutils.iter import no_duplicates, zip_strict + +class Toa5Error(RuntimeError): + """An error class for :func:`read_header`.""" + +class EnvironmentLine(NamedTuple): + """Represents a TOA5 "Environment Line", giving details about the data logger and its program.""" + station_name :str + logger_model :str + logger_serial :str + logger_os :str + program_name :str + program_sig :str + table_name :str + +class ColumnHeader(NamedTuple): + """Named tuple representing a column header. + + This class represents a column header as it would be read from a text file, therefore, + when fields are empty, this is represented by empty strings, not by ``None``. + """ + #: Column name + name :str + #: Scientific/engineering units (optional) + unit :str = "" + #: Data process (optional; examples: ``"Smp"``, ``"Avg"``, ``"Max"``, etc.) + prc :str = "" + +#: A type for a function that takes a :class:`ColumnHeader` and turns it into a single string. See :func:`default_col_hdr_transform`. +ColumnHeaderTransformer = Callable[[ColumnHeader], str] + +#: A table of shorter versions of common units, used in :func:`default_col_hdr_transform`. 
+SHORTER_UNITS = { + "meters/second": "m/s", + "Deg C": "°C", + "oC": "°C", + "Volts": "V", + "m^3/m^3": "m³/m³", + "W/m^2": "W/m²", + "Watts/meter^2": "W/m²", + "nSec": "ns", + "uSec": "μs", + "hours": "hr", + "micrometer": "μm", + "degrees": "°", + "Deg": "°", + "unitless": "" +} + +def _maybe_prc(col :ColumnHeader, sep :str) -> str: + if col.prc and col.prc.lower()!='smp' and not re.search(re.escape(col.prc)+r'(?:\(\d+\))?\Z', col.name, re.I): + return col.name + sep + col.prc + return col.name + +_sql_parens_re = re.compile(r'\((\d+)\)\Z') +def sql_col_hdr_transform(col :ColumnHeader) -> str: + """TODO: Doc""" + return _sql_parens_re.sub(r'_\1', _maybe_prc(col, '_') ).lower() + +def default_col_hdr_transform(col :ColumnHeader): + """The default function used to transform a :class:`ColumnHeader` into a single string. + + This conversion is slightly opinionated and will: + + - append :attr:`ColumnHeader.prc` with a slash (unless the name already ends with it or it is "Smp"), + - shorten some units (:data:`SHORTER_UNITS`), + - use square brackets around the units, and + - ignore the "TS" and "RN" "units" on the "TIMESTAMP" and "RECORD" columns, respectively. + """ + c = _maybe_prc(col, '/') + if col.unit and \ + not ( col.name=='TIMESTAMP' and col.unit=='TS' or col.name=='RECORD' and col.unit=='RN' ) \ + and len(SHORTER_UNITS.get(col.unit, col.unit)): + c += "[" + SHORTER_UNITS.get(col.unit, col.unit) + "]" + return c + +#: A short alias for :func:`default_col_hdr_transform`. +short_name = default_col_hdr_transform + +_env_line_keys = ('toa5',) + EnvironmentLine._fields +def read_header(csv_reader :Iterator[Sequence[str]]) -> tuple[EnvironmentLine, tuple[ColumnHeader, ...]]: + """Read the header of a TOA5 file. + + A common use case to read a TOA5 file would be the following; as you can see the main difference + between reading a regular CSV file and a TOA5 file is the additional call to this function. 
+ + >>> import csv + >>> import toa5 + >>> with open('Example.dat', encoding='ASCII', newline='') as fh: + ... csv_rd = csv.reader(fh, strict=True) + ... env_line, columns = toa5.read_header(csv_rd) + ... print([ toa5.short_name(col) for col in columns ]) + ... for row in csv_rd: + ... print(row) + ['TIMESTAMP', 'RECORD', 'BattV_Min[V]'] + ['2021-06-19 00:00:00', '0', '12.99'] + ['2021-06-20 00:00:00', '1', '12.96'] + + :param csv_reader: TODO Doc + :return: TODO Doc + :raises Toa5Error: TODO Doc + """ + # ### Read the environment line + try: + env_line = next(csv_reader) + except StopIteration as ex: + raise Toa5Error("failed to read environment line") from ex + except csv.Error as ex: + raise Toa5Error("CSV parse error on environment line") from ex + if len(env_line)<1 or env_line[0]!='TOA5': + raise Toa5Error("not a TOA5 file?") + if len(_env_line_keys) != len(env_line): + raise Toa5Error("TOA5 environment line length mismatch") + env_line_dict = dict(zip_strict(_env_line_keys, env_line)) + del env_line_dict['toa5'] + # ### Read the header rows + try: + field_names = next(csv_reader) + units = next(csv_reader) + proc = next(csv_reader) + except StopIteration as ex: + raise Toa5Error("unexpected end of headers") from ex + except csv.Error as ex: + raise Toa5Error("CSV parse error on headers") from ex + # ### Do some checks on the header + if len(field_names) != len(units) or len(field_names) != len(proc): + raise Toa5Error("header column count mismatch") + try: + set(no_duplicates(field_names, name='column name')) + except ValueError as ex: + raise Toa5Error(*ex.args) # pylint: disable=raise-missing-from # (we're just stealing the error message) + columns = tuple( ColumnHeader(*c) for c in zip_strict(field_names, units, proc) ) + return EnvironmentLine(**env_line_dict), columns + +def write_header(env_line :EnvironmentLine, columns :Sequence[ColumnHeader]) -> Generator[Sequence[str], None, None]: + """TODO: Doc""" + yield ('TOA5',)+env_line + yield tuple( c.name 
for c in columns ) + yield tuple( c.unit for c in columns ) + yield tuple( c.prc for c in columns ) + +def read_pandas(fh, *, col_trans :ColumnHeaderTransformer = default_col_hdr_transform, **kwargs): + """A helper function to read TOA5 files into a Pandas DataFrame with ``pandas.read_csv``. + + >>> import toa5 + >>> with open('Example.dat', encoding='ASCII', newline='') as fh: + ... df = toa5.read_pandas(fh, low_memory=False) + >>> print(df) # doctest: +NORMALIZE_WHITESPACE + RECORD BattV_Min[V] + TIMESTAMP \n\ + 2021-06-19 0 12.99 + 2021-06-20 1 12.96 + >>> print(df.attrs['toa5_env_line']) # doctest: +NORMALIZE_WHITESPACE + EnvironmentLine(station_name='TestLogger', logger_model='CR1000X', logger_serial='12342', + logger_os='CR1000X.Std.03.02', program_name='CPU:TestLogger.CR1X', program_sig='2438', table_name='Example') + + :param fh: TODO Doc + :param col_trans: TODO Doc + :param kwargs: Additional keyword arguments are passed through to ``pandas.read_csv``. + Not allowed are ``filepath_or_buffer``, ``header``, and ``names``. + Other options that this function provides by default, such as ``na_values`` or ``index_col``, may be overridden. + :return: A Pandas DataFrame. + The :class:`EnvironmentLine` is stored in the DataFrame's ``attrs`` under the key ``toa5_env_line``. + Note that, at the time of writing, Pandas documents ``attrs`` as being experimental. 
+ """ + if any( k in kwargs for k in ('filepath_or_buffer','header','names') ): + raise KeyError("Arguments 'filepath_or_buffer', 'header', and 'names' may not be used") + pd = importlib.import_module('pandas') + env_line, columns = read_header( csv.reader(fh, strict=True) ) + cols = [ col_trans(c) for c in columns ] + xa = {} + if columns[0] == ColumnHeader(name='TIMESTAMP', unit='TS'): + xa['parse_dates'] = [0] + xa['index_col'] = [0] + elif columns[0] == ColumnHeader(name='RECORD', unit='RN'): + xa['index_col'] = [0] + df = pd.read_csv(fh, header=None, names=cols, na_values=['NAN'], **xa, **kwargs) + df.attrs['toa5_env_line'] = env_line + return df diff --git a/toa5/to_csv/__init__.py b/toa5/to_csv/__init__.py new file mode 100644 index 0000000..ea9bb63 --- /dev/null +++ b/toa5/to_csv/__init__.py @@ -0,0 +1,79 @@ +"""See :func:`main`. + +Author, Copyright, and License +------------------------------ + +Copyright (c) 2023-2024 Hauke Dämpfling (haukex@zero-g.net) +at the Leibniz Institute of Freshwater Ecology and Inland Fisheries (IGB), +Berlin, Germany, https://www.igb-berlin.de/ + +This library is free software: you can redistribute it and/or modify it under +the terms of the GNU Lesser General Public License as published by the Free +Software Foundation, either version 3 of the License, or (at your option) any +later version. + +This library is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +details. + +You should have received a copy of the GNU Lesser General Public License +along with this program. If not, see https://www.gnu.org/licenses/ +""" +import csv +import sys +import json +import argparse +import fileinput +from igbpyutils.file import open_out +from igbpyutils.error import init_handlers +from .. 
import read_header, ColumnHeader, ColumnHeaderTransformer, default_col_hdr_transform + +def main(): + """Command-line interface to convert a TOA5 file's headers to a single row, suitable for use in CSV. + + If this module and its scripts have been installed correctly, you should be able to run ``toa5-to-csv --help`` + or ``python -m toa5.to_csv --help`` for details. + """ + init_handlers() + parser = argparse.ArgumentParser('toa5.to_csv', description='TOA5 to CSV Converter') + parser.add_argument('-o', '--out-file', help="Output filename") + parser.add_argument('-l', '--env-line', metavar='ENV_LINE_FILE', help="JSON file to write environment line to") + parser.add_argument('-d', '--out-dialect', help="Output CSV dialect (see Python `csv` documentation)", + choices=csv.list_dialects(), default='excel') + parser.add_argument('-n', '--simple-names', help="Don't add units etc. to column names", action="store_true") + parser.add_argument('-e', '--in-encoding', help="Input file encoding (default UTF-8)", default="UTF-8") + parser.add_argument('-c', '--out-encoding', help="Output encoding (default UTF-8)", default="UTF-8") + parser.add_argument('-t', '--require-timestamp', help="Require first column to be TIMESTAMP", action="store_true") + parser.add_argument('-j', '--allow-jagged', help="Allow rows to have differing column counts", action="store_true") + parser.add_argument('toa5file', metavar='TOA5FILE', help="The TOA5 file to process", nargs='?') + args = parser.parse_args() + + if args.in_encoding!='UTF-8' and (not args.toa5file or args.toa5file=='-'): + parser.error('Can only use --in-encoding when specifying an input file') + if args.out_encoding!='UTF-8' and (not args.out_file or args.out_file=='-'): + parser.error('Can only use --out-encoding when specifying an output file') + col_trans :ColumnHeaderTransformer = (lambda col: col.name) if args.simple_names else default_col_hdr_transform + + if sys.hexversion >= 0x03_0A_00_00: # cover-req-ge3.10 + enc = { 
"encoding": args.in_encoding } + else: # cover-req-lt3.10 + enc = { "openhook": fileinput.hook_encoded(args.in_encoding) } + with (fileinput.input((args.toa5file,) if args.toa5file else (), **enc) as ifh, # pyright: ignore [reportCallIssue, reportArgumentType] + open_out(args.out_file, encoding=args.out_encoding, newline='') as ofh): + csv_rd = csv.reader(ifh, strict=True) + csv_wr = csv.writer(ofh, dialect=args.out_dialect) + env_line, columns = read_header(csv_rd) + if args.require_timestamp and columns[0] != ColumnHeader(name='TIMESTAMP', unit='TS'): + raise ValueError("First column was not a timestamp (if that's not an error, turn off `--require-timestamp`)") + csv_wr.writerow( col_trans(c) for c in columns ) + for ri, row in enumerate(csv_rd, start=5): + if not args.allow_jagged and len(row)!=len(columns): + raise ValueError(f"Row {ri}: expected {len(columns)} columns but got {len(row)}") + csv_wr.writerow(row) + + if args.env_line: + with open_out(args.env_line, encoding=args.out_encoding) as fh: + json.dump(env_line._asdict(), fp=fh, indent=2) + + parser.exit(0) diff --git a/toa5/to_csv/__main__.py b/toa5/to_csv/__main__.py new file mode 100755 index 0000000..e9bd2c5 --- /dev/null +++ b/toa5/to_csv/__main__.py @@ -0,0 +1,4 @@ +#!/usr/bin/env python +from . import main +if __name__=='__main__': + main()