diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 4400150..14bfaa9 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -12,39 +12,43 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] + python-version: ["3.10", "3.12", "3.13"] steps: - name: Checkout main - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 0 + - name: Checkout lndocs - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: repository: laminlabs/lndocs ssh-key: ${{ secrets.READ_LNDOCS }} path: lndocs ref: main + - name: Setup Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - cache: "pip" - cache-dependency-path: ".github/workflows/build.yml" # See dependencies below + - name: Cache pre-commit uses: actions/cache@v3 if: matrix.python-version == '3.12' with: path: ~/.cache/pre-commit key: pre-commit-${{ runner.os }}-${{ hashFiles('.pre-commit-config.yaml') }} + - name: Install Python dependencies run: | python -m pip install -U pip pip install -U laminci + - run: nox -s lint if: matrix.python-version == '3.12' - run: nox -s build + - name: Codecov if: matrix.python-version == '3.12' uses: codecov/codecov-action@v2 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 41661d2..5f79c92 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,73 +1,51 @@ +fail_fast: false +default_language_version: + python: python3 +default_stages: + - pre-commit + - pre-push +minimum_pre_commit_version: 2.12.0 repos: - - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v3.2.0 - hooks: - - id: trailing-whitespace - - id: end-of-file-fixer - exclude: | - (?x)( - .github/workflows/latest-changes.jinja2 - ) - - id: check-yaml - - id: check-added-large-files - - repo: https://github.com/psf/black - rev: 22.3.0 - hooks: - - id: black-jupyter - - repo: 
https://github.com/pycqa/flake8 - rev: 4.0.1 + - repo: https://github.com/pre-commit/mirrors-prettier + rev: v4.0.0-alpha.8 hooks: - - id: flake8 - additional_dependencies: - - flake8-black>=0.1.1 - - flake8-typing-imports==1.10.0 - language_version: python3 - args: - - --max-line-length=88 - - --ignore=E203 - - --min-python-version=3.8.0 + - id: prettier exclude: | (?x)( - __init__.py + docs/changelog.md ) - - repo: https://github.com/pre-commit/mirrors-prettier - rev: v2.6.2 - hooks: - - id: prettier - repo: https://github.com/kynan/nbstripout - rev: 0.3.9 + rev: 0.6.1 hooks: - id: nbstripout exclude: | (?x)( - docs/tasks/| - docs/examples/ + docs/examples/| + docs/notes/ ) - - repo: https://github.com/Lucas-C/pre-commit-hooks - rev: v1.1.9 + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.9.2 hooks: - - id: forbid-crlf - - id: remove-crlf - - repo: https://github.com/pre-commit/mirrors-isort - rev: v5.8.0 + - id: ruff + args: [--fix, --exit-non-zero-on-fix, --unsafe-fixes] + - id: ruff-format + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 hooks: - - id: isort - args: ["--profile", "black"] + - id: detect-private-key + - id: check-ast + - id: end-of-file-fixer exclude: | (?x)( - __init__.py - ) + .github/workflows/latest-changes.jinja2 + ) + - id: mixed-line-ending + args: [--fix=lf] + - id: trailing-whitespace + - id: check-case-conflict - repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.940 + rev: v1.14.1 hooks: - id: mypy - - repo: https://github.com/pycqa/pydocstyle - rev: 6.1.1 - hooks: - - id: pydocstyle - args: # google style + __init__, see http://www.pydocstyle.org/en/stable/error_codes.html - - --ignore=D100,D101,D102,D103,D105,D107,D203,D204,D213,D215,D400,D401,D402,D403,D404,D406,D407,D408,D409,D413 - exclude: | - (?x)( - __init__.py - ) + args: [--no-strict-optional, --ignore-missing-imports] + additional_dependencies: ["types-requests", "types-attrs"] diff --git a/CITATION.cff b/CITATION.cff index 
d92623d..94a51e6 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -1,23 +1,23 @@ cff-version: 1.2.0 title: "readfcs: Read FCS files" authors: -- family-names: Sun - given-names: Sunny - orcid: https://orcid.org/0000-0002-2365-0888 -- family-names: Wolf - given-names: F. Alexander - orcid: https://orcid.org/0000-0002-8760-7838 -url: https://github.com/laminlabs/readfcs -preferred-citation: - type: article - title: "readfcs: Read FCS files" - authors: - family-names: Sun given-names: Sunny orcid: https://orcid.org/0000-0002-2365-0888 - family-names: Wolf given-names: F. Alexander orcid: https://orcid.org/0000-0002-8760-7838 +url: https://github.com/laminlabs/readfcs +preferred-citation: + type: article + title: "readfcs: Read FCS files" + authors: + - family-names: Sun + given-names: Sunny + orcid: https://orcid.org/0000-0002-2365-0888 + - family-names: Wolf + given-names: F. Alexander + orcid: https://orcid.org/0000-0002-8760-7838 doi: 10.56528/rfcs journal: Lamin Reports year: 2022 diff --git a/pyproject.toml b/pyproject.toml index 9036836..b935cd6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,6 @@ readme = "README.md" dynamic = ["version", "description"] classifiers = [ "License :: OSI Approved :: Apache Software License", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", @@ -33,9 +32,6 @@ dev = [ "laminci", ] -[tool.black] -preview = true - [tool.pytest.ini_options] testpaths = [ "tests", @@ -46,3 +42,109 @@ testpaths = [ omit = [ "readfcs/*", ] + +[tool.ruff] +src = ["src"] +line-length = 88 +lint.select = [ + "F", # Errors detected by Pyflakes + "E", # Error detected by Pycodestyle + "W", # Warning detected by Pycodestyle + "I", # isort + "D", # pydocstyle + "B", # flake8-bugbear + "TID", # flake8-tidy-imports + "C4", # flake8-comprehensions + "BLE", # flake8-blind-except + "UP", # pyupgrade + "RUF100", # Report unused noqa 
directives + "TCH", # Typing imports + "NPY", # Numpy specific rules + "PTH", # Use pathlib + "S" # Security +] +lint.ignore = [ + # Do not catch blind exception: `Exception` + "BLE001", + # Errors from function calls in argument defaults. These are fine when the result is immutable. + "B008", + # line too long -> we accept long comment lines; ruff format gets rid of long code lines + "E501", + # Do not assign a lambda expression, use a def -> lambda expression assignments are convenient + "E731", + # allow I, O, l as variable names -> I is the identity matrix + "E741", + # Missing docstring in public module + "D100", + # undocumented-public-class + "D101", + # Missing docstring in public method + "D102", + # Missing docstring in public function + "D103", + # Missing docstring in public package + "D104", + # __magic__ methods are often self-explanatory, allow missing docstrings + "D105", + # Missing docstring in public nested class + "D106", + # Missing docstring in __init__ + "D107", + ## Disable one in each pair of mutually incompatible rules + # We don’t want a blank line before a class docstring + "D203", + # 1 blank line required after class docstring + "D204", + # first line should end with a period [Bug: doesn't work with single-line docstrings] + # We want docstrings to start immediately after the opening triple quote + "D213", + # Section underline is over-indented ("{name}") + "D215", + # First line should end with a period + "D400", + # First line should be in imperative mood; try rephrasing + "D401", + # First word of the first line should be capitalized: {} -> {} + "D403", + # First word of the docstring should not be "This" + "D404", + # Section name should end with a newline ("{name}") + "D406", + # Missing dashed underline after section ("{name}") + "D407", + # Section underline should be in the line following the section's name ("{name}") + "D408", + # Section underline should match the length of its name ("{name}") + "D409", + # No blank lines 
allowed between a section header and its content ("{name}") + "D412", + # Missing blank line after last section ("{name}") + "D413", + # camelcase imported as lowercase + "N813", + # module import not at top level of file + "E402", + # `open()` should be replaced by `Path.open()` + "PTH123", + # `subprocess` call: check for execution of untrusted input - https://github.com/PyCQA/bandit/issues/333 + "S603", + # Starting a process with a partial executable path + "S607", + # Audit URL open for permitted schemes. Allowing use of `file:` or custom schemes is often unexpected. + "S310" +] + +[tool.ruff.lint.pydocstyle] +convention = "google" + +[tool.ruff.lint.per-file-ignores] +"docs/*" = ["I", "S101"] +"tests/**/*.py" = [ + "D", # docstrings are allowed to look a bit off + "S101", # asserts allowed in tests... + "ARG", # Unused function args -> fixtures nevertheless are functionally relevant... + "FBT", # Don't care about booleans as positional arguments in tests, e.g. via @pytest.mark.parametrize() + "PLR2004", # Magic value used in comparison, ... + "S311", # Standard pseudo-random generators are not suitable for cryptographic purposes +] +"*/__init__.py" = ["F401"] diff --git a/readfcs/_core.py b/readfcs/_core.py index 807a959..68ee146 100644 --- a/readfcs/_core.py +++ b/readfcs/_core.py @@ -1,7 +1,7 @@ import copy import re from pathlib import Path -from typing import Dict, Union +from typing import Union import anndata as ad import flowio @@ -13,18 +13,17 @@ def _channels_df(text: dict) -> pd.DataFrame: """Format channels into a DataFrame. Args: - text: dict - original metadata + text: original metadata Returns: a DataFrame of channels with columns PnN, PnS, etc. """ - channel_groups: Dict = {} + channel_groups: dict = {} # channel groups are $PnB, $PnS, $PnN... 
for k, v in text.items(): # Get all fields with $PnX pattern - if re.match(r"^p\d+[a-z]$", k): # noqa + if re.match(r"^p\d+[a-z]$", k): group_key = f"Pn{k[-1].upper()}" if group_key not in channel_groups: channel_groups[group_key] = [] @@ -63,11 +62,7 @@ def _get_spill_matrix(matrix_string: str) -> pd.DataFrame: Published 2017 Jul 26. doi:10.3389/fimmu.2017.00858 Args: - matrix_string: str - string value extracted from the 'spill' parameter of the FCS file - - Returns: - Pandas.DataFrame + matrix_string: string value extracted from the 'spill' parameter of the FCS file """ matrix_list = matrix_string.split(",") n = int(matrix_list[0]) @@ -77,8 +72,8 @@ def _get_spill_matrix(matrix_string: str) -> pd.DataFrame: matrix = np.reshape(list(map(float, values)), (n, n)) matrix_df = pd.DataFrame(matrix) matrix_df = matrix_df.rename( - index={k: v for k, v in zip(matrix_df.columns.to_list(), header)}, - columns={k: v for k, v in zip(matrix_df.columns.to_list(), header)}, + index=dict(zip(matrix_df.columns.to_list(), header)), + columns=dict(zip(matrix_df.columns.to_list(), header)), ) return matrix_df @@ -95,6 +90,10 @@ class ReadFCS: def __init__(self, filepath: Union[str, Path], data_set: int = 0) -> None: # FlowIO makes all keys lowercase in .text + if isinstance( + filepath, Path + ): # Fix for https://laminlabs.slack.com/archives/C07DB677JF6/p1737542733821819?thread_ts=1729512530.482559&cid=C07DB677JF6 + filepath = str(filepath) self._flow_data = flowio.read_multiple_data_sets(filepath)[data_set] # data @@ -144,9 +143,9 @@ def data(self) -> pd.DataFrame: def compensate(self) -> None: """Apply compensation to event data.""" - assert ( - self.meta["spill"] is not None - ), f"Unable to locate spillover matrix, please provide a compensation matrix" # noqa + assert self.meta["spill"] is not None, ( # noqa: S101 + "Unable to locate spillover matrix, please provide a compensation matrix" + ) channel_idx = [ i for i, (_, row) in enumerate(self.channels.iterrows()) @@ -156,8 
+155,8 @@ def compensate(self) -> None: channel_idx = [ i for i, (_, row) in enumerate(self.channels.iterrows()) - if all([z not in row["PnN"].lower() for z in ["fsc", "ssc", "time"]]) - and row["PnN"] in self.meta["spill"].columns # noqa + if all(z not in row["PnN"].lower() for z in ["fsc", "ssc", "time"]) + and row["PnN"] in self.meta["spill"].columns ] comp_data = self.data.iloc[:, channel_idx] @@ -168,17 +167,13 @@ def to_anndata(self, reindex=True) -> ad.AnnData: """Convert the FCSFile instance to an AnnData. Args: - reindex: bool. Default is True - variables will be reindexed with marker names if possible otherwise - channels - Returns: - an AnnData object + reindex: variables will be reindexed with marker names if possible otherwise channels """ channels_mapping = { "PnN": "channel", "PnS": "marker", } - if any([i for i in ["PnN", "PnS"] if i not in self.channels.columns]): + if any(i for i in ["PnN", "PnS"] if i not in self.channels.columns): raise AssertionError( "PnN or PnS field not found in the file!\nPlease check your file" " content with `readfcs.view`!" @@ -222,7 +217,7 @@ def to_anndata(self, reindex=True) -> ad.AnnData: n_mismatch = self.meta["spill"].index.map(mapper).isna().sum() if n_mismatch > 0: raise AssertionError( - f"spill matrix index contains {n_mismatch} mismatches to the channels, please check your metadata." # noqa + f"spill matrix index contains {n_mismatch} mismatches to the channels, please check your metadata." ) meta["spill"] = meta["spill"].rename(index=mapper) meta["spill"] = meta["spill"].rename(columns=mapper) @@ -255,10 +250,8 @@ def view(filepath: Union[str, Path], data_set: int = 0) -> tuple: """Read in file content without preprocessing for debugging. Args: - filepath: str or Path - location of fcs file to parse - data_set: int. Default is 0. - Index of retrieved data set in the fcs file. + filepath: Location of fcs file to parse + data_set: Index of retrieved data set in the fcs file. 
Returns: a tuple of (data, metadata) diff --git a/readfcs/datasets/__init__.py b/readfcs/datasets/__init__.py index fc14353..dc421e0 100644 --- a/readfcs/datasets/__init__.py +++ b/readfcs/datasets/__init__.py @@ -9,4 +9,4 @@ example """ -from ._datasets import example, Oetjen18_t1, Oetjen18_t2, Oetjen18_dc +from ._datasets import Oetjen18_dc, Oetjen18_t1, Oetjen18_t2, example diff --git a/readfcs/datasets/_datasets.py b/readfcs/datasets/_datasets.py index 61bf088..7e55ca2 100644 --- a/readfcs/datasets/_datasets.py +++ b/readfcs/datasets/_datasets.py @@ -25,7 +25,7 @@ def Oetjen18_t1() -> str: Reference: https://insight.jci.org/articles/view/124928 """ - url = "https://lamindb-dev-datasets.s3.amazonaws.com/.lamindb/DBNEczSgBui0bbzBXMGH.fcs" # noqa + url = "https://lamindb-dev-datasets.s3.amazonaws.com/.lamindb/DBNEczSgBui0bbzBXMGH.fcs" path_data, _ = urlretrieve(url, "oetjen18_t1.fcs") return path_data @@ -40,7 +40,7 @@ def Oetjen18_t2() -> str: Reference: https://insight.jci.org/articles/view/124928 """ - url = "https://lamindb-dev-datasets.s3.amazonaws.com/.lamindb/ckbFvcuxG4tln7OIg3ml.fcs" # noqa + url = "https://lamindb-dev-datasets.s3.amazonaws.com/.lamindb/ckbFvcuxG4tln7OIg3ml.fcs" path_data, _ = urlretrieve(url, "oetjen18_t2.fcs") return path_data @@ -55,7 +55,7 @@ def Oetjen18_dc() -> str: Reference: https://insight.jci.org/articles/view/124928 """ - url = "https://lamindb-dev-datasets.s3.amazonaws.com/.lamindb/yYLIWRT3sg4E3NUFigpB.fcs" # noqa + url = "https://lamindb-dev-datasets.s3.amazonaws.com/.lamindb/yYLIWRT3sg4E3NUFigpB.fcs" path_data, _ = urlretrieve(url, "oetjen18_dc.fcs") return path_data diff --git a/tests/test_base.py b/tests/test_base.py deleted file mode 100644 index 1ab1cc8..0000000 --- a/tests/test_base.py +++ /dev/null @@ -1,5 +0,0 @@ -from readfcs import __version__ - - -def test_version(): - assert __version__ >= "0.1.0"