From 90c176832da3146372db12f341bb48fded4d3742 Mon Sep 17 00:00:00 2001 From: Ben Jeffery Date: Mon, 16 Dec 2024 12:03:51 +0000 Subject: [PATCH 1/4] Empty commit to test CI From 94184a802499f487139c62d0f936c40981a65034 Mon Sep 17 00:00:00 2001 From: Ben Jeffery Date: Mon, 16 Dec 2024 12:41:38 +0000 Subject: [PATCH 2/4] Fixup pre-commit --- .flake8 | 6 ++++ .pre-commit-config.yaml | 29 ++++++++++++++++ tests/test_methods.py | 55 +++++++++++++++++++----------- tscompare/__init__.py | 9 +++-- tscompare/methods.py | 74 +++++++++++++++++++++-------------------- tscompare/provenance.py | 1 - 6 files changed, 115 insertions(+), 59 deletions(-) create mode 100644 .flake8 create mode 100644 .pre-commit-config.yaml diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..b387539 --- /dev/null +++ b/.flake8 @@ -0,0 +1,6 @@ +[flake8] +# Based directly on Black's recommendations: +# https://black.readthedocs.io/en/stable/the_black_code_style.html#line-length +max-line-length = 81 +select = A,C,E,F,W,B,B950 +ignore = E203, E501, W503 \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..a5565c2 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,29 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 + hooks: + - id: check-merge-conflict + - id: debug-statements + - id: mixed-line-ending + - id: check-case-conflict + - id: check-yaml + - repo: https://github.com/asottile/reorder_python_imports + rev: v3.14.0 + hooks: + - id: reorder-python-imports + - repo: https://github.com/asottile/pyupgrade + rev: v3.19.0 + hooks: + - id: pyupgrade + args: [--py39-plus] + - repo: https://github.com/psf/black + rev: 24.10.0 + hooks: + - id: black + language_version: python3 + - repo: https://github.com/pycqa/flake8 + rev: 7.1.1 + hooks: + - id: flake8 + args: [--config=.flake8] + additional_dependencies: ["flake8-bugbear==24.12.12", "flake8-builtins==2.5.0"] diff --git a/tests/test_methods.py b/tests/test_methods.py index 9933bb0..e4654f0 100644 --- a/tests/test_methods.py +++ b/tests/test_methods.py @@ -23,7 +23,6 @@ """ Test tools for mapping between node sets of different tree sequences """ - from collections import defaultdict from itertools import combinations @@ -99,10 +98,13 @@ def naive_compare(ts, other, transform=None): Ineffiecient but transparent function to compute dissimilarity and root-mean-square-error between two tree sequences. """ + def f(t): return np.log(1 + t) - if transform is not None: - f = transform + + if transform is None: + transform = f + shared_spans = naive_shared_node_spans(ts, other).toarray() max_span = np.max(shared_spans, axis=1) assert len(max_span) == ts.num_nodes @@ -115,7 +117,9 @@ def f(t): else: for j in range(other.num_nodes): if shared_spans[i, j] == max_span[i]: - time_array[i, j] = np.abs(f(ts.nodes_time[i]) - f(other.nodes_time[j])) + time_array[i, j] = np.abs( + transform(ts.nodes_time[i]) - transform(other.nodes_time[j]) + ) dissimilarity_matrix[i, j] = 1 / (1 + time_array[i, j]) best_match = np.argmax(dissimilarity_matrix, axis=1) best_match_spans = np.zeros((ts.num_nodes,)) @@ -180,9 +184,7 @@ def test_node_spans(self, ts): naive_ns = naive_node_span(ts) assert np.allclose(eval_ns, naive_ns) - @pytest.mark.parametrize( - "pair", combinations([true_simpl, true_unary], 2) - ) + @pytest.mark.parametrize("pair", combinations([true_simpl, true_unary], 2)) def test_shared_spans(self, pair): """ Check that efficient implementation returns same answer as naive @@ -205,13 +207,16 @@ def test_match_self(self, ts): assert np.allclose(time, ts.nodes_time) assert np.array_equal(hit, np.arange(ts.num_nodes)) + class TestDissimilarity: def verify_compare(self, ts, other, transform=None): - match_span, ts_span, other_span, rmse = naive_compare(ts, other, transform=transform) + match_span, ts_span, other_span, rmse = naive_compare( + ts, other, transform=transform + ) dis = tscompare.compare(ts, other, transform=transform) - assert np.isclose(1.0 - match_span/ts_span, dis.arf) - assert np.isclose(match_span/other_span, dis.tpr) + assert np.isclose(1.0 - match_span / ts_span, dis.arf) + assert np.isclose(match_span / other_span, dis.tpr) assert np.isclose(ts_span - match_span, dis.dissimilarity) assert np.isclose(ts_span, dis.total_span[0]) assert np.isclose(other_span, dis.total_span[1]) @@ -235,7 +240,7 @@ def test_basic_comparison(self, pair): def test_zero_dissimilarity(self, pair): dis = tscompare.compare(pair[0], pair[1]) assert np.isclose(dis.dissimilarity, 0) - assert np.isclose(dis.arf, 0) + assert np.isclose(dis.arf, 0) assert np.isclose(dis.rmse, 0) def test_transform(self): @@ -243,7 +248,7 @@ def test_transform(self): dis2 = tscompare.compare(true_simpl, true_simpl, transform=None) assert dis1.dissimilarity == dis2.dissimilarity assert dis1.rmse == dis2.rmse - self.verify_compare(true_simpl, true_ext, transform=lambda t: 1/(1 + t)) + self.verify_compare(true_simpl, true_ext, transform=lambda t: 1 / (1 + t)) def get_simple_ts(self, samples=None, time=False, span=False, no_match=False): # A simple tree sequence we can use to properly test various @@ -397,12 +402,17 @@ def test_rmse(self): true_total_span = 46 assert dis.total_span[0] == true_total_span assert dis.total_span[1] == true_total_span + def f(t): return np.log(1 + t) - true_rmse = np.sqrt(( - 2 * 6 * (f(500) - f(200))**2 # nodes 4, 5 - + 2 * 2 * (f(750) - f(600))**2 # nodes, 7, 8 - ) / true_total_span) + + true_rmse = np.sqrt( + ( + 2 * 6 * (f(500) - f(200)) ** 2 # nodes 4, 5 + + 2 * 2 * (f(750) - f(600)) ** 2 # nodes, 7, 8 + ) + / true_total_span + ) assert np.isclose(dis.arf, 0.0) assert np.isclose(dis.tpr, 1.0) assert np.isclose(dis.dissimilarity, 0.0) @@ -414,12 +424,17 @@ def test_value_and_error(self): dis = tscompare.compare(ts, other) true_total_spans = (46, 47) assert dis.total_span == true_total_spans + def f(t): return np.log(1 + t) - true_rmse = np.sqrt(( - 2 * 6 * (f(500) - f(200))**2 # nodes 4, 5 - + 2 * 2 * (f(750) - f(600))**2 # nodes, 7, 8 - ) / true_total_spans[0]) + + true_rmse = np.sqrt( + ( + 2 * 6 * (f(500) - f(200)) ** 2 # nodes 4, 5 + + 2 * 2 * (f(750) - f(600)) ** 2 # nodes, 7, 8 + ) + / true_total_spans[0] + ) assert np.isclose(dis.arf, 4 / true_total_spans[0]) assert np.isclose(dis.tpr, (true_total_spans[0] - 4) / true_total_spans[1]) assert np.isclose(dis.dissimilarity, 4) diff --git a/tscompare/__init__.py b/tscompare/__init__.py index c88e8f7..35777ef 100644 --- a/tscompare/__init__.py +++ b/tscompare/__init__.py @@ -22,5 +22,10 @@ """ Tools for comparing tree sequences """ -from .methods import compare, node_spans, CladeMap, shared_node_spans, match_node_ages, ARFResult -from .provenance import __version__ +from .methods import ARFResult # noqa F401 +from .methods import CladeMap # noqa F401 +from .methods import compare # noqa F401 +from .methods import match_node_ages # noqa F401 +from .methods import node_spans # noqa F401 +from .methods import shared_node_spans # noqa F401 +from .provenance import __version__ # noqa F401 diff --git a/tscompare/methods.py b/tscompare/methods.py index e8e2561..ee92669 100644 --- a/tscompare/methods.py +++ b/tscompare/methods.py @@ -22,17 +22,16 @@ """ Tools for comparing node times between tree sequences with different node sets """ - -from dataclasses import dataclass +import copy from collections import defaultdict -from itertools import groupby, product +from dataclasses import dataclass +from itertools import product -import copy import numpy as np import scipy.sparse - import tskit + def node_spans(ts): """ Returns the array of "node spans", i.e., the `j`th entry gives @@ -97,7 +96,7 @@ def _propagate(self, edge, downdate=False): node = self.tree.parent(node) return nodes - def next(self): + def next(self): # noqa: A003 """ Advance to the next tree, returning the difference between trees as a dictionary of the form `node : (last_clade, next_clade)` @@ -254,18 +253,18 @@ def shared_node_spans(ts, other): def match_node_ages(ts, other): """ - For each node in `ts`, return the age of a matched node from `other`. Node - matching is accomplished as described in :func:`.compare`. - + For each node in `ts`, return the age of a matched node from `other`. Node + matching is accomplished as described in :func:`.compare`. + - Returns a tuple of three vectors of length `ts.num_nodes`, in this order: - the age of the best matching node in `other`; - the proportion of the node span in `ts` that is covered by the best match; - and the node id of the best match in `other`. + Returns a tuple of three vectors of length `ts.num_nodes`, in this order: + the age of the best matching node in `other`; + the proportion of the node span in `ts` that is covered by the best match; + and the node id of the best match in `other`. -:return: A tuple of arrays of length `ts.num_nodes` containing - (time of matching node, proportion overlap, and node ID of match). + :return: A tuple of arrays of length `ts.num_nodes` containing + (time of matching node, proportion overlap, and node ID of match). """ shared_spans = shared_node_spans(ts, other) @@ -283,7 +282,6 @@ def match_node_ages(ts, other): @dataclass class ARFResult: - """ The result of a call to tscompare.compare(ts, other), returning metrics associated with the ARG Robinson-Foulds @@ -302,7 +300,7 @@ class ARFResult: `dissimilarity`: The total span of `ts` that is not represented in `other`. - + `total_span`: The total of all node spans of the two tree sequences, in order (`ts`, `other`). @@ -314,6 +312,7 @@ class ARFResult: `transform`: The transformation function used to transform times for computing `rmse`. """ + arf: float tpr: float dissimilarity: float @@ -326,28 +325,29 @@ def __str__(self): Return a plain text summary of the ARF result. """ out = "Tree sequence comparison:\n" - out += f" ARF: {100*self.arf:.2f}%\n" - out += f" TPR: {100*self.tpr:.2f}%\n" + out += f" ARF: {100 * self.arf:.2f}%\n" + out += f" TPR: {100 * self.tpr:.2f}%\n" out += f" dissimilarity: {self.dissimilarity}\n" - out += f" total span (ts, other): {self.total_span[0]}, {self.total_span[1]}\n" + out += ( + f" total span (ts, other): {self.total_span[0]}, {self.total_span[1]}\n" + ) out += f" time RMSE: {self.rmse}\n" return out def compare(ts, other, transform=None): - """ For two tree sequences `ts` and `other`, this method returns an object of type :class:`.ARFResult`. The values reported summarize the degree to which nodes in `ts` "match" corresponding nodes in `other`. - + To match nodes, for each node in `ts`, the best matching node(s) from `other` has the longest matching span using :func:`.shared_node_spans`. If there are multiple matches with the same longest shared span for a single node, the best match is the match that is closest in time. - + Then, :class:`.ARFResult` contains: - (`dissimilarity`) @@ -356,8 +356,8 @@ def compare(ts, other, transform=None): samples as its best match in `other`. - (`arf`) - The fraction of the total span of `ts` over which each nodes' - descendant sample set does not match its' best match's descendant + The fraction of the total span of `ts` over which each nodes' + descendant sample set does not match its' best match's descendant sample set (i.e., the total *un*-matched span divided by the total span of `ts`). @@ -387,8 +387,11 @@ def compare(ts, other, transform=None): :rtype: ARFResult """ + def f(t): + return np.log(1 + t) + if transform is None: - transform = lambda t: np.log(1 + t) + transform = f shared_spans = shared_node_spans(ts, other) # Find all potential matches for a node based on max shared span length @@ -403,7 +406,9 @@ def compare(ts, other, transform=None): # determine best matches with the best_match_matrix ts_times = ts.nodes_time[row_ind[match]] other_times = other.nodes_time[col_ind[match]] - time_difference = np.absolute(np.asarray(transform(ts_times) - transform(other_times))) + time_difference = np.absolute( + np.asarray(transform(ts_times) - transform(other_times)) + ) # If a node x in `ts` has no match then we set time_difference to zero # This node then does not effect the rmse for j in range(len(shared_spans.data[match])): @@ -438,13 +443,10 @@ def compare(ts, other, transform=None): product = np.multiply((time_discrepancies**2), ts_node_spans) rmse = np.sqrt(np.sum(product) / total_span_ts) return ARFResult( - - arf = 1.0 - total_match_span / total_span_ts, - tpr = total_match_span / total_span_other, - - dissimilarity = total_span_ts - total_match_span, - total_span = (total_span_ts, total_span_other), - rmse = rmse, - transform = transform, + arf=1.0 - total_match_span / total_span_ts, + tpr=total_match_span / total_span_other, + dissimilarity=total_span_ts - total_match_span, + total_span=(total_span_ts, total_span_other), + rmse=rmse, + transform=transform, ) - diff --git a/tscompare/provenance.py b/tscompare/provenance.py index e6ed3d3..ddd0850 100644 --- a/tscompare/provenance.py +++ b/tscompare/provenance.py @@ -32,4 +32,3 @@ __version__ = get_version(root="..", relative_to=__file__) except ImportError: pass - From 32d3e155367a55b64f945cc72ca78731835349a3 Mon Sep 17 00:00:00 2001 From: Ben Jeffery Date: Mon, 16 Dec 2024 12:46:56 +0000 Subject: [PATCH 3/4] Fix docs --- .github/workflows/docs.yml | 42 +++++++++-------------- .github/workflows/tests.yml | 2 +- requirements/CI-docs-pip/requirements.txt | 1 + 3 files changed, 19 insertions(+), 26 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index b626fb1..e51fff7 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -7,13 +7,6 @@ on: tags: - '*' -env: - COMMIT_EMAIL: ben.jeffery.well+adminbot@gmail.com - MAKE_TARGET: -C python - OWNER: tskit-dev - REPO: tskit - REQUIREMENTS: python/requirements/CI-docs/requirements.txt - jobs: build-deploy-docs: name: Docs @@ -29,26 +22,25 @@ jobs: - uses: actions/checkout@v4 - - name: Setup Conda - uses: conda-incubator/setup-miniconda@v3 - with: - miniforge-version: latest - activate-environment: tskit-docs-env - - - name: Cache Conda env - uses: actions/cache@v4 + - uses: actions/setup-python@v5.3.0 with: - path: ${{ env.CONDA }}/envs - key: conda-${{ runner.os }}--${{ runner.arch }}--${{ hashFiles(env.REQUIREMENTS) }}-${{ env.CACHE_NUMBER }} - env: - CACHE_NUMBER: 0 - id: cache + python-version: "3.11" + cache: "pip" - - name: Update environment + - name: Create venv and install deps (one by one to avoid conflict errors) run: | - mamba install -y python=3.12 doxygen pip - pip install -r ${{ env.REQUIREMENTS }} - if: steps.cache.outputs.cache-hit != 'true' + pip install --upgrade pip wheel + pip install -r requirements/CI-docs-pip/requirements.txt - name: Build Docs - run: make -C docs + run: | + cd docs + make + + - name: Trigger docs site rebuild + if: github.ref == 'refs/heads/main' + run: | + curl -X POST https://api.github.com/repos/tskit-dev/tskit-site/dispatches \ + -H 'Accept: application/vnd.github.everest-preview+json' \ + -u AdminBot-tskit:${{ secrets.ADMINBOT_TOKEN }} \ + --data '{"event_type":"build-docs"}' \ No newline at end of file diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 992d971..8f736f8 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -26,7 +26,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - python: [ 3.9, 3.12 ] + python: [ 3.10, 3.12 ] os: [ macos-latest, ubuntu-24.04, windows-latest ] defaults: run: diff --git a/requirements/CI-docs-pip/requirements.txt b/requirements/CI-docs-pip/requirements.txt index e8b5efd..639199c 100644 --- a/requirements/CI-docs-pip/requirements.txt +++ b/requirements/CI-docs-pip/requirements.txt @@ -5,6 +5,7 @@ sphinx-issues==4.1.0 sphinx-argparse==0.4.0 svgwrite==1.4.3 tskit==0.6.0 +tsinfer==0.3.3 scipy==1.14.1 msprime==1.3.2 sphinx-book-theme From bdab23310eaa9fd7f45640ddbbf6e9802fb9c83b Mon Sep 17 00:00:00 2001 From: Ben Jeffery Date: Mon, 16 Dec 2024 14:58:25 +0000 Subject: [PATCH 4/4] Fix tests --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 8f736f8..e88c69c 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -26,7 +26,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - python: [ 3.10, 3.12 ] + python: [ "3.10", 3.12 ] os: [ macos-latest, ubuntu-24.04, windows-latest ] defaults: run: