Commit

Merge branch 'main' into support_python_3.13
h-mayorquin committed Jan 20, 2025
2 parents c935f6b + cabe66e commit d5f6317
Showing 132 changed files with 5,281 additions and 1,035 deletions.
35 changes: 12 additions & 23 deletions .github/actions/build-test-environment/action.yml
@@ -1,41 +1,20 @@
name: Install packages
description: This action installs the package and its dependencies for testing

-inputs:
-  python-version:
-    description: 'Python version to set up'
-    required: false
-  os:
-    description: 'Operating system to set up'
-    required: false
-
runs:
  using: "composite"
  steps:
    - name: Install dependencies
      run: |
        sudo apt install git
        git config --global user.email "[email protected]"
        git config --global user.name "CI Almighty"
-        python -m venv ${{ github.workspace }}/test_env # Environment used in the caching step
-        python -m pip install -U pip # Official recommended way
-        source ${{ github.workspace }}/test_env/bin/activate
        pip install tabulate # This produces summaries at the end
        pip install -e .[test,extractors,streaming_extractors,test_extractors,full]
      shell: bash
-    - name: Force installation of latest dev from key-packages when running dev (not release)
-      run: |
-        source ${{ github.workspace }}/test_env/bin/activate
-        spikeinterface_is_dev_version=$(python -c "import spikeinterface; print(spikeinterface.DEV_MODE)")
-        if [ $spikeinterface_is_dev_version = "True" ]; then
-          echo "Running spikeinterface dev version"
-          pip install --no-cache-dir git+https://github.com/NeuralEnsemble/python-neo
-          pip install --no-cache-dir git+https://github.com/SpikeInterface/probeinterface
-        fi
-        echo "Running tests for release, using pyproject.toml versions of neo and probeinterface"
+    - name: Install git-annex
+      shell: bash
-    - name: git-annex install
      run: |
        pip install datalad-installer
        wget https://downloads.kitenet.net/git-annex/linux/current/git-annex-standalone-amd64.tar.gz
        mkdir /home/runner/work/installation
        mv git-annex-standalone-amd64.tar.gz /home/runner/work/installation/
@@ -44,4 +23,14 @@ runs:
        tar xvzf git-annex-standalone-amd64.tar.gz
        echo "$(pwd)/git-annex.linux" >> $GITHUB_PATH
        cd $workdir
+        git config --global filter.annex.process "git-annex filter-process" # recommended for efficiency
+    - name: Force installation of latest dev from key-packages when running dev (not release)
+      run: |
+        spikeinterface_is_dev_version=$(python -c "import spikeinterface; print(spikeinterface.DEV_MODE)")
+        if [ "$spikeinterface_is_dev_version" = "True" ]; then
+          echo "Running spikeinterface dev version"
+          pip install --no-cache-dir git+https://github.com/NeuralEnsemble/python-neo
+          pip install --no-cache-dir git+https://github.com/SpikeInterface/probeinterface
+        else
+          echo "Running tests for release, using pyproject.toml versions of neo and probeinterface"
+        fi
      shell: bash
2 changes: 1 addition & 1 deletion .github/workflows/all-tests.yml
@@ -47,7 +47,7 @@ jobs:
echo "$file was changed"
done
- name: Set testing environment # This decides which tests are run and whether to install especial dependencies
- name: Set testing environment # This decides which tests are run and whether to install special dependencies
shell: bash
run: |
changed_files="${{ steps.changed-files.outputs.all_changed_files }}"
1 change: 0 additions & 1 deletion .github/workflows/full-test-with-codecov.yml
@@ -45,7 +45,6 @@ jobs:
        env:
          HDF5_PLUGIN_PATH: ${{ github.workspace }}/hdf5_plugin_path_maxwell
        run: |
-          source ${{ github.workspace }}/test_env/bin/activate
          pytest -m "not sorters_external" --cov=./ --cov-report xml:./coverage.xml -vv -ra --durations=0 | tee report_full.txt; test ${PIPESTATUS[0]} -eq 0 || exit 1
          echo "# Timing profile of full tests" >> $GITHUB_STEP_SUMMARY
          python ./.github/scripts/build_job_summary.py report_full.txt >> $GITHUB_STEP_SUMMARY
3 changes: 3 additions & 0 deletions doc/api.rst
@@ -346,6 +346,9 @@ spikeinterface.curation
.. autofunction:: remove_redundant_units
.. autofunction:: remove_duplicated_spikes
.. autofunction:: remove_excess_spikes
+.. autofunction:: load_model
+.. autofunction:: auto_label_units
+.. autofunction:: train_model

Deprecated
~~~~~~~~~~
5 changes: 4 additions & 1 deletion doc/conf.py
@@ -119,12 +119,15 @@

# for sphinx gallery plugin
sphinx_gallery_conf = {
-    'only_warn_on_example_error': True,
+    # This is the default, but we include it explicitly: build all docs and fail on gallery errors only.
+    # The other option would be abort_on_example_error, but that stops at the first failure, so we decided against it.
+    'only_warn_on_example_error': False,
    'examples_dirs': ['../examples/tutorials'],
    'gallery_dirs': ['tutorials'],  # path where to save gallery generated examples
    'subsection_order': ExplicitOrder([
        '../examples/tutorials/core',
        '../examples/tutorials/extractors',
+        '../examples/tutorials/curation',
        '../examples/tutorials/qualitymetrics',
        '../examples/tutorials/comparison',
        '../examples/tutorials/widgets',
19 changes: 19 additions & 0 deletions doc/development/development.rst
@@ -213,6 +213,25 @@ We use Sphinx to build the documentation. To build the documentation locally, you
This will build the documentation in the :code:`doc/_build/html` folder. You can open the :code:`index.html` file in your browser to see the documentation.

+Adding new documentation
+------------------------
+
+Documentation can be added as a
+`sphinx-gallery <https://sphinx-gallery.github.io/stable/index.html>`_
+Python file ('tutorials')
+or a
+`sphinx rst <https://sphinx-tutorial.readthedocs.io/step-1/>`_
+file (all other sections).
+
+To add a new tutorial, add your ``.py`` file to ``spikeinterface/examples``.
+Then, update the ``spikeinterface/doc/tutorials_custom_index.rst`` file
+to make a new card linking to the page and an optional image. See the
+``tutorials_custom_index.rst`` header for more information.
+
+For other sections, write your documentation in ``.rst`` format and add
+the page to the appropriate ``index.rst`` file found in the relevant
+folder (e.g. ``how_to/index.rst``).
+
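As a minimal sketch of the sphinx-gallery format described above (an illustration added
for this guide, not content from this commit; the file name, titles, and generator call
are examples):

.. code-block:: python

    """
    My new tutorial
    ===============

    A one-line summary of what this tutorial demonstrates.
    """
    # sphinx-gallery renders the module docstring above as the page header;
    # each "# %%" marker below starts a new rendered cell.

    # %%
    import spikeinterface.full as si

    # a small simulated recording/sorting pair to demonstrate with
    recording, sorting = si.generate_ground_truth_recording(durations=[10.0], num_units=5)
    print(recording)
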
How to run code coverage locally
--------------------------------
To run code coverage locally, you can use the following command:
2 changes: 1 addition & 1 deletion doc/get_started/quickstart.rst
@@ -673,7 +673,7 @@ compute quality metrics (some quality metrics require certain extensions
'min_spikes': 0,
'window_size_s': 1},
'snr': {'peak_mode': 'extremum', 'peak_sign': 'neg'},
-'synchrony': {'synchrony_sizes': (2, 4, 8)}}
+'synchrony': {}}
Since the recording is very short, let’s change some parameters to
43 changes: 43 additions & 0 deletions doc/how_to/auto_curation_prediction.rst
@@ -0,0 +1,43 @@
How to use a trained model to predict the curation labels
=========================================================

For a more detailed guide to using trained models, `read our tutorial here
<https://spikeinterface.readthedocs.io/en/latest/tutorials/curation/plot_1_automated_curation.html>`_.

There is a collection of models for automated curation available on the
`SpikeInterface HuggingFace page <https://huggingface.co/SpikeInterface>`_.

We'll apply the model ``toy_tetrode_model`` from ``SpikeInterface`` to a SortingAnalyzer
called ``sorting_analyzer``. We assume that the quality and template metrics have
already been computed.

We need to pass the ``sorting_analyzer``, the ``repo_id`` (which is just the part of the
repo's URL after huggingface.co/), and confirm that we trust the model.

.. code::

    from spikeinterface.curation import auto_label_units

    labels_and_probabilities = auto_label_units(
        sorting_analyzer=sorting_analyzer,
        repo_id="SpikeInterface/toy_tetrode_model",
        trust_model=True,
    )
If you have a local directory containing the model in a ``skops`` file, you can use this to
create the labels:

.. code::

    labels_and_probabilities = si.auto_label_units(
        sorting_analyzer=sorting_analyzer,
        model_folder="my_folder_with_a_model_in_it",
    )
The returned labels are a dictionary of the model's predictions and its confidences. These
are also saved as a property of your ``sorting_analyzer`` and can be accessed like so:

.. code::

    labels = sorting_analyzer.sorting.get_property("classifier_label")
    probabilities = sorting_analyzer.sorting.get_property("classifier_probability")
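
As a follow-up sketch (an illustration, not content from this commit; the label value
``"good"`` is an assumption, substitute whatever labels your model predicts), the
predictions can drive a programmatic sub-selection:

.. code::

    import numpy as np

    labels = np.asarray(sorting_analyzer.sorting.get_property("classifier_label"))
    # keep only the units the model labelled "good"
    good_unit_ids = sorting_analyzer.unit_ids[labels == "good"]
    curated_analyzer = sorting_analyzer.select_units(unit_ids=good_unit_ids)
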
58 changes: 58 additions & 0 deletions doc/how_to/auto_curation_training.rst
@@ -0,0 +1,58 @@
How to train a model to predict curation labels
===============================================

A full tutorial for model-based curation can be found `here <https://spikeinterface.readthedocs.io/en/latest/tutorials/curation/plot_2_train_a_model.html>`_.

Here, we assume that you have:

* Two SortingAnalyzers called ``analyzer_1`` and
  ``analyzer_2``, with some template and quality metrics calculated for both
* Manually curated labels for the units in each analyzer, in lists called
  ``analyzer_1_labels`` and ``analyzer_2_labels``. If you have used phy, the lists can
  be accessed using ``curated_labels = analyzer.sorting.get_property("quality")``.

With these objects calculated, you can train a model as follows:

.. code::

    from spikeinterface.curation import train_model

    analyzer_list = [analyzer_1, analyzer_2]
    labels_list = [analyzer_1_labels, analyzer_2_labels]
    output_folder = "/path/to/output_folder"

    trainer = train_model(
        mode="analyzers",
        labels=labels_list,
        analyzers=analyzer_list,
        output_folder=output_folder,
        metric_names=None,  # set this to use a subset of metrics; defaults to all calculated quality and template metrics
        imputation_strategies=None,  # default is all available imputation strategies
        scaling_techniques=None,  # default is all available scaling techniques
        classifiers=None,  # defaults to Random Forest only; we usually find this gives the best results, but a range of classifiers is available
        seed=None,  # set a seed for reproducibility
    )
The trainer tries several models and chooses the most accurate one. This model and
some metadata are stored in the ``output_folder`` and can later be loaded using the
``load_model`` function (`more details <https://spikeinterface.readthedocs.io/en/latest/tutorials/curation/plot_1_automated_curation.html#download-a-pretrained-model>`_).
We can also access the model, which is an sklearn ``Pipeline``, from the trainer object:

.. code::

    best_model = trainer.best_pipeline
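
To reload the model in a later session, a minimal sketch (an illustration, not content
from this commit; we assume ``load_model`` mirrors the ``model_folder``/``trust_model``
arguments of ``auto_label_units``, and the returned metadata name is an example):

.. code::

    from spikeinterface.curation import load_model

    # reload the best model and its metadata from the training output folder
    model, model_info = load_model(model_folder=output_folder, trust_model=True)
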
The training function can also be run in “csv” mode, if you prefer to
store metrics as ``.csv`` files. If the target labels are stored as a column in
the files, you can point to them with the ``target_label`` parameter:

.. code::

    trainer = train_model(
        mode="csv",
        metrics_paths=["/path/to/csv_file_1", "/path/to/csv_file_2"],
        target_label="my_label",
        output_folder=output_folder,
    )
2 changes: 2 additions & 0 deletions doc/how_to/index.rst
@@ -15,3 +15,5 @@ Guides on how to solve specific, short problems in SpikeInterface. Learn how to.
   load_your_data_into_sorting
   benchmark_with_hybrid_recordings
   drift_with_lfp
+   auto_curation_training
+   auto_curation_prediction
Binary file added doc/images/files_screen.png
8 changes: 8 additions & 0 deletions doc/images/hf-logo.svg
Binary file added doc/images/initial_model_screen.png
2 changes: 1 addition & 1 deletion doc/index.rst
@@ -51,7 +51,7 @@ SpikeInterface is made of several modules to deal with different aspects of the

   overview
   get_started/index
-   tutorials/index
+   tutorials_custom_index
   how_to/index
   modules/index
   api
2 changes: 1 addition & 1 deletion doc/modules/core.rst
@@ -385,7 +385,7 @@ and merging unit groups.
    sorting_analyzer_select = sorting_analyzer.select_units(unit_ids=[0, 1, 2, 3])
    sorting_analyzer_remove = sorting_analyzer.remove_units(remove_unit_ids=[0])
-    sorting_analyzer_merge = sorting_analyzer.merge_units([0, 1], [2, 3])
+    sorting_analyzer_merge = sorting_analyzer.merge_units([[0, 1], [2, 3]])
All computed extensions will be automatically propagated or merged when curating. Please refer to the
:ref:`modules/curation:Curation module` documentation for more information.
11 changes: 8 additions & 3 deletions doc/modules/curation.rst
@@ -88,7 +88,7 @@ The ``censored_period_ms`` parameter is the time window in milliseconds to consider
The :py:func:`~spikeinterface.curation.remove_redundant_units` function removes
redundant units from the sorting output. Redundant units are units that share over
a certain percentage of spikes, by default 80%.
-The function can acto both on a ``BaseSorting`` or a ``SortingAnalyzer`` object.
+The function can act on either a ``BaseSorting`` or a ``SortingAnalyzer`` object.

.. code-block:: python
@@ -102,13 +102,18 @@ The function can acto both on a ``BaseSorting`` or a ``SortingAnalyzer`` object.
    )

    # remove redundant units from SortingAnalyzer object
-    clean_sorting_analyzer = remove_redundant_units(
+    # note this returns a cleaned sorting
+    clean_sorting = remove_redundant_units(
        sorting_analyzer,
        duplicate_threshold=0.9,
        remove_strategy="min_shift"
    )
+    # in order to have a SortingAnalyzer with only the non-redundant units one must
+    # select the desired units, remembering to give format and folder if one wants
+    # a persistent SortingAnalyzer.
+    clean_sorting_analyzer = sorting_analyzer.select_units(clean_sorting.unit_ids)
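
As a sketch of the persistence hint in the comment above (an illustration, not content
from this commit; the ``format`` value and ``folder`` name are examples):

.. code-block:: python

    clean_sorting_analyzer = sorting_analyzer.select_units(
        clean_sorting.unit_ids,
        format="binary_folder",
        folder="clean_analyzer",
    )
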
-We recommend usinf the ``SortingAnalyzer`` approach, since the ``min_shift`` strategy keeps
+We recommend using the ``SortingAnalyzer`` approach, since the ``min_shift`` strategy keeps
the unit (among the redundant ones) with the better template alignment.


4 changes: 2 additions & 2 deletions doc/modules/qualitymetrics/synchrony.rst
@@ -12,7 +12,7 @@ trains. This way synchronous events can be found both in multi-unit and single-unit
Complexity is calculated by counting the number of spikes (i.e. non-empty bins) that occur at the same sample index,
within and across spike trains.

-Synchrony metrics can be computed for different synchrony sizes (>1), defining the number of simultaneous spikes to count.
+Synchrony metrics are computed for 2, 4 and 8 synchronous spikes.



@@ -29,7 +29,7 @@ Example code
    import spikeinterface.qualitymetrics as sqm

    # Combine a sorting and recording into a sorting_analyzer
-    synchrony = sqm.compute_synchrony_metrics(sorting_analyzer=sorting_analyzer synchrony_sizes=(2, 4, 8))
+    synchrony = sqm.compute_synchrony_metrics(sorting_analyzer=sorting_analyzer)

    # synchrony is a tuple of dicts with the synchrony metrics for each unit