Merge branch 'main' into preprocessing-pipeline
alejoe91 authored Jan 24, 2025
2 parents 5c19765 + ae19c2a commit 5e202c3
Showing 229 changed files with 9,683 additions and 4,266 deletions.
35 changes: 12 additions & 23 deletions .github/actions/build-test-environment/action.yml
@@ -1,41 +1,20 @@
name: Install packages
description: This action installs the package and its dependencies for testing

inputs:
python-version:
description: 'Python version to set up'
required: false
os:
description: 'Operating system to set up'
required: false

runs:
using: "composite"
steps:
- name: Install dependencies
run: |
sudo apt install git
git config --global user.email "[email protected]"
git config --global user.name "CI Almighty"
python -m venv ${{ github.workspace }}/test_env # Environment used in the caching step
python -m pip install -U pip # Official recommended way
source ${{ github.workspace }}/test_env/bin/activate
pip install tabulate # This produces summaries at the end
pip install -e .[test,extractors,streaming_extractors,test_extractors,full]
shell: bash
- name: Force installation of latest dev from key-packages when running dev (not release)
run: |
source ${{ github.workspace }}/test_env/bin/activate
spikeinterface_is_dev_version=$(python -c "import spikeinterface; print(spikeinterface.DEV_MODE)")
if [ $spikeinterface_is_dev_version = "True" ]; then
echo "Running spikeinterface dev version"
pip install --no-cache-dir git+https://github.com/NeuralEnsemble/python-neo
pip install --no-cache-dir git+https://github.com/SpikeInterface/probeinterface
fi
echo "Running tests for release, using pyproject.toml versions of neo and probeinterface"
- name: Install git-annex
shell: bash
- name: git-annex install
run: |
pip install datalad-installer
wget https://downloads.kitenet.net/git-annex/linux/current/git-annex-standalone-amd64.tar.gz
mkdir /home/runner/work/installation
mv git-annex-standalone-amd64.tar.gz /home/runner/work/installation/
@@ -44,4 +23,14 @@ runs:
tar xvzf git-annex-standalone-amd64.tar.gz
echo "$(pwd)/git-annex.linux" >> $GITHUB_PATH
cd $workdir
git config --global filter.annex.process "git-annex filter-process" # recommended for efficiency
- name: Force installation of latest dev from key-packages when running dev (not release)
run: |
spikeinterface_is_dev_version=$(python -c "import spikeinterface; print(spikeinterface.DEV_MODE)")
if [ $spikeinterface_is_dev_version = "True" ]; then
echo "Running spikeinterface dev version"
pip install --no-cache-dir git+https://github.com/NeuralEnsemble/python-neo
pip install --no-cache-dir git+https://github.com/SpikeInterface/probeinterface
fi
echo "Running tests for release, using pyproject.toml versions of neo and probeinterface"
shell: bash
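
The ``DEV_MODE`` flag queried by the step above is a module-level boolean exposed by ``spikeinterface`` itself; a minimal sketch of the same check in plain Python (the printed messages are illustrative):

.. code::

    # Mirrors the one-liner in the action: DEV_MODE is True for dev
    # installs of spikeinterface and False for releases.
    import spikeinterface

    if spikeinterface.DEV_MODE:
        print("dev version: install neo and probeinterface from GitHub main")
    else:
        print("release: use the versions pinned in pyproject.toml")
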
2 changes: 1 addition & 1 deletion .github/workflows/all-tests.yml
@@ -47,7 +47,7 @@ jobs:
echo "$file was changed"
done
- name: Set testing environment # This decides which tests are run and whether to install especial dependencies
- name: Set testing environment # This decides which tests are run and whether to install special dependencies
shell: bash
run: |
changed_files="${{ steps.changed-files.outputs.all_changed_files }}"
1 change: 0 additions & 1 deletion .github/workflows/full-test-with-codecov.yml
@@ -45,7 +45,6 @@ jobs:
env:
HDF5_PLUGIN_PATH: ${{ github.workspace }}/hdf5_plugin_path_maxwell
run: |
source ${{ github.workspace }}/test_env/bin/activate
pytest -m "not sorters_external" --cov=./ --cov-report xml:./coverage.xml -vv -ra --durations=0 | tee report_full.txt; test ${PIPESTATUS[0]} -eq 0 || exit 1
echo "# Timing profile of full tests" >> $GITHUB_STEP_SUMMARY
python ./.github/scripts/build_job_summary.py report_full.txt >> $GITHUB_STEP_SUMMARY
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
@@ -1,12 +1,12 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
rev: v5.0.0
hooks:
- id: check-yaml
- id: end-of-file-fixer
- id: trailing-whitespace
- repo: https://github.com/psf/black
rev: 24.8.0
rev: 24.10.0
hooks:
- id: black
files: ^src/
3 changes: 3 additions & 0 deletions doc/api.rst
@@ -346,6 +346,9 @@ spikeinterface.curation
.. autofunction:: remove_redundant_units
.. autofunction:: remove_duplicated_spikes
.. autofunction:: remove_excess_spikes
.. autofunction:: load_model
.. autofunction:: auto_label_units
.. autofunction:: train_model

Deprecated
~~~~~~~~~~
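
These three new entries are the public entry points for the model-based curation workflow documented in the how-to guides further down; a quick sketch of where they live (import path taken from the API section above):

.. code::

    # All three functions are documented under spikeinterface.curation
    from spikeinterface.curation import load_model, auto_label_units, train_model
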
5 changes: 4 additions & 1 deletion doc/conf.py
@@ -119,12 +119,15 @@

# for sphinx gallery plugin
sphinx_gallery_conf = {
'only_warn_on_example_error': True,
# This is the default but including here explicitly. Should build all docs and fail on gallery failures only.
# other option would be abort_on_example_error, but this fails on first failure. So we decided against this.
'only_warn_on_example_error': False,
'examples_dirs': ['../examples/tutorials'],
'gallery_dirs': ['tutorials' ], # path where to save gallery generated examples
'subsection_order': ExplicitOrder([
'../examples/tutorials/core',
'../examples/tutorials/extractors',
'../examples/tutorials/curation',
'../examples/tutorials/qualitymetrics',
'../examples/tutorials/comparison',
'../examples/tutorials/widgets',
20 changes: 20 additions & 0 deletions doc/development/development.rst
@@ -192,6 +192,7 @@ Miscelleaneous Stylistic Conventions
#. Avoid using abbreviations in variable names (e.g. use :code:`recording` instead of :code:`rec`). It is especially important to avoid single letter variables.
#. Use index as singular and indices for plural following the NumPy convention. Avoid idx or indexes. Plus, id and ids are reserved for identifiers (i.e. channel_ids)
#. We use file_path and folder_path (instead of file_name and folder_name) for clarity.
#. For the titles of documentation pages, only capitalize the first letter of the first word and classes or software packages. For example, "How to use a SortingAnalyzer in SpikeInterface".
#. For creating headers to divide sections of code we use the following convention (see issue `#3019 <https://github.com/SpikeInterface/spikeinterface/issues/3019>`_):


@@ -212,6 +213,25 @@ We use Sphinx to build the documentation. To build the documentation locally, yo
This will build the documentation in the :code:`doc/_build/html` folder. You can open the :code:`index.html` file in your browser to see the documentation.

Adding new documentation
------------------------

Documentation can be added as a
`sphinx-gallery <https://sphinx-gallery.github.io/stable/index.html>`_
Python file ('tutorials')
or a
`sphinx rst <https://sphinx-tutorial.readthedocs.io/step-1/>`_
file (all other sections).

To add a new tutorial, add your ``.py`` file to ``spikeinterface/examples``.
Then, update the ``spikeinterface/doc/tutorials_custom_index.rst`` file
to make a new card linking to the page and an optional image. See
``tutorials_custom_index.rst`` header for more information.

For other sections, write your documentation in ``.rst`` format and add
the page to the appropriate ``index.rst`` file found in the relevant
folder (e.g. ``how_to/index.rst``).
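
For illustration, a tutorial is an ordinary Python script whose module docstring becomes the rendered page. A minimal sketch (the file name, title, and ``generate_recording`` call are illustrative, not prescribed):

.. code::

    """
    My new tutorial
    ===============

    Sphinx-gallery renders this module docstring as the page introduction.
    """
    # Top-level statements run at docs build time; their output (including
    # figures) is captured into the generated page.
    import spikeinterface.core as si

    recording = si.generate_recording(durations=[10.0])  # toy recording
    print(recording)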

How to run code coverage locally
--------------------------------
To run code coverage locally, you can use the following command:
4 changes: 2 additions & 2 deletions doc/get_started/quickstart.rst
@@ -287,7 +287,7 @@ available parameters are dictionaries and can be accessed with:
'detect_threshold': 5,
'freq_max': 5000.0,
'freq_min': 400.0,
'max_threads_per_process': 1,
'max_threads_per_worker': 1,
'mp_context': None,
'n_jobs': 20,
'nested_params': None,
@@ -673,7 +673,7 @@ compute quality metrics (some quality metrics require certain extensions
'min_spikes': 0,
'window_size_s': 1},
'snr': {'peak_mode': 'extremum', 'peak_sign': 'neg'},
'synchrony': {'synchrony_sizes': (2, 4, 8)}}
'synchrony': {}
Since the recording is very short, let’s change some parameters to
43 changes: 43 additions & 0 deletions doc/how_to/auto_curation_prediction.rst
@@ -0,0 +1,43 @@
How to use a trained model to predict the curation labels
=========================================================

For a more detailed guide to using trained models, `read our tutorial here
<https://spikeinterface.readthedocs.io/en/latest/tutorials/curation/plot_1_automated_curation.html>`_.

There is a collection of models for automated curation available on the
`SpikeInterface HuggingFace page <https://huggingface.co/SpikeInterface>`_.

We'll apply the model ``toy_tetrode_model`` from ``SpikeInterface`` to a SortingAnalyzer
called ``sorting_analyzer``. We assume that the quality and template metrics have
already been computed.

We need to pass the ``sorting_analyzer``, the ``repo_id`` (which is just the part of the
repo's URL after huggingface.co/), and confirm that we trust the model.

.. code::

from spikeinterface.curation import auto_label_units
labels_and_probabilities = auto_label_units(
sorting_analyzer = sorting_analyzer,
repo_id = "SpikeInterface/toy_tetrode_model",
trust_model = True
)

If you have a local directory containing the model in a ``skops`` file, you can use this to
create the labels:

.. code::

labels_and_probabilities = si.auto_label_units(
sorting_analyzer = sorting_analyzer,
model_folder = "my_folder_with_a_model_in_it",
)

The returned labels are a dictionary of the model's predictions and their confidences. These
are also saved as properties of your ``sorting_analyzer`` and can be accessed like so:

.. code::

labels = sorting_analyzer.sorting.get_property("classifier_label")
probabilities = sorting_analyzer.sorting.get_property("classifier_probability")
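
As a sketch of what you might do with these properties (the ``unit_ids``
attribute and the 0.9 threshold are illustrative assumptions, not part of the
model output):

.. code::

    from collections import Counter

    labels = sorting_analyzer.sorting.get_property("classifier_label")
    probabilities = sorting_analyzer.sorting.get_property("classifier_probability")

    # How many units received each label
    print(Counter(labels))

    # Keep only units the model labelled with high confidence
    confident_units = [
        unit_id
        for unit_id, prob in zip(sorting_analyzer.unit_ids, probabilities)
        if prob > 0.9
    ]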
58 changes: 58 additions & 0 deletions doc/how_to/auto_curation_training.rst
@@ -0,0 +1,58 @@
How to train a model to predict curation labels
===============================================

A full tutorial for model-based curation can be found `here <https://spikeinterface.readthedocs.io/en/latest/tutorials/curation/plot_2_train_a_model.html>`_.

Here, we assume that you have:

* Two SortingAnalyzers called ``analyzer_1`` and
  ``analyzer_2``, with some template and quality metrics calculated for both
* Manually curated labels for the units in each analyzer, in lists called
``analyzer_1_labels`` and ``analyzer_2_labels``. If you have used phy, the lists can
be accessed using ``curated_labels = analyzer.sorting.get_property("quality")``.

With these objects calculated, you can train a model as follows:

.. code::

from spikeinterface.curation import train_model
analyzer_list = [analyzer_1, analyzer_2]
labels_list = [analyzer_1_labels, analyzer_2_labels]
output_folder = "/path/to/output_folder"
trainer = train_model(
mode="analyzers",
labels=labels_list,
analyzers=analyzer_list,
output_folder=output_folder,
metric_names=None, # Set if you want to use a subset of metrics, defaults to all calculated quality and template metrics
imputation_strategies=None, # Default is all available imputation strategies
scaling_techniques=None, # Default is all available scaling techniques
classifiers=None, # Defaults to Random Forest classifier only - we usually find this gives the best results, but a range of classifiers is available
seed=None, # Set a seed for reproducibility
)

The trainer tries several models and chooses the most accurate one. This model and
some metadata are stored in the ``output_folder`` and can later be loaded using the
``load_model`` function (`more details <https://spikeinterface.readthedocs.io/en/latest/tutorials/curation/plot_1_automated_curation.html#download-a-pretrained-model>`_).
We can also access the model, which is an sklearn ``Pipeline``, from the trainer object:

.. code::

best_model = trainer.best_pipeline

The training function can also be run in “csv” mode, if you prefer to
store metrics as .csv files. If the target labels are stored as a column in
the files, you can point to these with the ``target_label`` parameter:

.. code::

trainer = train_model(
mode="csv",
metrics_paths = ["/path/to/csv_file_1", "/path/to/csv_file_2"],
target_label = "my_label",
output_folder=output_folder,
)
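
Putting training and prediction together, a minimal end-to-end sketch
(``analyzer_3`` is a hypothetical uncurated analyzer and the paths are
placeholders): train on the curated analyzers, then label the new one from the
saved ``output_folder``:

.. code::

    from spikeinterface.curation import train_model, auto_label_units

    # Train on the two curated analyzers, as above
    trainer = train_model(
        mode="analyzers",
        labels=[analyzer_1_labels, analyzer_2_labels],
        analyzers=[analyzer_1, analyzer_2],
        output_folder="/path/to/output_folder",
    )

    # Apply the saved model to a new, uncurated analyzer
    labels_and_probabilities = auto_label_units(
        sorting_analyzer=analyzer_3,
        model_folder="/path/to/output_folder",
    )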
5 changes: 2 additions & 3 deletions doc/how_to/benchmark_with_hybrid_recordings.rst
@@ -2531,9 +2531,8 @@ Although non of the sorters find all units perfectly, ``Kilosort2.5``,
``Kilosort4``, and ``SpyKING CIRCUS 2`` all find around 10-12 hybrid
units with accuracy greater than 80%. ``Kilosort4`` has a better overall
curve, being able to find almost all units with an accuracy above 50%.
``Kilosort2.5`` performs well when looking at precision (finding all
spikes in a hybrid unit), at the cost of lower recall (finding spikes
when it shouldn’t).
``Kilosort2.5`` performs well when looking at precision (not finding spikes
when it shouldn’t), but it has a lower recall (finding all spikes in the ground truth).
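
As a reminder of the definitions behind these two terms, a toy computation
(the counts are made up for illustration):

.. code::

    # Toy spike counts for one hybrid unit (illustrative numbers only)
    true_positives, false_positives, false_negatives = 90, 5, 20

    precision = true_positives / (true_positives + false_positives)  # penalises spurious spikes
    recall = true_positives / (true_positives + false_negatives)     # penalises missed spikes
    print(f"precision={precision:.2f}, recall={recall:.2f}")  # precision=0.95, recall=0.82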

In this example, we showed how to:

2 changes: 1 addition & 1 deletion doc/how_to/combine_recordings.rst
@@ -1,4 +1,4 @@
Combine Recordings in SpikeInterface
Combine recordings in SpikeInterface
====================================

In this tutorial we will walk through combining multiple recording objects. Sometimes this occurs due to hardware
2 changes: 2 additions & 0 deletions doc/how_to/index.rst
@@ -15,3 +15,5 @@ Guides on how to solve specific, short problems in SpikeInterface. Learn how to.
load_your_data_into_sorting
benchmark_with_hybrid_recordings
drift_with_lfp
auto_curation_training
auto_curation_prediction
2 changes: 1 addition & 1 deletion doc/how_to/load_matlab_data.rst
@@ -1,4 +1,4 @@
Export MATLAB Data to Binary & Load in SpikeInterface
Export MATLAB data to binary & load in SpikeInterface
========================================================

In this tutorial, we will walk through the process of exporting data from MATLAB in a binary format and subsequently loading it using SpikeInterface in Python.
4 changes: 2 additions & 2 deletions doc/how_to/load_your_data_into_sorting.rst
@@ -1,5 +1,5 @@
Load Your Own Data into a Sorting
=================================
Load your own data into a Sorting object
========================================

Why make a :code:`Sorting`?

2 changes: 1 addition & 1 deletion doc/how_to/process_by_channel_group.rst
@@ -1,4 +1,4 @@
Process a Recording by Channel Group
Process a recording by channel group
====================================

In this tutorial, we will walk through how to preprocess and sort a recording
2 changes: 1 addition & 1 deletion doc/how_to/viewers.rst
@@ -1,4 +1,4 @@
Visualize Data
Visualize data
==============

There are several ways to plot signals (raw, preprocessed) and spikes.
Binary file added doc/images/files_screen.png
8 changes: 8 additions & 0 deletions doc/images/hf-logo.svg
Binary file added doc/images/initial_model_screen.png
Binary file modified doc/images/overview.png
2 changes: 1 addition & 1 deletion doc/index.rst
@@ -51,7 +51,7 @@ SpikeInterface is made of several modules to deal with different aspects of the

overview
get_started/index
tutorials/index
tutorials_custom_index
how_to/index
modules/index
api