Skip to content

Commit

Permalink
Merge branch 'main' into add_tests_for_all_os
Browse files Browse the repository at this point in the history
  • Loading branch information
h-mayorquin authored Jun 18, 2024
2 parents 2e3cb1c + a3527ea commit 844c65d
Show file tree
Hide file tree
Showing 17 changed files with 476 additions and 36 deletions.
105 changes: 105 additions & 0 deletions doc/modules/curation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,111 @@ The merging and splitting operations are handled by the :py:class:`~spikeinterfa
# here is the final clean sorting
clean_sorting = cs.sorting
Manual curation format
----------------------

SpikeInterface internally supports a JSON-based manual curation format.
When manual curation is necessary, modifying a dataset in place is a bad practice.
Instead, to ensure the reproducibility of the spike sorting pipelines, we have introduced a simple and JSON-based manual curation format.
At the moment, this format defines merges, deletions, and manual tags.
This simple file can be kept alongside the output of a sorter and applied to the result to obtain a "clean" result.

This format has two parts:

* **definition** with the following keys:

* "format_version" : format specification
* "unit_ids" : the list of unit_ids
* "label_definitions" : list of label categories and possible labels per category.
Every category can be *exclusive=True* (only one label allowed) or *exclusive=False* (several labels possible)

* **manual output** curation with the following keys:

* "manual_labels"
* "merged_unit_groups"
* "removed_units"

Here is the description of the format with a simple example:

.. code-block:: json
{
# the first part of the format is the definition
"format_version": "1",
"unit_ids": [
"u1",
"u2",
"u3",
"u6",
"u10",
"u14",
"u20",
"u31",
"u42"
],
"label_definitions": {
"quality": {
"label_options": [
"good",
"noise",
"MUA",
"artifact"
],
"exclusive": true
},
"putative_type": {
"label_options": [
"excitatory",
"inhibitory",
"pyramidal",
"mitral"
],
"exclusive": false
}
},
# the second part of the format is manual action
"manual_labels": [
{
"unit_id": "u1",
"quality": [
"good"
]
},
{
"unit_id": "u2",
"quality": [
"noise"
],
"putative_type": [
"excitatory",
"pyramidal"
]
},
{
"unit_id": "u3",
"putative_type": [
"inhibitory"
]
}
],
"merged_unit_groups": [
[
"u3",
"u6"
],
[
"u10",
"u14",
"u20"
]
],
"removed_units": [
"u31",
"u42"
]
}
Automatic curation tools
------------------------
Expand Down
10 changes: 4 additions & 6 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ classifiers = [


dependencies = [
"numpy",
"numpy>=1.26, <2.0", # 1.20 np.ptp, 1.26 for avoiding pickling errors when numpy >2.0
"threadpoolctl>=3.0.0",
"tqdm",
"zarr>=2.16,<2.18",
Expand Down Expand Up @@ -65,18 +65,16 @@ extractors = [
"pyedflib>=0.1.30",
"sonpy;python_version<'3.10'",
"lxml", # lxml for neuroscope
"scipy<1.13",
"scipy",
"ONE-api>=2.7.0", # alf sorter and streaming IBL
"ibllib>=2.32.5", # streaming IBL
"ibllib>=2.36.0", # streaming IBL
"pymatreader>=0.0.32", # For cell explorer matlab files
"zugbruecke>=0.2; sys_platform!='win32'", # For plexon2
]

streaming_extractors = [
"ONE-api>=2.7.0", # alf sorter and streaming IBL
"ibllib>=2.32.5", # streaming IBL
"scipy<1.13", # ibl has a dependency on scipy but it does not have an upper bound
# Remove this once https://github.com/int-brain-lab/ibllib/issues/753
"ibllib>=2.36.0", # streaming IBL
# Following dependencies are for streaming with nwb files
"pynwb>=2.6.0",
"fsspec",
Expand Down
4 changes: 2 additions & 2 deletions src/spikeinterface/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,5 @@
# This flag must be set to False for release
# This avoids using versioning that contains ".dev0" (and this is a better choice)
# This is mainly useful when using run_sorter in a container and spikeinterface install
# DEV_MODE = True
DEV_MODE = False
DEV_MODE = True
# DEV_MODE = False
7 changes: 6 additions & 1 deletion src/spikeinterface/core/core_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,12 @@ def default(self, obj):
if isinstance(obj, np.generic):
return obj.item()

if np.issctype(obj): # Cast numpy datatypes to their names
# Standard numpy dtypes like np.dtype("int32") are transformed this way
if isinstance(obj, np.dtype):
return np.dtype(obj).name

# This will transform to a string canonical representation of the dtype (e.g. np.int32 -> 'int32')
if isinstance(obj, type) and issubclass(obj, np.generic):
return np.dtype(obj).name

if isinstance(obj, np.ndarray):
Expand Down
1 change: 0 additions & 1 deletion src/spikeinterface/core/tests/test_jsonification.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,6 @@ def test_numpy_dtype_alises_encoding():
# People tend to use this a dtype instead of the proper classes
json.dumps(np.int32, cls=SIJsonEncoder)
json.dumps(np.float32, cls=SIJsonEncoder)
json.dumps(np.bool_, cls=SIJsonEncoder) # Note that np.bool was deperecated in numpy 1.20.0


def test_recording_encoding(numpy_generated_recording):
Expand Down
3 changes: 3 additions & 0 deletions src/spikeinterface/curation/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,7 @@
from .mergeunitssorting import MergeUnitsSorting, merge_units_sorting
from .splitunitsorting import SplitUnitSorting, split_unit_sorting

# curation format
from .curation_format import validate_curation_dict, curation_label_to_dataframe

from .sortingview_curation import apply_sortingview_curation
163 changes: 163 additions & 0 deletions src/spikeinterface/curation/curation_format.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
from itertools import combinations


supported_curation_format_versions = {"1"}


def validate_curation_dict(curation_dict):
    """
    Validate that the curation dictionary given as parameter complies with the format.

    The function does not return anything. It raises an error if something is wrong in the format.

    Parameters
    ----------
    curation_dict : dict
        The curation dictionary to validate. Besides "format_version" it is read
        for the keys "unit_ids", "label_definitions", "manual_labels",
        "merged_unit_groups" and "removed_units".

    Raises
    ------
    ValueError
        If "format_version" is missing or unsupported, if labeled / merged / removed
        units are not listed in "unit_ids" (when "unit_ids" is not None), if a unit
        belongs to several merge groups, if a unit is both merged and removed, if a
        manual label value is not a list, or if an exclusive label category carries
        more than one label for a unit.
    KeyError
        If one of the other required keys is missing (they are accessed directly).
    """

    # format
    if "format_version" not in curation_dict:
        raise ValueError("No format_version")

    format_version = curation_dict["format_version"]
    if format_version not in supported_curation_format_versions:
        raise ValueError(
            f"Format version ({format_version}) not supported. "
            f"Only {supported_curation_format_versions} are valid"
        )

    # unit_ids
    manual_labels = curation_dict["manual_labels"]
    merged_unit_groups = curation_dict["merged_unit_groups"]

    labeled_unit_set = {lbl["unit_id"] for lbl in manual_labels}
    # Flatten with a comprehension: linear time, and groups may be any iterable (list, tuple, ...)
    merged_units_set = {unit_id for group in merged_unit_groups for unit_id in group}
    removed_units_set = set(curation_dict["removed_units"])

    if curation_dict["unit_ids"] is not None:
        # old format v0 did not contain unit_ids so this can contain None
        unit_set = set(curation_dict["unit_ids"])
        if not labeled_unit_set.issubset(unit_set):
            raise ValueError("Curation format: some labeled units are not in the unit list")
        if not merged_units_set.issubset(unit_set):
            raise ValueError("Curation format: some merged units are not in the unit list")
        if not removed_units_set.issubset(unit_set):
            raise ValueError("Curation format: some removed units are not in the unit list")

    # A unit can be part of at most one merge group
    all_merging_groups = [set(group) for group in merged_unit_groups]
    for gp_1, gp_2 in combinations(all_merging_groups, 2):
        if not gp_1.isdisjoint(gp_2):
            raise ValueError("Some units belong to multiple merge groups")
    if not removed_units_set.isdisjoint(merged_units_set):
        raise ValueError("Some units were merged and deleted")

    # Check the labels exclusivity
    for lbl in manual_labels:
        for label_key, label_def in curation_dict["label_definitions"].items():
            if label_key not in lbl:
                continue

            unit_id = lbl["unit_id"]
            label_value = lbl[label_key]
            if not isinstance(label_value, list):
                raise ValueError(f"Curation format: manual_labels {unit_id} is invalid, it should be a list")

            if label_def["exclusive"] and len(label_value) > 1:
                raise ValueError(
                    f"Curation format: manual_labels {unit_id} {label_key} are exclusive labels. {label_value} is invalid"
                )


def convert_from_sortingview_curation_format_v0(sortingview_dict, destination_format="1"):
    """
    Convert the old sortingview curation format (v0) into a curation dictionary in the new format (v1).

    Couple of caveats:

    * The list of units is not available in the original sortingview dictionary. We set it to None
    * Labels can not be mutually exclusive.
    * Labels have no category, so we regroup them under the "all_labels" category

    Parameters
    ----------
    sortingview_dict : dict
        Dictionary containing the curation information from sortingview.
        The keys "mergeGroups" and "labelsByUnit" are read.
    destination_format : str
        Version of the format to use.
        Default to "1"

    Returns
    -------
    curation_dict: dict
        A curation dictionary

    Raises
    ------
    ValueError
        If `destination_format` is not "1".
    """

    if destination_format != "1":
        raise ValueError(f"Unsupported destination_format ({destination_format}). Only '1' is supported")

    merge_groups = sortingview_dict["mergeGroups"]

    # The keys of "labelsByUnit" are cast to the type used in the merge groups:
    # int if the first merged unit is an int, str otherwise (also when there is no merge at all).
    first_merged_unit = next((unit_id for group in merge_groups for unit_id in group), None)
    unit_id_type = int if isinstance(first_merged_unit, int) else str

    general_cat = "all_labels"
    manual_labels = []
    # A dict is used as an ordered set so that label_options keeps a deterministic (first-seen) order
    label_options = {}
    for unit_id_, l_labels in sortingview_dict["labelsByUnit"].items():
        label_options.update(dict.fromkeys(l_labels))
        # recover the correct type for unit_id
        unit_id = unit_id_type(unit_id_)
        manual_labels.append({"unit_id": unit_id, general_cat: l_labels})

    labels_def = {general_cat: {"name": general_cat, "label_options": list(label_options), "exclusive": False}}

    curation_dict = {
        "format_version": destination_format,
        "unit_ids": None,
        "label_definitions": labels_def,
        "manual_labels": manual_labels,
        "merged_unit_groups": merge_groups,
        "removed_units": [],
    }

    return curation_dict


def curation_label_to_dataframe(curation_dict):
    """
    Transform the curation dict into a pandas dataframe.

    For label category with exclusive=True : a column is created and values are the unique label
    (empty string when the unit has no label, or not exactly one label, for this category).
    For label category with exclusive=False : one column per possible label is created and values are boolean.

    If a column name would be created twice (for instance the same label option appears in
    several non-exclusive categories) then it raises an error.

    Parameters
    ----------
    curation_dict : dict
        A curation dictionary

    Returns
    -------
    labels : pd.DataFrame
        dataframe with labels, indexed by the "unit_ids" of the curation dictionary.

    Raises
    ------
    ValueError
        If a label category or label option would create a column that already exists.
    """
    import pandas as pd

    labels = pd.DataFrame(index=curation_dict["unit_ids"])
    manual_labels = curation_dict["manual_labels"]

    for label_key, label_def in curation_dict["label_definitions"].items():
        if label_def["exclusive"]:
            if label_key in labels.columns:
                raise ValueError(f"{label_key} is already a column")
            # Assign the scalar directly: chained assignment (labels[col][:] = ...) does not
            # write into the dataframe when pandas copy-on-write is enabled.
            labels[label_key] = ""
            for lbl in manual_labels:
                value = lbl.get(label_key, [])
                if len(value) == 1:
                    labels.at[lbl["unit_id"], label_key] = value[0]
        else:
            for label_opt in label_def["label_options"]:
                if label_opt in labels.columns:
                    raise ValueError(f"{label_opt} is already a column")
                # Scalar broadcast gives a real boolean column filled with False
                labels[label_opt] = False
            for lbl in manual_labels:
                for value in lbl.get(label_key, []):
                    labels.at[lbl["unit_id"], value] = True

    return labels
2 changes: 2 additions & 0 deletions src/spikeinterface/curation/sortingview_curation.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
from .curationsorting import CurationSorting


# @alessio
# TODO later : this should be reimplemented using the new curation format
def apply_sortingview_curation(
sorting, uri_or_json, exclude_labels=None, include_labels=None, skip_merge=False, verbose=False
):
Expand Down
Loading

0 comments on commit 844c65d

Please sign in to comment.