From 1d7f796a604f66874e7f1be608839fa9b2b55c15 Mon Sep 17 00:00:00 2001 From: Samuel Garcia Date: Tue, 27 Feb 2024 11:39:12 +0100 Subject: [PATCH 1/2] analyzer in howto analyze_neuropixel --- doc/how_to/analyse_neuropixels.rst | 515 +++++++++++++------------ examples/how_to/analyse_neuropixels.py | 215 ++++++----- 2 files changed, 380 insertions(+), 350 deletions(-) diff --git a/doc/how_to/analyse_neuropixels.rst b/doc/how_to/analyse_neuropixels.rst index 37646c2146..255172efc0 100644 --- a/doc/how_to/analyse_neuropixels.rst +++ b/doc/how_to/analyse_neuropixels.rst @@ -4,22 +4,22 @@ Analyse Neuropixels datasets This example shows how to perform Neuropixels-specific analysis, including custom pre- and post-processing. -.. code:: ipython +.. code:: ipython3 %matplotlib inline -.. code:: ipython +.. code:: ipython3 import spikeinterface.full as si - + import numpy as np import matplotlib.pyplot as plt from pathlib import Path -.. code:: ipython - - base_folder = Path('/mnt/data/sam/DataSpikeSorting/neuropixel_example/') +.. code:: ipython3 + base_folder = Path('/mnt/data/sam/DataSpikeSorting/howto_si/neuropixel_example/') + spikeglx_folder = base_folder / 'Rec_1_10_11_2021_g0' @@ -29,7 +29,7 @@ Read the data The ``SpikeGLX`` folder can contain several “streams” (AP, LF and NIDQ). We need to specify which one to read: -.. code:: ipython +.. code:: ipython3 stream_names, stream_ids = si.get_neo_streams('spikeglx', spikeglx_folder) stream_names @@ -43,7 +43,7 @@ We need to specify which one to read: -.. code:: ipython +.. code:: ipython3 # we do not load the sync channel, so the probe is automatically loaded raw_rec = si.read_spikeglx(spikeglx_folder, stream_name='imec0.ap', load_sync_channel=False) @@ -54,11 +54,12 @@ We need to specify which one to read: .. 
parsed-literal:: - SpikeGLXRecordingExtractor: 384 channels - 1 segments - 30.0kHz - 1138.145s + SpikeGLXRecordingExtractor: 384 channels - 30.0kHz - 1 segments - 34,145,070 samples + 1,138.15s (18.97 minutes) - int16 dtype - 24.42 GiB -.. code:: ipython +.. code:: ipython3 # we automaticaly have the probe loaded! raw_rec.get_probe().to_dataframe() @@ -73,11 +74,11 @@ We need to specify which one to read: .dataframe tbody tr th:only-of-type { vertical-align: middle; } - + .dataframe tbody tr th { vertical-align: top; } - + .dataframe thead th { text-align: right; } @@ -201,7 +202,7 @@ We need to specify which one to read: -.. code:: ipython +.. code:: ipython3 fig, ax = plt.subplots(figsize=(15, 10)) si.plot_probe_map(raw_rec, ax=ax, with_channel_ids=True) @@ -229,13 +230,13 @@ Let’s do something similar to the IBL destriping chain (See - instead of interpolating bad channels, we remove then. - instead of highpass_spatial_filter() we use common_reference() -.. code:: ipython +.. code:: ipython3 rec1 = si.highpass_filter(raw_rec, freq_min=400.) bad_channel_ids, channel_labels = si.detect_bad_channels(rec1) rec2 = rec1.remove_channels(bad_channel_ids) print('bad_channel_ids', bad_channel_ids) - + rec3 = si.phase_shift(rec2) rec4 = si.common_reference(rec3, operator="median", reference="global") rec = rec4 @@ -251,7 +252,8 @@ Let’s do something similar to the IBL destriping chain (See .. parsed-literal:: - CommonReferenceRecording: 383 channels - 1 segments - 30.0kHz - 1138.145s + CommonReferenceRecording: 383 channels - 30.0kHz - 1 segments - 34,145,070 samples + 1,138.15s (18.97 minutes) - int16 dtype - 24.36 GiB @@ -264,18 +266,18 @@ the ipywydgets interactive ploter .. 
code:: python %matplotlib widget - si.plot_traces({'filter':rec1, 'cmr': rec4}, backend='ipywidgets') + si.plot_timeseries({'filter':rec1, 'cmr': rec4}, backend='ipywidgets') Note that using this ipywydgets make possible to explore diffrents preprocessing chain wihtout to save the entire file to disk. Everything is lazy, so you can change the previsous cell (parameters, step order, …) and visualize it immediatly. -.. code:: ipython +.. code:: ipython3 # here we use static plot using matplotlib backend fig, axs = plt.subplots(ncols=3, figsize=(20, 10)) - + si.plot_traces(rec1, backend='matplotlib', clim=(-50, 50), ax=axs[0]) si.plot_traces(rec4, backend='matplotlib', clim=(-50, 50), ax=axs[1]) si.plot_traces(rec, backend='matplotlib', clim=(-50, 50), ax=axs[2]) @@ -287,7 +289,7 @@ is lazy, so you can change the previsous cell (parameters, step order, .. image:: analyse_neuropixels_files/analyse_neuropixels_13_0.png -.. code:: ipython +.. code:: ipython3 # plot some channels fig, ax = plt.subplots(figsize=(20, 10)) @@ -299,7 +301,7 @@ is lazy, so you can change the previsous cell (parameters, step order, .. parsed-literal:: - + @@ -326,25 +328,13 @@ Depending on the complexity of the preprocessing chain, this operation can take a while. However, we can make use of the powerful parallelization mechanism of SpikeInterface. -.. code:: ipython +.. code:: ipython3 job_kwargs = dict(n_jobs=40, chunk_duration='1s', progress_bar=True) - + rec = rec.save(folder=base_folder / 'preprocess', format='binary', **job_kwargs) - -.. parsed-literal:: - - write_binary_recording with n_jobs = 40 and chunk_size = 30000 - - - -.. parsed-literal:: - - write_binary_recording: 0%| | 0/1139 [00:00`__ for more information and a list of all supported metrics. @@ -697,19 +714,24 @@ Some metrics are based on PCA (like ``'isolation_distance', 'l_ratio', 'd_prime'``) and require to estimate PCA for their computation. 
This can be achieved with: -``si.compute_principal_components(waveform_extractor)`` +``analyzer.compute("principal_components")`` -.. code:: ipython +.. code:: ipython3 - metrics = si.compute_quality_metrics(we, metric_names=['firing_rate', 'presence_ratio', 'snr', - 'isi_violation', 'amplitude_cutoff']) + metric_names=['firing_rate', 'presence_ratio', 'snr', 'isi_violation', 'amplitude_cutoff'] + + + # metrics = analyzer.compute("quality_metrics").get_data() + # equivalent to + metrics = si.compute_quality_metrics(analyzer, metric_names=metric_names) + metrics .. parsed-literal:: - /home/samuel.garcia/Documents/SpikeInterface/spikeinterface/spikeinterface/qualitymetrics/misc_metrics.py:511: UserWarning: Units [11, 13, 15, 18, 21, 22] have too few spikes and amplitude_cutoff is set to NaN - warnings.warn(f"Units {nan_units} have too few spikes and " + /home/samuel.garcia/Documents/SpikeInterface/spikeinterface/src/spikeinterface/qualitymetrics/misc_metrics.py:846: UserWarning: Some units have too few spikes : amplitude_cutoff is set to NaN + warnings.warn(f"Some units have too few spikes : amplitude_cutoff is set to NaN") @@ -721,11 +743,11 @@ PCA for their computation. This can be achieved with: .dataframe tbody tr th:only-of-type { vertical-align: middle; } - + .dataframe tbody tr th { vertical-align: top; } - + .dataframe thead th { text-align: right; } @@ -734,293 +756,293 @@ PCA for their computation. 
This can be achieved with: + amplitude_cutoff firing_rate - presence_ratio - snr isi_violations_ratio isi_violations_count - amplitude_cutoff + presence_ratio + snr 0 + 0.011528 0.798668 + 4.591436 + 10.0 1.000000 - 1.324698 - 4.591437 - 10 - 0.011528 + 1.430458 1 - 9.886261 - 1.000000 - 1.959527 - 5.333803 - 1780 0.000062 + 9.886262 + 5.333802 + 1780.0 + 1.000000 + 1.938214 2 + 0.002567 2.849373 - 1.000000 - 1.467690 3.859813 - 107 - 0.002567 + 107.0 + 1.000000 + 1.586939 3 + 0.000099 5.404408 + 3.519589 + 351.0 1.000000 - 1.253708 - 3.519590 - 351 - 0.000188 + 2.073651 4 + 0.001487 4.772678 - 1.000000 - 1.722377 3.947255 - 307 - 0.001487 + 307.0 + 1.000000 + 1.595303 5 + 0.001190 1.802055 - 1.000000 - 2.358286 6.403293 - 71 - 0.001422 + 71.0 + 1.000000 + 2.411436 6 + 0.003508 0.531567 + 94.320694 + 91.0 0.888889 - 3.359229 - 94.320701 - 91 - 0.004900 + 3.377035 7 - 5.400014 - 1.000000 - 4.653080 - 0.612662 - 61 0.000119 + 5.400015 + 0.612662 + 61.0 + 1.000000 + 4.631496 8 - 10.563679 - 1.000000 - 8.267220 - 0.073487 - 28 0.000265 + 10.563680 + 0.073487 + 28.0 + 1.000000 + 8.178637 9 + 0.000968 8.181734 - 1.000000 - 4.546735 0.730646 - 167 - 0.000968 + 167.0 + 1.000000 + 3.900670 10 - 16.839681 - 1.000000 - 5.094325 - 0.298477 - 289 0.000259 + 16.839682 + 0.298477 + 289.0 + 1.000000 + 5.044798 11 + NaN 0.007029 - 0.388889 - 4.032887 0.000000 - 0 - NaN + 0.0 + 0.388889 + 4.032886 12 - 10.184114 - 1.000000 - 4.780558 - 0.720070 - 255 0.000264 + 10.184115 + 0.720070 + 255.0 + 1.000000 + 4.767068 13 + NaN 0.005272 + 0.000000 + 0.0 0.222222 4.627749 - 0.000000 - 0 - NaN 14 - 10.047928 - 1.000000 - 4.984704 - 0.771631 - 266 0.000371 + 10.047929 + 0.771631 + 266.0 + 1.000000 + 5.185702 15 + NaN 0.107192 + 0.000000 + 0.0 0.888889 4.248180 - 0.000000 - 0 - NaN 16 + 0.000452 0.535081 - 0.944444 - 2.326990 8.183362 - 8 - 0.000452 + 8.0 + 0.944444 + 2.309993 17 - 4.650549 - 1.000000 - 1.998918 - 6.391674 - 472 0.000196 + 4.650550 + 6.391673 + 472.0 + 1.000000 + 2.064208 18 + 
NaN 0.077319 + 293.942411 + 6.0 0.722222 6.619197 - 293.942433 - 6 - NaN 19 - 7.088727 - 1.000000 - 1.715093 + 0.000053 + 7.088728 5.146421 - 883 - 0.000268 + 883.0 + 1.000000 + 2.057868 20 - 9.821243 + 0.000071 + 9.821244 + 5.322676 + 1753.0 1.000000 - 1.575338 - 5.322677 - 1753 - 0.000059 + 1.688922 21 + NaN 0.046567 + 405.178005 + 3.0 0.666667 - 5.899877 - 405.178035 - 3 - NaN + 5.899876 22 + NaN 0.094891 + 65.051727 + 2.0 0.722222 6.476350 - 65.051732 - 2 - NaN 23 + 0.002927 1.849501 + 13.699103 + 160.0 1.000000 - 2.493723 - 13.699104 - 160 - 0.002927 + 2.282473 24 + 0.003143 1.420733 - 1.000000 - 1.549977 4.352889 - 30 - 0.004044 + 30.0 + 1.000000 + 1.573989 25 + 0.002457 0.675661 + 56.455510 + 88.0 0.944444 - 4.110071 - 56.455515 - 88 - 0.002457 + 4.107643 26 + 0.003152 0.642273 - 1.000000 - 1.981111 2.129918 - 3 - 0.003152 + 3.0 + 1.000000 + 1.902601 27 + 0.000229 1.012173 + 6.860924 + 24.0 0.888889 - 1.843515 - 6.860925 - 24 - 0.000229 + 1.854307 28 + 0.002856 0.804818 + 38.433003 + 85.0 0.888889 - 3.662210 - 38.433006 - 85 - 0.002856 + 3.755829 29 + 0.002854 1.012173 - 1.000000 - 1.097260 1.143487 - 4 - 0.000845 + 4.0 + 1.000000 + 1.345607 30 + 0.005439 0.649302 + 63.910953 + 92.0 0.888889 - 4.243889 - 63.910958 - 92 - 0.005439 + 4.168347 @@ -1034,12 +1056,12 @@ Curation using metrics A very common curation approach is to threshold these metrics to select *good* units: -.. code:: ipython +.. code:: ipython3 amplitude_cutoff_thresh = 0.1 isi_violations_ratio_thresh = 1 presence_ratio_thresh = 0.9 - + our_query = f"(amplitude_cutoff < {amplitude_cutoff_thresh}) & (isi_violations_ratio < {isi_violations_ratio_thresh}) & (presence_ratio > {presence_ratio_thresh})" print(our_query) @@ -1049,7 +1071,7 @@ A very common curation approach is to threshold these metrics to select (amplitude_cutoff < 0.1) & (isi_violations_ratio < 1) & (presence_ratio > 0.9) -.. code:: ipython +.. 
code:: ipython3 keep_units = metrics.query(our_query) keep_unit_ids = keep_units.index.values @@ -1071,43 +1093,43 @@ In order to export the final results we need to make a copy of the the waveforms, but only for the selected units (so we can avoid to compute them again). -.. code:: ipython +.. code:: ipython3 - we_clean = we.select_units(keep_unit_ids, new_folder=base_folder / 'waveforms_clean') + analyzer_clean = analyzer.select_units(keep_unit_ids, folder=base_folder / 'analyzer_clean', format='binary_folder') -.. code:: ipython +.. code:: ipython3 - we_clean + analyzer_clean .. parsed-literal:: - WaveformExtractor: 383 channels - 6 units - 1 segments - before:45 after:60 n_per_units:500 - sparse + SortingAnalyzer: 383 channels - 6 units - 1 segments - binary_folder - sparse - has recording + Loaded 9 extenstions: random_spikes, waveforms, templates, noise_levels, correlograms, unit_locations, spike_amplitudes, template_similarity, quality_metrics Then we export figures to a report folder -.. code:: ipython +.. code:: ipython3 # export spike sorting report to a folder - si.export_report(we_clean, base_folder / 'report', format='png') + si.export_report(analyzer_clean, base_folder / 'report', format='png') -.. code:: ipython +.. code:: ipython3 - we_clean = si.load_waveforms(base_folder / 'waveforms_clean') - we_clean + analyzer_clean = si.load_sorting_analyzer(base_folder / 'analyzer_clean') + analyzer_clean .. parsed-literal:: - WaveformExtractor: 383 channels - 6 units - 1 segments - before:45 after:60 n_per_units:500 - sparse + SortingAnalyzer: 383 channels - 6 units - 1 segments - binary_folder - sparse - has recording + Loaded 9 extenstions: random_spikes, waveforms, templates, noise_levels, template_similarity, spike_amplitudes, correlograms, unit_locations, quality_metrics @@ -1115,4 +1137,5 @@ And push the results to sortingview webased viewer .. 
code:: python - si.plot_sorting_summary(we_clean, backend='sortingview') + si.plot_sorting_summary(analyzer_clean, backend='sortingview') + diff --git a/examples/how_to/analyse_neuropixels.py b/examples/how_to/analyse_neuropixels.py index 29d19f2331..3a936b072c 100644 --- a/examples/how_to/analyse_neuropixels.py +++ b/examples/how_to/analyse_neuropixels.py @@ -7,7 +7,7 @@ # extension: .py # format_name: light # format_version: '1.5' -# jupytext_version: 1.14.4 +# jupytext_version: 1.14.6 # kernelspec: # display_name: Python 3 (ipykernel) # language: python @@ -28,9 +28,9 @@ from pathlib import Path # + -base_folder = Path("/mnt/data/sam/DataSpikeSorting/neuropixel_example/") +base_folder = Path('/mnt/data/sam/DataSpikeSorting/howto_si/neuropixel_example/') -spikeglx_folder = base_folder / "Rec_1_10_11_2021_g0" +spikeglx_folder = base_folder / 'Rec_1_10_11_2021_g0' # - @@ -40,14 +40,14 @@ # We need to specify which one to read: # -stream_names, stream_ids = si.get_neo_streams("spikeglx", spikeglx_folder) +stream_names, stream_ids = si.get_neo_streams('spikeglx', spikeglx_folder) stream_names # we do not load the sync channel, so the probe is automatically loaded -raw_rec = si.read_spikeglx(spikeglx_folder, stream_name="imec0.ap", load_sync_channel=False) +raw_rec = si.read_spikeglx(spikeglx_folder, stream_name='imec0.ap', load_sync_channel=False) raw_rec -# we automatically have the probe loaded! +# we automaticaly have the probe loaded! raw_rec.get_probe().to_dataframe() fig, ax = plt.subplots(figsize=(15, 10)) @@ -58,15 +58,15 @@ # # Let's do something similar to the IBL destriping chain (See :ref:`ibl_destripe`) to preprocess the data but: # -# * instead of interpolating bad channels, we remove them. +# * instead of interpolating bad channels, we remove then. # * instead of highpass_spatial_filter() we use common_reference() # # + -rec1 = si.highpass_filter(raw_rec, freq_min=400.0) +rec1 = si.highpass_filter(raw_rec, freq_min=400.) 
bad_channel_ids, channel_labels = si.detect_bad_channels(rec1) rec2 = rec1.remove_channels(bad_channel_ids) -print("bad_channel_ids", bad_channel_ids) +print('bad_channel_ids', bad_channel_ids) rec3 = si.phase_shift(rec2) rec4 = si.common_reference(rec3, operator="median", reference="global") @@ -78,39 +78,33 @@ # # -# The preprocessing steps can be interactively explored with the ipywidgets interactive plotter +# Interactive explore the preprocess steps could de done with this with the ipywydgets interactive ploter # # ```python # # %matplotlib widget -# si.plot_traces({'filter':rec1, 'cmr': rec4}, backend='ipywidgets') +# si.plot_timeseries({'filter':rec1, 'cmr': rec4}, backend='ipywidgets') # ``` # -# Note that using this ipywidgets make possible to explore different preprocessing chains without saving the entire file to disk. -# Everything is lazy, so you can change the previous cell (parameters, step order, ...) and visualize it immediately. +# Note that using this ipywydgets make possible to explore diffrents preprocessing chain wihtout to save the entire file to disk. +# Everything is lazy, so you can change the previsous cell (parameters, step order, ...) and visualize it immediatly. 
# # # + -# here we use a static plot using matplotlib backend +# here we use static plot using matplotlib backend fig, axs = plt.subplots(ncols=3, figsize=(20, 10)) -si.plot_traces(rec1, backend="matplotlib", clim=(-50, 50), ax=axs[0]) -si.plot_traces(rec4, backend="matplotlib", clim=(-50, 50), ax=axs[1]) -si.plot_traces(rec, backend="matplotlib", clim=(-50, 50), ax=axs[2]) -for i, label in enumerate(("filter", "cmr", "final")): +si.plot_traces(rec1, backend='matplotlib', clim=(-50, 50), ax=axs[0]) +si.plot_traces(rec4, backend='matplotlib', clim=(-50, 50), ax=axs[1]) +si.plot_traces(rec, backend='matplotlib', clim=(-50, 50), ax=axs[2]) +for i, label in enumerate(('filter', 'cmr', 'final')): axs[i].set_title(label) # - # plot some channels fig, ax = plt.subplots(figsize=(20, 10)) -some_chans = rec.channel_ids[ - [ - 100, - 150, - 200, - ] -] -si.plot_traces({"filter": rec1, "cmr": rec4}, backend="matplotlib", mode="line", ax=ax, channel_ids=some_chans) +some_chans = rec.channel_ids[[100, 150, 200, ]] +si.plot_traces({'filter':rec1, 'cmr': rec4}, backend='matplotlib', mode='line', ax=ax, channel_ids=some_chans) # ### Should we save the preprocessed data to a binary file? @@ -119,14 +113,14 @@ # # Saving is not necessarily a good choice, as it consumes a lot of disk space and sometimes the writing to disk can be slower than recomputing the preprocessing chain on-the-fly. # -# Here, we decide to save it because Kilosort requires a binary file as input, so the preprocessed recording will need to be saved at some point. +# Here, we decide to do save it because Kilosort requires a binary file as input, so the preprocessed recording will need to be saved at some point. # # Depending on the complexity of the preprocessing chain, this operation can take a while. However, we can make use of the powerful parallelization mechanism of SpikeInterface. 
# + -job_kwargs = dict(n_jobs=40, chunk_duration="1s", progress_bar=True) +job_kwargs = dict(n_jobs=40, chunk_duration='1s', progress_bar=True) -rec = rec.save(folder=base_folder / "preprocess", format="binary", **job_kwargs) +rec = rec.save(folder=base_folder / 'preprocess', format='binary', **job_kwargs) # - # our recording now points to the new binary folder @@ -134,7 +128,7 @@ # ## Check spiking activity and drift before spike sorting # -# A good practice before running a spike sorter is to check the "peaks activity" and the presence of drift. +# A good practice before running a spike sorter is to check the "peaks activity" and the presence of drifts. # # SpikeInterface has several tools to: # @@ -148,24 +142,24 @@ # Noise levels can be estimated on the scaled traces or on the raw (`int16`) traces. # -# we can estimate the noise on the scaled traces (microV) or on the raw ones (which in our case are int16). +# we can estimate the noise on the scaled traces (microV) or on the raw one (which is in our case int16). noise_levels_microV = si.get_noise_levels(rec, return_scaled=True) noise_levels_int16 = si.get_noise_levels(rec, return_scaled=False) fig, ax = plt.subplots() _ = ax.hist(noise_levels_microV, bins=np.arange(5, 30, 2.5)) -ax.set_xlabel("noise [microV]") +ax.set_xlabel('noise [microV]') # ### Detect and localize peaks # -# SpikeInterface includes built-in algorithms to detect peaks and also to localize their positions. +# SpikeInterface includes built-in algorithms to detect peaks and also to localize their position. # # This is part of the **sortingcomponents** module and needs to be imported explicitly. 
# # The two functions (detect + localize): # -# * can be run in parallel +# * can be run parallel # * are very fast when the preprocessed recording is already saved (and a bit slower otherwise) # * implement several methods # @@ -174,54 +168,53 @@ # + from spikeinterface.sortingcomponents.peak_detection import detect_peaks -job_kwargs = dict(n_jobs=40, chunk_duration="1s", progress_bar=True) -peaks = detect_peaks( - rec, method="locally_exclusive", noise_levels=noise_levels_int16, detect_threshold=5, radius_um=50.0, **job_kwargs -) +job_kwargs = dict(n_jobs=40, chunk_duration='1s', progress_bar=True) +peaks = detect_peaks(rec, method='locally_exclusive', noise_levels=noise_levels_int16, + detect_threshold=5, local_radius_um=50., **job_kwargs) peaks # + from spikeinterface.sortingcomponents.peak_localization import localize_peaks -peak_locations = localize_peaks(rec, peaks, method="center_of_mass", radius_um=50.0, **job_kwargs) +peak_locations = localize_peaks(rec, peaks, method='center_of_mass', local_radius_um=50., **job_kwargs) # - -# ### Check for drift +# ### Check for drifts # -# We can *manually* check for drift with a simple scatter plots of peak times VS estimated peak depths. +# We can *manually* check for drifts with a simple scatter plots of peak times VS estimated peak depths. # # In this example, we do not see any apparent drift. # -# In case we notice apparent drift in the recording, one can use the SpikeInterface modules to estimate and correct motion. See the documentation for motion estimation and correction for more details. +# In case we notice apparent drifts in the recording, one can use the SpikeInterface modules to estimate and correct motion. See the documentation for motion estimation and correction for more details. 
-# check for drift
+# check for drifts
 fs = rec.sampling_frequency
 fig, ax = plt.subplots(figsize=(10, 8))
-ax.scatter(peaks["sample_index"] / fs, peak_locations["y"], color="k", marker=".", alpha=0.002)
+ax.scatter(peaks['sample_ind'] / fs, peak_locations['y'], color='k', marker='.', alpha=0.002)
 
 # +
-# we can also use the peak location estimates to have insight of cluster separation before sorting
+# we can also use the peak location estimates to have an insight of cluster separation before sorting
 fig, ax = plt.subplots(figsize=(15, 10))
 si.plot_probe_map(rec, ax=ax, with_channel_ids=True)
 ax.set_ylim(-100, 150)
 
-ax.scatter(peak_locations["x"], peak_locations["y"], color="purple", alpha=0.002)
+ax.scatter(peak_locations['x'], peak_locations['y'], color='purple', alpha=0.002)
 # -
 
 # ## Run a spike sorter
 #
-# Despite beingthe most critical part of the pipeline, spike sorting in SpikeInterface is dead-simple: one function.
+# Even if running spike sorting is probably the most critical part of the pipeline, in SpikeInterface this is dead-simple: one function.
 #
 # **Important notes**:
 #
-# * most of sorters are wrapped from external tools (kilosort, kilosort2.5, spykingcircus, mountainsort4 ...) that often also need other requirements (e.g., MATLAB, CUDA)
-# * some sorters are internally developed (spykingcircus2)
-# * external sorters can be run inside of a container (docker, singularity) WITHOUT pre-installation
+# * most of sorters are wrapped from external tools (kilosort, kilosort2.5, spykingcircus, mountainsort4 ...) that often also need other requirements (e.g., MATLAB, CUDA)
+# * some sorters are internally developed (spykingcircus2)
+# * external sorters can be run inside a container (docker, singularity) WITHOUT pre-installation
 #
-# Please carefully read the `spikeinterface.sorters` documentation for more information.
+# Please carefully read the `spikeinterface.sorters` documentation for more information.
 #
-# In this example:
+# In this example: 
 #
 # * we will run kilosort2.5
 # * we apply no drift correction (because we don't have drift)
@@ -229,82 +222,94 @@
 #
 
 # check default params for kilosort2.5
-si.get_default_sorter_params("kilosort2_5")
+si.get_default_sorter_params('kilosort2_5')
 
 # +
 # run kilosort2.5 without drift correction
-params_kilosort2_5 = {"do_correction": False}
-
-sorting = si.run_sorter(
-    "kilosort2_5",
-    rec,
-    output_folder=base_folder / "kilosort2.5_output",
-    docker_image=True,
-    verbose=True,
-    **params_kilosort2_5,
-)
+params_kilosort2_5 = {'do_correction': False}
+
+sorting = si.run_sorter('kilosort2_5', rec, output_folder=base_folder / 'kilosort2.5_output',
+                        docker_image=True, verbose=True, **params_kilosort2_5)
 # -
 
-# the results can be read back for future sessions
-sorting = si.read_sorter_folder(base_folder / "kilosort2.5_output")
+# the results can be read back for future sessions
+sorting = si.read_sorter_folder(base_folder / 'kilosort2.5_output')
 
-# here we have 31 units in our recording
+# here we have 31 units in our recording
 sorting
 
 # ## Post processing
 #
-# All postprocessing steps are based on the **WaveformExtractor** object.
+# All the postprocessing steps are based on the **SortingAnalyzer** object.
+#
+# This object combines a `sorting` and a `recording` object. It will also help to run some computation aka "extensions" to
+# get an insight on the quality of units.
 #
-# This object combines a `recording` and a `sorting` object and extracts some waveform snippets (500 by default) for each unit.
+# The first extensions we will run are:
+# * select some spikes per units
+# * extract waveforms
+# * compute templates
+# * compute noise levels
 #
 # Note that we use the `sparse=True` option. This option is important because the waveforms will be extracted only for a few channels around the main channel of each unit. This saves tons of disk space and speeds up the waveforms extraction and further processing.
# +# Note that our object is not persistent to disk because we use `format="memory"` we could use `format="binary_folder"` or `format="zarr"`. -we = si.extract_waveforms( - rec, - sorting, - folder=base_folder / "waveforms_kilosort2.5", - sparse=True, - max_spikes_per_unit=500, - ms_before=1.5, - ms_after=2.0, - **job_kwargs, -) +# + -# the `WaveformExtractor` contains all information and is persistent on disk -print(we) -print(we.folder) +analyzer = si.create_sorting_analyzer(sorting, rec, sparse=True, format="memory") +analyzer +# - -# the `WaveformExtrator` can be easily loaded back from its folder -we = si.load_waveforms(base_folder / "waveforms_kilosort2.5") -we +analyzer.compute("random_spikes", method="uniform", max_spikes_per_unit=500) +analyzer.compute("waveforms", ms_before=1.5,ms_after=2., **job_kwargs) +analyzer.compute("templates", operators=["average", "median", "std"]) +analyzer.compute("noise_levels") +analyzer -# Many additional computations rely on the `WaveformExtractor`. +# Many additional computations rely on the `SortingAnalyzer`. # Some computations are slower than others, but can be performed in parallel using the `**job_kwargs` mechanism. # -# Every computation will also be persistent on disk in the same folder, since they represent waveform extensions. +# -_ = si.compute_noise_levels(we) -_ = si.compute_correlograms(we) -_ = si.compute_unit_locations(we) -_ = si.compute_spike_amplitudes(we, **job_kwargs) -_ = si.compute_template_similarity(we) +analyzer.compute("correlograms") +analyzer.compute("unit_locations") +analyzer.compute("spike_amplitudes", **job_kwargs) +analyzer.compute("template_similarity") +analyzer +# Our `SortingAnalyzer` can be saved to disk using `save_as()` which make a copy of the analyzer and all computed extensions. 
+ +analyzer_saved = analyzer.save_as(folder=base_folder / "analyzer", format="binary_folder") +analyzer_saved + # ## Quality metrics # -# We have a single function `compute_quality_metrics(WaveformExtractor)` that returns a `pandas.Dataframe` with the desired metrics. +# We have a single function `compute_quality_metrics(SortingAnalyzer)` that returns a `pandas.Dataframe` with the desired metrics. +# +# Note that this function is also an extension and so can be saved. And so this is equivalent to do : +# `metrics = analyzer.compute("quality_metrics").get_data()` +# # # Please visit the [metrics documentation](https://spikeinterface.readthedocs.io/en/latest/modules/qualitymetrics.html) for more information and a list of all supported metrics. # -# Some metrics are based on PCA (like `'isolation_distance', 'l_ratio', 'd_prime'`) and require PCA values for their computation. This can be achieved with: +# Some metrics are based on PCA (like `'isolation_distance', 'l_ratio', 'd_prime'`) and require to estimate PCA for their computation. This can be achieved with: # -# `si.compute_principal_components(waveform_extractor)` +# `analyzer.compute("principal_components")` +# +# + +# + +metric_names=['firing_rate', 'presence_ratio', 'snr', 'isi_violation', 'amplitude_cutoff'] + + +# metrics = analyzer.compute("quality_metrics").get_data() +# equivalent to +metrics = si.compute_quality_metrics(analyzer, metric_names=metric_names) -metrics = si.compute_quality_metrics( - we, metric_names=["firing_rate", "presence_ratio", "snr", "isi_violation", "amplitude_cutoff"] -) metrics +# - # ## Curation using metrics # @@ -325,22 +330,24 @@ # ## Export final results to disk folder and visulize with sortingview # -# In order to export the final results we need to make a copy of the the waveforms, but only for the selected units (so we can avoid computing them again). 
+# In order to export the final results we need to make a copy of the the waveforms, but only for the selected units (so we can avoid to compute them again). -we_clean = we.select_units(keep_unit_ids, new_folder=base_folder / "waveforms_clean") +analyzer_clean = analyzer.select_units(keep_unit_ids, folder=base_folder / 'analyzer_clean', format='binary_folder') -we_clean +analyzer_clean # Then we export figures to a report folder # export spike sorting report to a folder -si.export_report(we_clean, base_folder / "report", format="png") +si.export_report(analyzer_clean, base_folder / 'report', format='png') -we_clean = si.load_waveforms(base_folder / "waveforms_clean") -we_clean +analyzer_clean = si.load_sorting_analyzer(base_folder / 'analyzer_clean') +analyzer_clean # And push the results to sortingview webased viewer # # ```python -# si.plot_sorting_summary(we_clean, backend='sortingview') +# si.plot_sorting_summary(analyzer_clean, backend='sortingview') # ``` + + From 38fc6b4bb2ebde16547b24676db2c92a9bea73c4 Mon Sep 17 00:00:00 2001 From: Samuel Garcia Date: Mon, 4 Mar 2024 14:43:29 +0100 Subject: [PATCH 2/2] oups --- doc/how_to/analyse_neuropixels.rst | 6 +++--- examples/how_to/analyse_neuropixels.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/doc/how_to/analyse_neuropixels.rst b/doc/how_to/analyse_neuropixels.rst index 255172efc0..36eb0a0f63 100644 --- a/doc/how_to/analyse_neuropixels.rst +++ b/doc/how_to/analyse_neuropixels.rst @@ -266,7 +266,7 @@ the ipywydgets interactive ploter .. code:: python %matplotlib widget - si.plot_timeseries({'filter':rec1, 'cmr': rec4}, backend='ipywidgets') + si.plot_traces({'filter':rec1, 'cmr': rec4}, backend='ipywidgets') Note that using this ipywydgets make possible to explore diffrents preprocessing chain wihtout to save the entire file to disk. 
Everything @@ -417,7 +417,7 @@ Let’s use here the ``locally_exclusive`` method for detection and the job_kwargs = dict(n_jobs=40, chunk_duration='1s', progress_bar=True) peaks = detect_peaks(rec, method='locally_exclusive', noise_levels=noise_levels_int16, - detect_threshold=5, local_radius_um=50., **job_kwargs) + detect_threshold=5, radius_um=50., **job_kwargs) peaks @@ -442,7 +442,7 @@ Let’s use here the ``locally_exclusive`` method for detection and the from spikeinterface.sortingcomponents.peak_localization import localize_peaks - peak_locations = localize_peaks(rec, peaks, method='center_of_mass', local_radius_um=50., **job_kwargs) + peak_locations = localize_peaks(rec, peaks, method='center_of_mass', radius_um=50., **job_kwargs) diff --git a/examples/how_to/analyse_neuropixels.py b/examples/how_to/analyse_neuropixels.py index 3a936b072c..92ccfca602 100644 --- a/examples/how_to/analyse_neuropixels.py +++ b/examples/how_to/analyse_neuropixels.py @@ -82,7 +82,7 @@ # # ```python # # %matplotlib widget -# si.plot_timeseries({'filter':rec1, 'cmr': rec4}, backend='ipywidgets') +# si.plot_traces({'filter':rec1, 'cmr': rec4}, backend='ipywidgets') # ``` # # Note that using this ipywydgets make possible to explore diffrents preprocessing chain wihtout to save the entire file to disk. @@ -170,13 +170,13 @@ job_kwargs = dict(n_jobs=40, chunk_duration='1s', progress_bar=True) peaks = detect_peaks(rec, method='locally_exclusive', noise_levels=noise_levels_int16, - detect_threshold=5, local_radius_um=50., **job_kwargs) + detect_threshold=5, radius_um=50., **job_kwargs) peaks # + from spikeinterface.sortingcomponents.peak_localization import localize_peaks -peak_locations = localize_peaks(rec, peaks, method='center_of_mass', local_radius_um=50., **job_kwargs) +peak_locations = localize_peaks(rec, peaks, method='center_of_mass', radius_um=50., **job_kwargs) # - # ### Check for drifts