Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WaveSurfer File Loading Classes #1040

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions neo/io/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
* :attr:`StimfitIO`
* :attr:`TdtIO`
* :attr:`TiffIO`
* :attr:`WaveSurferIO`
* :attr:`WinEdrIO`
* :attr:`WinWcpIO`

Expand Down Expand Up @@ -242,6 +243,10 @@

.. autoattribute:: extensions

.. autoclass:: neo.io.WaveSurferIO

.. autoattribute:: extensions

.. autoclass:: neo.io.WinEdrIO

.. autoattribute:: extensions
Expand Down Expand Up @@ -316,6 +321,7 @@
from neo.io.stimfitio import StimfitIO
from neo.io.tdtio import TdtIO
from neo.io.tiffio import TiffIO
from neo.io.wavesurferio import WaveSurferIO
from neo.io.winedrio import WinEdrIO
from neo.io.winwcpio import WinWcpIO

Expand Down Expand Up @@ -366,6 +372,7 @@
StimfitIO,
TdtIO,
TiffIO,
WaveSurferIO,
WinEdrIO,
WinWcpIO
]
Expand Down
139 changes: 139 additions & 0 deletions neo/io/wavesurferio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
"""
Class for reading data from WaveSurfer, a software written by
Boaz Mohar and Adam Taylor https://wavesurfer.janelia.org/

This is a wrapper around the PyWaveSurfer module written by Boaz Mohar and Adam Taylor,
using the "double" argument to load the data as 64-bit double.
"""
import numpy as np
import quantities as pq

from neo.io.baseio import BaseIO
from neo.core import Block, Segment, AnalogSignal
from ..rawio.baserawio import _signal_channel_dtype, _signal_stream_dtype, _spike_channel_dtype, _event_channel_dtype # TODO: not sure about this # from ..rawio.

try:
from pywavesurfer import ws
except ImportError as err:
HAS_PYWAVESURFER = False
PYWAVESURFER_ERR = err
else:
HAS_PYWAVESURFER = True
PYWAVESURFER_ERR = None


class WaveSurferIO(BaseIO):
"""
"""

is_readable = True
is_writable = False

supported_objects = [Block, Segment, AnalogSignal]
readable_objects = [Block]
writeable_objects = []

has_header = True
is_streameable = False

read_params = {Block: []}
write_params = None

name = 'WaveSurfer'
extensions = ['.h5']

mode = 'file'

def __init__(self, filename=None):
"""
Arguments:
filename : a filename
"""
if not HAS_PYWAVESURFER:
raise PYWAVESURFER_ERR

BaseIO.__init__(self)

self.filename = filename
self.ws_rec = None
self.header = {}
self._sampling_rate = None
self.ai_channel_names = None
self.ai_channel_units = None

self.read_block()

def read_block(self, lazy=False):
assert not lazy, 'Do not support lazy'

self.ws_rec = ws.loadDataFile(self.filename, format_string="double")

ai_channel_names = self.ws_rec["header"]["AIChannelNames"].astype(str)
ai_channel_units = self.ws_rec["header"]["AIChannelUnits"].astype(str)
sampling_rate = np.float64(self.ws_rec["header"]["AcquisitionSampleRate"]) * 1 / pq.s

self.fill_header(ai_channel_names,
ai_channel_units)

bl = Block()

# iterate over sections first:
for seg_index in range(int(self.ws_rec["header"]["NSweepsPerRun"])):

seg = Segment(index=seg_index)
seg_id = "sweep_{0:04d}".format(seg_index + 1) # e.g. "sweep_0050"

ws_seg = self.ws_rec[seg_id]
t_start = np.float64(ws_seg["timestamp"]) * pq.s

# iterate over channels:
for chan_idx, recsig in enumerate(ws_seg["analogScans"]):

unit = ai_channel_units[chan_idx]
name = ai_channel_names[chan_idx]

signal = pq.Quantity(recsig, unit).T

anaSig = AnalogSignal(signal, sampling_rate=sampling_rate,
t_start=t_start, name=name,
channel_index=chan_idx)
seg.analogsignals.append(anaSig)
bl.segments.append(seg)

bl.create_many_to_one_relationship()

return bl

def fill_header(self, ai_channel_names, ai_channel_units):

signal_channels = []

for ch_idx, (ch_name, ch_units) in enumerate(zip(ai_channel_names,
ai_channel_units)):
ch_id = ch_idx + 1
dtype = "float64" # as loaded with "double" argument from PyWaveSurfer
gain = 1
offset = 0
stream_id = "0"
signal_channels.append((ch_name, ch_id, self._sampling_rate, dtype, ch_units, gain, offset, stream_id))

signal_channels = np.array(signal_channels, dtype=_signal_channel_dtype)

# Spike Channels (no spikes)
spike_channels = []
spike_channels = np.array(spike_channels, dtype=_spike_channel_dtype)

# Event Channels (no events)
event_channels = []
event_channels = np.array(event_channels, dtype=_event_channel_dtype)

# Signal Streams
signal_streams = np.array([('Signals', '0')], dtype=_signal_stream_dtype)

# Header Dict
self.header['nb_block'] = 1
self.header['nb_segment'] = [int(self.ws_rec["header"]["NSweepsPerRun"])]
self.header['signal_streams'] = signal_streams
self.header['signal_channels'] = signal_channels
self.header['spike_channels'] = spike_channels
self.header['event_channels'] = event_channels
7 changes: 7 additions & 0 deletions neo/rawio/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
* :attr:`SpikeGadgetsRawIO`
* :attr:`SpikeGLXRawIO`
* :attr:`TdtRawIO`
* :attr:`WaveSurferRawIO`
* :attr:`WinEdrRawIO`
* :attr:`WinWcpRawIO`

Expand Down Expand Up @@ -141,6 +142,10 @@

.. autoattribute:: extensions

.. autoclass:: neo.rawio.WaveSurferRawIO

.. autoattribute:: extensions

.. autoclass:: neo.rawio.WinEdrRawIO

.. autoattribute:: extensions
Expand Down Expand Up @@ -178,6 +183,7 @@
from neo.rawio.spikegadgetsrawio import SpikeGadgetsRawIO
from neo.rawio.spikeglxrawio import SpikeGLXRawIO
from neo.rawio.tdtrawio import TdtRawIO
from neo.rawio.wavesurferrawio import WaveSurferRawIO
from neo.rawio.winedrrawio import WinEdrRawIO
from neo.rawio.winwcprawio import WinWcpRawIO

Expand Down Expand Up @@ -207,6 +213,7 @@
SpikeGadgetsRawIO,
SpikeGLXRawIO,
TdtRawIO,
WaveSurferRawIO,
WinEdrRawIO,
WinWcpRawIO,
]
Expand Down
128 changes: 128 additions & 0 deletions neo/rawio/wavesurferrawio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
"""
In development 16/10/2021

Class for reading data from WaveSurfer, a software written by
Boaz Mohar and Adam Taylor https://wavesurfer.janelia.org/

Requires the PyWaveSurfer module written by Boaz Mohar and Adam Taylor.

To Discuss:
- Wavesurfer also has analog output, and digital input / output channels, but here only supported analog input. Is this okay?
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes. As soon as it is clear in the doc. Go for the most important for you first. Enhance later on demand.

- I believe the signal streams field is configured correctly here, used AxonRawIO as a guide.
- each segment (sweep) has it's own timestamp, so I beleive no events_signals is correct (similar to winwcprawio not axonrawio)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This I don't anderstand


1) Upload test files (kindly provided by Boaz Mohar and Adam Taylor) to g-node portal
2) write RawIO and IO tests

2. Step 2: RawIO test:
* create a file in neo/rawio/tests with the same name with "test_" prefix
* copy paste neo/rawio/tests/test_examplerawio.py and do the same

4.Step 4 : IO test
* create a file in neo/test/iotest with the same previous name with "test_" prefix
* copy/paste from neo/test/iotest/test_exampleio.py
"""

from .baserawio import (BaseRawIO, _signal_channel_dtype, _signal_stream_dtype,
_spike_channel_dtype, _event_channel_dtype)
import numpy as np

try:
from pywavesurfer import ws
except ImportError as err:
HAS_PYWAVESURFER = False
PYWAVESURFER_ERR = err
else:
HAS_PYWAVESURFER = True
PYWAVESURFER_ERR = None

class WaveSurferRawIO(BaseRawIO):

extensions = ['fake']
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use the true extention

rawmode = 'one-file'

def __init__(self, filename=''):
BaseRawIO.__init__(self)
self.filename = filename

if not HAS_PYWAVESURFER:
raise PYWAVESURFER_ERR

def _source_name(self):
return self.filename

def _parse_header(self):

pyws_data = ws.loadDataFile(self.filename, format_string="double")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you sure this is lazy ?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In short this load metadata but not load buffer in memory

Copy link
Author

@easy-electrophysiology easy-electrophysiology Nov 2, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not actually lazy - apologies I understand better now how memmap is working e.g. in the AxonIO, and the logic behind the RawIO / IO API.

Lazy loading could be supported by re-writing PyWaveSurfer or incorporating that code more extensively into a new RawIO module, but my initial thought was to provide a wrapper around PyWaveSurfers IO only. I see now that the format of an IO module with only Block readable and the lazy=True argument not allowed, similar to StimFitIO, is the appropriate way to achieve this, rather than the RawIO API.

If you are happy with the approach, I will start again and provide an IO class only based on StimfitIO. I believe in this case it would also be appropriate to return the data scaled.

header = pyws_data["header"]

# Raw Data
self._raw_signals = {}
self._t_starts = {}

for seg_index in range(int(header["NSweepsPerRun"])):

sweep_id = "sweep_{0:04d}".format(seg_index + 1) # e.g. "sweep_0050"
self._raw_signals[seg_index] = pyws_data[sweep_id]["analogScans"].T # reshape to data x channel for Neo standard
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This reshape make a copy in memory I guess, you should make the reshape on the fly in _get_analogsignal_chunk

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think reshaping does not necessarily create a copy, so maybe this would be worth a test instead of reshaping in every get_chunk call...

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think reshape on h5py buffer need to create a np.array and so a copy.
Maybe this is already an array which is also not good.

self._t_starts[seg_index] = np.float64(pyws_data[sweep_id]["timestamp"])

# Signal Channels
signal_channels = []
ai_channel_names = header["AIChannelNames"].astype(str)
ai_channel_units = header["AIChannelUnits"].astype(str)
self._sampling_rate = np.float64(pyws_data["header"]["AcquisitionSampleRate"])

for ch_idx, (ch_name, ch_units) in enumerate(zip(ai_channel_names,
ai_channel_units)):
ch_id = ch_idx + 1
dtype = "float64" # as loaded with "double" argument from PyWaveSurfer
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In there any way to load the data as "raw" (certainly int16) because in this case we let PyWaveSurfer to make the scaling.
But the idea of this rawio layer is to be able to load data in raw mode to avoid memory.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just as a note / warning, raw data from wavesurfer (and pywavwsurfer) is uncorrected for a NI card specific calibration. I think it should never be used. Not sure if this is relevant to the selection of dtype or memory considerations.
See here for the documentation on it as it might be very confusing to users that would assume linear scaling of A/D values to voltage. I know it tripped us :)

gain = 1
offset = 0
stream_id = "0" # chan_id
signal_channels.append((ch_name, ch_id, self._sampling_rate, dtype, ch_units, gain, offset, stream_id))

signal_channels = np.array(signal_channels, dtype=_signal_channel_dtype)

# Spike Channels (no spikes)
spike_channels = []
spike_channels = np.array(spike_channels, dtype=_spike_channel_dtype)

# Event Channels (no events)
event_channels = []
event_channels = np.array(event_channels, dtype=_event_channel_dtype)

# Signal Streams
signal_streams = np.array([('Signals', '0')], dtype=_signal_stream_dtype)

# Header Dict
self.header = {}
self.header['nb_block'] = 1
self.header['nb_segment'] = [int(header["NSweepsPerRun"])]
self.header['signal_streams'] = signal_streams
self.header['signal_channels'] = signal_channels
self.header['spike_channels'] = spike_channels
self.header['event_channels'] = event_channels

self._generate_minimal_annotations() # TODO: return to this and add annotations

def _segment_t_start(self, block_index, seg_index):
return self._t_starts[seg_index]

def _segment_t_stop(self, block_index, seg_index):
t_stop = self._t_starts[seg_index] + \
self._raw_signals[seg_index].shape[0] / self._sampling_rate
return t_stop

def _get_signal_size(self, block_index, seg_index, stream_index):
shape = self._raw_signals[seg_index].shape
return shape[0]

def _get_signal_t_start(self, block_index, seg_index, stream_index):
return self._t_starts[seg_index]

def _get_analogsignal_chunk(self, block_index, seg_index, i_start, i_stop,
stream_index, channel_indexes):
if channel_indexes is None:
channel_indexes = slice(None)
raw_signals = self._raw_signals[seg_index][slice(i_start, i_stop), channel_indexes]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Make the transopose here.

return raw_signals