Merge pull request #2 from jonahpearl/workflows

Workflows
jonahpearl · Aug 30, 2021 · 3c9613a · 3c9613a
2 parents 56e44a5 + ef6d332
commit 3c9613a
Show file tree

Hide file tree

Showing 18 changed files with 772 additions and 341 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,8 @@
+debugImgs/oe_minus_mkv_diffs.tiff
+debugImgs/pdb.tiff
+debugImgs/rpi_minus_mkv_diffs.tiff
+debugImgs/rpi_minus_oe_diffs.tiff
+build
+dist
+*.egg-info
+*.png
diff --git a/README.md b/README.md
@@ -8,7 +8,7 @@ To get started, try the following in terminal (these instructions assume you're
 4. `git clone https://github.com/guitchounts/moseq2-ephys-sync.git`
 5. `cd ./moseq2-ephys-sync/`
 6. `python setup.py install`
-7. `pip install git+ssh://git@github.com/dattalab/moseq2-extract.git@autosetting-params` (alternatively, try: `pip install git+https://github.com/dattalab/moseq2-extract.git@autosetting-params`)
+7. `pip install git+ssh://git@github.com/dattalab/moseq2-extract.git@autosetting-params` (alternatively, try: `pip install git+https://github.com/dattalab/moseq2-extract.git@autosetting-params`) (alternatively, try using conda)
 8. `conda install scikit-learn=0.24` (moseq2-extract pins `scikit-learn` to an earlier version; need to update to `0.24`)
 9. `module load ffmpeg`
 

diff --git a/dist/moseq2_ephys_sync-0.0.1-py3.7.egg b/dist/moseq2_ephys_sync-0.0.1-py3.7.egg
diff --git a/moseq2-extract b/moseq2-extract
diff --git a/moseq2_ephys_sync/.gitignore b/moseq2_ephys_sync/.gitignore
@@ -0,0 +1,138 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
diff --git a/moseq2_ephys_sync/__init__.py b/moseq2_ephys_sync/__init__.py
@@ -1,5 +0,0 @@
-import moseq2_ephys_sync
-from . import extract_leds
-from . import sync
-from . import video
-from . import plotting

diff --git a/moseq2_ephys_sync/__pycache__/__init__.cpython-37.pyc b/moseq2_ephys_sync/__pycache__/__init__.cpython-37.pyc
diff --git a/moseq2_ephys_sync/__pycache__/extract_leds.cpython-37.pyc b/moseq2_ephys_sync/__pycache__/extract_leds.cpython-37.pyc
diff --git a/moseq2_ephys_sync/__pycache__/sync.cpython-37.pyc b/moseq2_ephys_sync/__pycache__/sync.cpython-37.pyc
diff --git a/moseq2_ephys_sync/__pycache__/video.cpython-37.pyc b/moseq2_ephys_sync/__pycache__/video.cpython-37.pyc
diff --git a/moseq2_ephys_sync/arduino.py b/moseq2_ephys_sync/arduino.py
@@ -0,0 +1,96 @@
+import pandas as pd
+import numpy as np
+from glob import glob
+import pdb
+
+import sync
+
def arduino_workflow(base_path, save_path, num_leds, led_blink_interval, arduino_spec):
    """
    Workflow to get sync codes from an arduino txt file.

    The arduino sampling rate is calculated empirically below because it is
    not stable from datapoint to datapoint.

    Parameters
    ----------
    base_path : str
        Directory that is searched for the arduino ``*.txt`` file.
    save_path : str
        Not used by this function; kept so the signature stays unchanged.
    num_leds : int
        Number of LED channels. Only 4 is supported.
    led_blink_interval : number
        Passed to ``sync.events_to_codes`` as ``(led_blink_interval - 1) * 1000``
        (i.e. converted to the same millisecond units as the timestamps).
    arduino_spec : str
        Name of the column spec handed to ``get_col_info``.

    Returns
    -------
    ino_codes : np.ndarray (float)
        Output of ``sync.events_to_codes`` with column 0 divided by 1000
        (milliseconds -> seconds).
    ino_average_fs : float
        Mean sampling frequency in Hz, estimated from the timestamp spacing.

    Raises
    ------
    AssertionError
        If ``num_leds`` is not 4 or ``arduino_spec`` is None.
    """
    assert num_leds==4, "Arduino code expects 4 LED channels, other nums of channels not yet supported"
    assert arduino_spec is not None, "Arduino source requires a spec for the column names and datatypes (see arg arduino_spec)"

    arduino_colnames, arduino_dtypes = get_col_info(arduino_spec)
    ino_data = load_arduino_data(base_path, arduino_colnames, arduino_dtypes, file_glob='*.txt')
    ino_timestamps = ino_data.time  # these are in milliseconds
    ino_events = list_to_events(ino_timestamps, ino_data.led1, ino_data.led2, ino_data.led3, ino_data.led4)

    # fs = sampling freq in Hz (mean inter-sample interval is in ms, hence the *1000)
    ino_average_fs = 1/(np.mean(np.diff(ino_timestamps)))*1000

    # The 'timestamps' column of the events and minCodeTime only need to share
    # units (for ephys that is nsamples, for arduino it is ms).
    ino_codes, _ = sync.events_to_codes(ino_events, nchannels=4, minCodeTime=(led_blink_interval-1)*1000)

    # Cast to float BEFORE converting to seconds: if the codes come back as
    # integers, assigning the divided column into an integer array would
    # silently truncate every code time to a whole second.
    ino_codes = np.asarray(ino_codes, dtype=float)
    ino_codes[:,0] = ino_codes[:,0] / 1000  ## convert to seconds

    return ino_codes, ino_average_fs
+
+
+
def get_col_info(spec):
    """
    Given a string specifying the experiment type, return the expected columns
    of the arduino text file.

    Parameters
    ----------
    spec : str
        Experiment type; one of "fictive_olfaction" or "odor_on_wheel".

    Returns
    -------
    arduino_colnames : list of str
        Column names, in file order.
    arduino_dtypes : list of str
        Dtype names, positionally matched to ``arduino_colnames``.

    Raises
    ------
    ValueError
        If ``spec`` is not a known experiment type.
    """
    if spec == "fictive_olfaction":
        arduino_colnames = ['time', 'led1', 'led2', 'led3', 'led4', 'yaw', 'roll', 'pitch', 'accx', 'accy', 'accz', 'therm', 'olfled']
        # NOTE(review): 11 dtypes for 13 column names -- 'therm' and 'olfled'
        # have no entry, so a zip() of names and dtypes leaves them to be
        # inferred. Intended dtypes are not known here; confirm and extend.
        arduino_dtypes = ['int64', 'int64', 'int64', 'int64','int64', 'float64', 'float64', 'float64', 'float64', 'float64', 'float64']
    elif spec == "odor_on_wheel":
        arduino_colnames = ['time', 'led1', 'led2', 'led3', 'led4', 'wheel']
        arduino_dtypes = ['int64', 'int64', 'int64', 'int64','int64', 'int64']
    else:
        # Fail with an actionable message instead of an UnboundLocalError on return.
        raise ValueError(
            f"Unknown arduino spec {spec!r}; expected 'fictive_olfaction' or 'odor_on_wheel'"
        )
    return arduino_colnames, arduino_dtypes
+
+
+
def _bad_line_kwargs():
    """Return the pd.read_csv kwargs that skip malformed rows with a warning, for the installed pandas."""
    import inspect

    # `on_bad_lines` replaced `error_bad_lines`/`warn_bad_lines`; the old pair
    # no longer exists in newer pandas, so pick whichever this version accepts.
    if 'on_bad_lines' in inspect.signature(pd.read_csv).parameters:
        return {'on_bad_lines': 'warn'}
    return {'error_bad_lines': False, 'warn_bad_lines': True}


def load_arduino_data(base_path, colnames, dtypes, file_glob='*.txt'):
    """
    Load the arduino text file found in ``base_path`` into a DataFrame.

    Parameters
    ----------
    base_path : str
        Directory to search.
    colnames : list of str
        Column names to assign (the file's own first row is replaced).
    dtypes : list of str
        Dtypes paired positionally with ``colnames``; names beyond the end of
        ``dtypes`` get no explicit dtype.
    file_glob : str
        Glob pattern for the data file. The first match in sorted order is used.

    Returns
    -------
    pd.DataFrame

    Raises
    ------
    FileNotFoundError
        If nothing in ``base_path`` matches ``file_glob``.
    RuntimeError
        If the file cannot be parsed even after dropping its last line.
    """
    # Sort so the chosen file is deterministic when several files match.
    matches = sorted(glob(f'{base_path}/{file_glob}'))
    if not matches:
        raise FileNotFoundError(f"Could not find arduino data ({file_glob}) in specified location!")
    arduino_file = matches[0]

    read_kwargs = dict(
        header=0,
        names=colnames,
        dtype=dict(zip(colnames, dtypes)),
        **_bad_line_kwargs(),
    )

    try:
        # Try loading the entire thing first.
        data = pd.read_csv(arduino_file, **read_kwargs)
    except ValueError:
        try:
            # If needed, try ignoring the last line (often cut off mid-write).
            # skipfooter needs the slower python engine, so it is not the default.
            data = pd.read_csv(arduino_file, skipfooter=1, engine='python', **read_kwargs)
        except Exception as err:
            raise RuntimeError(
                'Could not load arduino data -- check text file for weirdness. '
                'Most common text file issues are: '
                '-- line that ends with a "-" (minus sign) or "." (decimal point) '
                '-- line that begins with a "," (comma) '
                '-- usually no more than one issue like this per txt file'
            ) from err
    return data
+
+
def list_to_events(time_list, led1, led2, led3, led4):
    """
    Transforms a list of times and led states into a list of led change events.
    ---
    Input: time_list and led1..led4 are equal-length 1-d sequences
        (pd.Series from the arduino text file, arrays or lists). They are
        indexed by position, so a Series with a non-default index also works.
    ---
    Output:
    events : 2d array, shape (n_events, 3), sorted by time
        Array of pixel clock events (single channel transitions) where:
            events[:,0] = times
            events[:,1] = channels (0-indexed)
            events[:,2] = directions (1 or -1)
        Events sharing a timestamp are kept in channel order.
    """
    times = np.asarray(time_list)

    per_channel = []
    for channel, states in enumerate((led1, led2, led3, led4)):
        diffs = np.diff(np.asarray(states))
        # plus 1, because the event is the first timepoint where the state differs
        change_idx = np.flatnonzero(diffs) + 1
        per_channel.append(np.column_stack((
            times[change_idx],
            np.full(len(change_idx), channel),
            np.sign(diffs[change_idx - 1]),
        )))

    events = np.concatenate(per_channel, axis=0)

    # Stable sort so simultaneous events on different channels come out in a
    # deterministic (channel) order rather than an implementation-defined one.
    order = np.argsort(events[:, 0], kind='stable')
    return events[order]