Calibration (#20)

* Added in functionality for calibrations via json * Adjusted interpolation scheme to use pandas > 2.0.0 * Calibration changes the answers for kaslo tests
AdventureData · Dec 23, 2024 · 4e71e24 · 4e71e24
1 parent 1f74680
commit 4e71e24
Show file tree

Hide file tree

Showing 9 changed files with 188 additions and 52 deletions.
diff --git a/Makefile b/Makefile
@@ -79,9 +79,8 @@ release: dist ## package and upload a release
 	twine upload dist/*
 
 dist: clean ## builds source and wheel package
-	python setup.py sdist
-	python setup.py bdist_wheel
+	python3 -m build --sdist
 	ls -l dist
 
 install: clean ## install the package to the active Python's site-packages
-	python setup.py install
+	pip install .
diff --git a/study_lyte/adjustments.py b/study_lyte/adjustments.py
@@ -91,28 +91,23 @@ def get_normalized_at_border(series: pd.Series, fractional_basis: float = 0.01,
     return border_norm
 
 def merge_on_to_time(df_list, final_time):
-    """
-    Reindex the df from the list onto a final time stamp
-    """
-    # Build dummy result in case no data is passed
-    result = pd.DataFrame()
+    """"""
+    result = None
+    for df in df_list:
 
-    # Merge everything else to it
-    for i, df in enumerate(df_list):
         time_df = df.copy()
         if df.index.name != 'time':
             time_df = time_df.set_index('time')
-        else:
-            time_df = df.copy()
 
-        data = time_df.reindex(time_df.index.union(final_time)).interpolate(method='cubic').reindex(final_time)
-        if i == 0:
-            result = data
+        new = pd.DataFrame(columns=time_df.columns, index=final_time)
+        for c in time_df.columns:
+            new[c] = np.interp(final_time,  # Target indices (100 Hz)
+                               time_df.index,  # Original 75 Hz indices
+                               time_df[c])
+        if result is None:
+            result = new
         else:
-            result = pd.merge_ordered(result, data, on='time')
-
-    # interpolate the nan's
-    result = result.interpolate(method='nearest', limit_direction='both')
+            result = result.join(new)
     return result
 
 

diff --git a/study_lyte/calibrations.py b/study_lyte/calibrations.py
@@ -0,0 +1,41 @@
+from enum import Enum
+import json
+from pathlib import Path
+from .logging import setup_log
+import logging
+from dataclasses import dataclass
+from typing import List
+setup_log()
+
+LOG = logging.getLogger('study_lyte.calibrations')
+
+
+@dataclass()
+class Calibration:
+    """Small class to make accessing calibration data a bit more convenient"""
+    serial: str
+    calibration: dict[str, List[float]]
+
+
+class Calibrations:
+    """
+    Class to read in a json containing calibrations, keyed by serial number and
+     valued by dictionary of sensor names containing cal values
+    """
+    def __init__(self, filename:Path):
+        with open(filename, mode='r') as fp:
+            self._info = json.load(fp)
+
+    def from_serial(self, serial:str) -> Calibration:
+        """ Build data object from the calibration result """
+        cal = self._info.get(serial)
+        if cal is None:
+            LOG.warning(f"No Calibration found for serial {serial}, using default")
+            cal = self._info['default']
+            serial = 'UNKNOWN'
+
+        else:
+            LOG.info(f"Calibration found ({serial})!")
+
+        result = Calibration(serial=serial, calibration=cal)
+        return result
diff --git a/study_lyte/detect.py b/study_lyte/detect.py
@@ -171,7 +171,7 @@ def get_acceleration_stop(acceleration, threshold=-0.2, max_threshold=0.1):
                                          n_points=n,
                                          search_direction='backward')
 
-    if acceleration_stop is None:
+    if acceleration_stop is None or acceleration_stop == 0:
         acceleration_stop = len(acceleration) - 1
     else:
         acceleration_stop = acceleration_stop + search_start

diff --git a/study_lyte/profile.py b/study_lyte/profile.py
@@ -11,10 +11,12 @@
 from .detect import get_acceleration_start, get_acceleration_stop, get_nir_surface, get_nir_stop, get_sensor_start, get_ground_strike
 from .depth import AccelerometerDepth, BarometerDepth
 from .logging import setup_log
+from .calibrations import Calibrations
 import logging
 setup_log()
 
 LOG = logging.getLogger('study_lyte.profile')
+
 @dataclass
 class Event:
     name: str
@@ -39,30 +41,32 @@ def __init__(self, filename, surface_detection_offset=4.5, calibration=None,
         """
         self.filename = Path(filename)
         self.surface_detection_offset = surface_detection_offset
-        self.calibration = calibration or {}
         self.tip_diameter_mm = tip_diameter_mm
 
         # Properties
         self._raw = None
         self._meta = None
         self._point = None
+        self._serial_number = None
+        self._calibration = calibration or None
         self.header_position = None
 
+        # Dataframes
         self._depth = None  # Final depth series used for analysis
         self._acceleration = None  # No gravity acceleration
         self._cropped = None  # Full dataframe cropped to surface and stop
         self._force = None
         self._nir = None
 
-        # Useful stats/info
+        # Useful stats/info properties
         self._distance_traveled = None  # distance travelled while moving
         self._distance_through_snow = None  # Distance travelled while in snow
         self._avg_velocity = None  # avg velocity of the probe while in the snow
         self._resolution = None  # Vertical resolution of the profile in the snow
         self._datetime = None
         self._has_upward_motion = None  # Flag for datasets containing upward motion
 
-        # Events
+        # Time series events
         self._start = None
         self._stop = None
         self._surface = None
@@ -75,6 +79,27 @@ def assign_event_depths(self):
         for event in [self._start, self._stop, self._surface.nir, self._surface.force]:
             event.depth = self.depth.iloc[event.index]
 
+    @property
+    def serial_number(self):
+        if self._serial_number is None:
+            self._serial_number = self.metadata.get('Serial Num.')
+            if self._serial_number is None:
+                self._serial_number = 'UNKNOWN'
+        return self._serial_number
+
+    def set_calibration(self, ext_calibrations:Calibrations):
+        """
+        Assign new calibration using a collection of calibrations
+        Args:
+            ext_calibrations: External collection of calibrations
+        """
+        cal = ext_calibrations.from_serial(self.serial_number)
+        self._calibration = cal.calibration
+
+    @property
+    def calibration(self):
+        return self._calibration
+
     @classmethod
     def from_metadata(cls, filename, **kwargs):
         profile = cls(filename)
@@ -83,7 +108,6 @@ def from_metadata(cls, filename, **kwargs):
         else:
             return LyteProfileV6(filename)
 
-
     @property
     def raw(self):
         """
@@ -144,7 +168,7 @@ def force(self):
         if self._force is None:
             if 'Sensor1' in self.calibration.keys():
                 force = apply_calibration(self.raw['Sensor1'].values, self.calibration['Sensor1'], minimum=0, maximum=15000)
-                # force = force - force[0]
+                force = force - np.nanmean(force[0:20])
             else:
                 force = self.raw['Sensor1'].values
 
@@ -432,7 +456,7 @@ def stop(self):
             if idx is not None:
                 self._stop = Event(name='stop', index=idx, depth=None, time=self.raw['time'].iloc[idx])
             else:
-                self._stop = Event(name='stop', index=0, depth=None, time=self.raw['time'].iloc[0])
+                self._stop = Event(name='stop', index=len(self.raw) - 1, depth=None, time=self.raw['time'].iloc[0])
 
         return self._stop
 

diff --git a/tests/data/calibrations.json b/tests/data/calibrations.json
@@ -0,0 +1,7 @@
+{
+	"252813070A020004":{"Sensor1":[0, 0, -10, 409],
+						"comment": "Test"},
+
+	"default":{"Sensor1":[0, 0, -1, 4096],
+		       "comment": "default"}
+}
diff --git a/tests/test_adjustments.py b/tests/test_adjustments.py
@@ -29,15 +29,23 @@ def test_get_points_from_fraction(n_samples, fraction, maximum, expected):
     ([1, 1, 2, 2], 0.5, 'backward', 2),
     #  fractional basis
     ([1, 3, 4, 6], 0.25, 'forward', 1),
-    ([1, 3, 5, 6], 0.75, 'forward', 3)
+    ([1, 3, 5, 6], 0.75, 'forward', 3),
+
+    # Test for nans
+    ([1]*10 + [2] * 5 + [np.nan]*5, 0.5, 'backward', 2),
+    ([np.nan] * 10, 0.5, 'backward', np.nan)
+
 ])
 def test_directional_mean(data, fractional_basis, direction, expected):
     """
     Test the directional mean function
     """
     df = pd.DataFrame({'data': np.array(data)})
     value = get_directional_mean(df['data'], fractional_basis=fractional_basis, direction=direction)
-    assert value == expected
+    if np.isnan(expected):  # Handle the NaN case
+        assert np.isnan(value)
+    else:
+        assert value == expected
 
 
 @pytest.mark.parametrize('data, fractional_basis, direction, zero_bias_idx', [
@@ -70,20 +78,41 @@ def test_get_normalized_at_border(data, fractional_basis, direction, ideal_norm_
     result = get_normalized_at_border(df['data'], fractional_basis=fractional_basis, direction=direction)
     assert result.iloc[ideal_norm_index] == 1
 
+def poly_function(elapsed, amplitude=4096, frequency=1):
+    return amplitude * np.sin(2 * np.pi * frequency * elapsed)
+
+
 @pytest.mark.parametrize('data1_hz, data2_hz, desired_hz', [
-    (10, 5, 20)
+    (75, 100, 16000),
+    (100, 75, 100),
+
 ])
 def test_merge_on_to_time(data1_hz, data2_hz, desired_hz):
-    df1 = pd.DataFrame({'data1':np.arange(1,stop=data1_hz+1), 'time': np.arange(0, 1, 1 / data1_hz)})
-    df2 = pd.DataFrame({'data2':np.arange(100,stop=data2_hz+100), 'time': np.arange(0, 1, 1 / data2_hz)})
-    desired =  np.arange(0, 1, 1 / desired_hz)
+    """
+    Test merging multi sample rate timeseries into a single dataframe
+    specifically focused on timing of the final product
+    """
+    t1 = np.arange(0, 1, 1 / data1_hz)
+    d1 = poly_function(t1)
+    df1 = pd.DataFrame({'data1':d1, 'time': t1})
+    df1 = df1.set_index('time')
 
+    t2 = np.arange(0, 1, 1 / data2_hz)
+    d2 = poly_function(t2)
+    df2 = pd.DataFrame({'data2':d2, 'time': t2})
+    df2 = df2.set_index('time')
+
+    desired = np.arange(0, 1, 1 / desired_hz)
     final = merge_on_to_time([df1, df2], desired)
-    # Ensure we have essentially the same timestep
-    tsteps = np.unique(np.round(final['time'].diff(), 6))
-    tsteps = tsteps[~np.isnan(tsteps)]
-    # Assert only a nan and a real number exist for timesteps
-    assert tsteps == np.round(1/desired_hz, 6)
+
+    # Check timing on both dataframes
+    assert df1['data1'].idxmax() == pytest.approx(final['data1'].idxmax(), abs=3e-2)
+    assert df1['data1'].idxmin() == pytest.approx(final['data1'].idxmin(), abs=3e-2)
+    # Confirm the handling of multiple datasets
+    assert len(final.columns) == 2
+    # Confirm an exact match of length of data
+    assert len(final['data1'][~np.isnan(final['data1'])]) == len(desired)
+
 
 @pytest.mark.parametrize('data_list, expected', [
     # Typical use, low sample to high res
@@ -125,7 +154,7 @@ def test_merge_time_series(data_list, expected):
     # Test normal situation with ambient present
     ([200, 200, 400, 1000], [200, 200, 50, 50], 100, [1.0, 1.0, 275, 1000]),
     # Test no cleaning required
-    # ([200, 200, 400, 400], [210, 210, 200, 200], 90, [200, 200, 400, 400])
+    ([200, 200, 400, 400], [210, 210, 200, 200], 90, [200, 200, 400, 400])
 ])
 def test_remove_ambient(active, ambient, min_ambient_range, expected):
     """
@@ -137,10 +166,6 @@ def test_remove_ambient(active, ambient, min_ambient_range, expected):
     result = remove_ambient(active, ambient, min_ambient_range=100)
     np.testing.assert_equal(result.values, expected)
 
-# @pytest.mark.parametrize('fname', ['2024-01-31--104419.csv'])
-# def test_remove_ambient_real(raw_df, fname):
-#     remove_ambient(raw_df['Sensor3'], raw_df['Sensor2'])
-
 @pytest.mark.parametrize('data, coefficients, expected', [
     ([1, 2, 3, 4], [2, 0], [2, 4, 6, 8])
 ])
@@ -153,15 +178,13 @@ def test_apply_calibration(data, coefficients, expected):
 
 @pytest.mark.parametrize("data, depth, new_depth, resolution, agg_method, expected_data", [
     # Test with negative depths
-    #([[2, 4, 6, 8]], [-10, -20, -30, -40], [-20, -40], None, 'mean', [[3, 7]]),
+    ([[2, 4, 6, 8]], [-10, -20, -30, -40], [-20, -40], None, 'mean', [[3, 7]]),
     # Test with column specific agg methods
-    #([[2, 4, 6, 8], [1, 1, 1, 1]], [-10, -20, -30, -40], [-20, -40], None, {'data0': 'mean','data1':'sum'}, [[3, 7], [2, 2]]),
+    ([[2, 4, 6, 8], [1, 1, 1, 1]], [-10, -20, -30, -40], [-20, -40], None, {'data0': 'mean','data1':'sum'}, [[3, 7], [2, 2]]),
     # Test with resolution
     ([[2, 4, 6, 8]], [-10, -20, -30, -40], None, 20, 'mean', [[3, 7]]),
 ])
 def test_aggregate_by_depth(data, depth, new_depth, resolution, agg_method, expected_data):
-    """
-    """
     data_dict = {f'data{i}':d for i,d in enumerate(data)}
     data_dict['depth'] = depth
     df = pd.DataFrame.from_dict(data_dict)

diff --git a/tests/test_calibration.py b/tests/test_calibration.py
@@ -0,0 +1,25 @@
+import pytest
+from os.path import join
+from pathlib import Path
+from study_lyte.calibrations import Calibrations
+
+
+class TestCalibrations:
+    @pytest.fixture(scope='function')
+    def calibration_json(self, data_dir):
+        return Path(join(data_dir, 'calibrations.json'))
+
+    @pytest.fixture(scope='function')
+    def calibrations(self, calibration_json):
+        return Calibrations(calibration_json)
+
+    @pytest.mark.parametrize("serial, expected", [
+        ("252813070A020004", -10),
+        ("NONSENSE", -1),
+    ])
+    def test_attributes(self, calibrations, serial, expected):
+        """"""
+        result = calibrations.from_serial(serial)
+        assert result.calibration['Sensor1'][2] == expected
+
+