From 4e71e249f645bd6fa998d0d9eaaec3597f946ba9 Mon Sep 17 00:00:00 2001 From: Micah Johnson Date: Mon, 23 Dec 2024 09:25:43 -0700 Subject: [PATCH] Calibration (#20) * Added in functionality for calibrations via json * Adjusted interpolation scheme to use pandas > 2.0.0 * Calibration changes the answers for kaslo tests --- Makefile | 5 ++- study_lyte/adjustments.py | 27 +++++++--------- study_lyte/calibrations.py | 41 +++++++++++++++++++++++ study_lyte/detect.py | 2 +- study_lyte/profile.py | 36 +++++++++++++++++---- tests/data/calibrations.json | 7 ++++ tests/test_adjustments.py | 63 ++++++++++++++++++++++++------------ tests/test_calibration.py | 25 ++++++++++++++ tests/test_profile.py | 34 +++++++++++++++---- 9 files changed, 188 insertions(+), 52 deletions(-) create mode 100644 study_lyte/calibrations.py create mode 100644 tests/data/calibrations.json create mode 100644 tests/test_calibration.py diff --git a/Makefile b/Makefile index 374896a..4b2c890 100644 --- a/Makefile +++ b/Makefile @@ -79,9 +79,8 @@ release: dist ## package and upload a release twine upload dist/* dist: clean ## builds source and wheel package - python setup.py sdist - python setup.py bdist_wheel + python3 -m build --sdist ls -l dist install: clean ## install the package to the active Python's site-packages - python setup.py install + pip install . diff --git a/study_lyte/adjustments.py b/study_lyte/adjustments.py index 4ad1645..2d8e87f 100644 --- a/study_lyte/adjustments.py +++ b/study_lyte/adjustments.py @@ -91,28 +91,23 @@ def get_normalized_at_border(series: pd.Series, fractional_basis: float = 0.01, return border_norm def merge_on_to_time(df_list, final_time): - """ - Reindex the df from the list onto a final time stamp - """ - # Build dummy result in case no data is passed - result = pd.DataFrame() + """""" + result = None + for df in df_list: - # Merge everything else to it - for i, df in enumerate(df_list): time_df = df.copy() if df.index.name != 'time': time_df = time_df.set_index('time') - else: - time_df = df.copy() - data = time_df.reindex(time_df.index.union(final_time)).interpolate(method='cubic').reindex(final_time) - if i == 0: - result = data + new = pd.DataFrame(columns=time_df.columns, index=final_time) + for c in time_df.columns: + new[c] = np.interp(final_time, # Target indices (100 Hz) + time_df.index, # Original 75 Hz indices + time_df[c]) + if result is None: + result = new else: - result = pd.merge_ordered(result, data, on='time') - - # interpolate the nan's - result = result.interpolate(method='nearest', limit_direction='both') + result = result.join(new) return result diff --git a/study_lyte/calibrations.py b/study_lyte/calibrations.py new file mode 100644 index 0000000..5aae964 --- /dev/null +++ b/study_lyte/calibrations.py @@ -0,0 +1,41 @@ +from enum import Enum +import json +from pathlib import Path +from .logging import setup_log +import logging +from dataclasses import dataclass +from typing import List +setup_log() + +LOG = logging.getLogger('study_lyte.calibrations') + + +@dataclass() +class Calibration: + """Small class to make accessing calibration data a bit more convenient""" + serial: str + calibration: dict[str, List[float]] + + +class Calibrations: + """ + Class to read in a json containing calibrations, keyed by serial number and + valued by dictionary of sensor names containing cal values + """ + def __init__(self, filename:Path): + with open(filename, mode='r') as fp: + self._info = json.load(fp) + + def from_serial(self, serial:str) -> Calibration: + """ Build data object from the calibration result """ + cal = self._info.get(serial) + if cal is None: + LOG.warning(f"No Calibration found for serial {serial}, using default") + cal = self._info['default'] + serial = 'UNKNOWN' + + else: + LOG.info(f"Calibration found ({serial})!") + + result = Calibration(serial=serial, calibration=cal) + return result diff --git a/study_lyte/detect.py b/study_lyte/detect.py index 4db367a..0949f0b 100644 --- a/study_lyte/detect.py +++ b/study_lyte/detect.py @@ -171,7 +171,7 @@ def get_acceleration_stop(acceleration, threshold=-0.2, max_threshold=0.1): n_points=n, search_direction='backward') - if acceleration_stop is None: + if acceleration_stop is None or acceleration_stop == 0: acceleration_stop = len(acceleration) - 1 else: acceleration_stop = acceleration_stop + search_start diff --git a/study_lyte/profile.py b/study_lyte/profile.py index 41e95d2..715c6f7 100644 --- a/study_lyte/profile.py +++ b/study_lyte/profile.py @@ -11,10 +11,12 @@ from .detect import get_acceleration_start, get_acceleration_stop, get_nir_surface, get_nir_stop, get_sensor_start, get_ground_strike from .depth import AccelerometerDepth, BarometerDepth from .logging import setup_log +from .calibrations import Calibrations import logging setup_log() LOG = logging.getLogger('study_lyte.profile') + @dataclass class Event: name: str @@ -39,22 +41,24 @@ def __init__(self, filename, surface_detection_offset=4.5, calibration=None, """ self.filename = Path(filename) self.surface_detection_offset = surface_detection_offset - self.calibration = calibration or {} self.tip_diameter_mm = tip_diameter_mm # Properties self._raw = None self._meta = None self._point = None + self._serial_number = None + self._calibration = calibration or None self.header_position = None + # Dataframes self._depth = None # Final depth series used for analysis self._acceleration = None # No gravity acceleration self._cropped = None # Full dataframe cropped to surface and stop self._force = None self._nir = None - # Useful stats/info + # Useful stats/info properties self._distance_traveled = None # distance travelled while moving self._distance_through_snow = None # Distance travelled while in snow self._avg_velocity = None # avg velocity of the probe while in the snow @@ -62,7 +66,7 @@ def __init__(self, filename, surface_detection_offset=4.5, calibration=None, self._datetime = None self._has_upward_motion = None # Flag for datasets containing upward motion - # Events + # Time series events self._start = None self._stop = None self._surface = None @@ -75,6 +79,27 @@ def assign_event_depths(self): for event in [self._start, self._stop, self._surface.nir, self._surface.force]: event.depth = self.depth.iloc[event.index] + @property + def serial_number(self): + if self._serial_number is None: + self._serial_number = self.metadata.get('Serial Num.') + if self._serial_number is None: + self._serial_number = 'UNKNOWN' + return self._serial_number + + def set_calibration(self, ext_calibrations:Calibrations): + """ + Assign new calibration using a collection of calibrations + Args: + ext_calibrations: External collection of calibrations + """ + cal = ext_calibrations.from_serial(self.serial_number) + self._calibration = cal.calibration + + @property + def calibration(self): + return self._calibration + @classmethod def from_metadata(cls, filename, **kwargs): profile = cls(filename) @@ -83,7 +108,6 @@ def from_metadata(cls, filename, **kwargs): else: return LyteProfileV6(filename) - @property def raw(self): """ @@ -144,7 +168,7 @@ def force(self): if self._force is None: if 'Sensor1' in self.calibration.keys(): force = apply_calibration(self.raw['Sensor1'].values, self.calibration['Sensor1'], minimum=0, maximum=15000) - # force = force - force[0] + force = force - np.nanmean(force[0:20]) else: force = self.raw['Sensor1'].values @@ -432,7 +456,7 @@ def stop(self): if idx is not None: self._stop = Event(name='stop', index=idx, depth=None, time=self.raw['time'].iloc[idx]) else: - self._stop = Event(name='stop', index=0, depth=None, time=self.raw['time'].iloc[0]) + self._stop = Event(name='stop', index=len(self.raw) - 1, depth=None, time=self.raw['time'].iloc[0]) return self._stop diff --git a/tests/data/calibrations.json b/tests/data/calibrations.json new file mode 100644 index 0000000..ee29cfa --- /dev/null +++ b/tests/data/calibrations.json @@ -0,0 +1,7 @@ +{ + "252813070A020004":{"Sensor1":[0, 0, -10, 409], + "comment": "Test"}, + + "default":{"Sensor1":[0, 0, -1, 4096], + "comment": "default"} +} diff --git a/tests/test_adjustments.py b/tests/test_adjustments.py index 98df124..4dcb330 100644 --- a/tests/test_adjustments.py +++ b/tests/test_adjustments.py @@ -29,7 +29,12 @@ def test_get_points_from_fraction(n_samples, fraction, maximum, expected): ([1, 1, 2, 2], 0.5, 'backward', 2), # fractional basis ([1, 3, 4, 6], 0.25, 'forward', 1), - ([1, 3, 5, 6], 0.75, 'forward', 3) + ([1, 3, 5, 6], 0.75, 'forward', 3), + + # Test for nans + ([1]*10 + [2] * 5 + [np.nan]*5, 0.5, 'backward', 2), + ([np.nan] * 10, 0.5, 'backward', np.nan) + ]) def test_directional_mean(data, fractional_basis, direction, expected): """ @@ -37,7 +42,10 @@ def test_directional_mean(data, fractional_basis, direction, expected): """ df = pd.DataFrame({'data': np.array(data)}) value = get_directional_mean(df['data'], fractional_basis=fractional_basis, direction=direction) - assert value == expected + if np.isnan(expected): # Handle the NaN case + assert np.isnan(value) + else: + assert value == expected @pytest.mark.parametrize('data, fractional_basis, direction, zero_bias_idx', [ @@ -70,20 +78,41 @@ def test_get_normalized_at_border(data, fractional_basis, direction, ideal_norm_ result = get_normalized_at_border(df['data'], fractional_basis=fractional_basis, direction=direction) assert result.iloc[ideal_norm_index] == 1 +def poly_function(elapsed, amplitude=4096, frequency=1): + return amplitude * np.sin(2 * np.pi * frequency * elapsed) + + @pytest.mark.parametrize('data1_hz, data2_hz, desired_hz', [ - (10, 5, 20) + (75, 100, 16000), + (100, 75, 100), + ]) def test_merge_on_to_time(data1_hz, data2_hz, desired_hz): - df1 = pd.DataFrame({'data1':np.arange(1,stop=data1_hz+1), 'time': np.arange(0, 1, 1 / data1_hz)}) - df2 = pd.DataFrame({'data2':np.arange(100,stop=data2_hz+100), 'time': np.arange(0, 1, 1 / data2_hz)}) - desired = np.arange(0, 1, 1 / desired_hz) + """ + Test merging multi sample rate timeseries into a single dataframe + specifically focused on timing of the final product + """ + t1 = np.arange(0, 1, 1 / data1_hz) + d1 = poly_function(t1) + df1 = pd.DataFrame({'data1':d1, 'time': t1}) + df1 = df1.set_index('time') + t2 = np.arange(0, 1, 1 / data2_hz) + d2 = poly_function(t2) + df2 = pd.DataFrame({'data2':d2, 'time': t2}) + df2 = df2.set_index('time') + + desired = np.arange(0, 1, 1 / desired_hz) final = merge_on_to_time([df1, df2], desired) - # Ensure we have essentially the same timestep - tsteps = np.unique(np.round(final['time'].diff(), 6)) - tsteps = tsteps[~np.isnan(tsteps)] - # Assert only a nan and a real number exist for timesteps - assert tsteps == np.round(1/desired_hz, 6) + + # Check timing on both dataframes + assert df1['data1'].idxmax() == pytest.approx(final['data1'].idxmax(), abs=3e-2) + assert df1['data1'].idxmin() == pytest.approx(final['data1'].idxmin(), abs=3e-2) + # Confirm the handling of multiple datasets + assert len(final.columns) == 2 + # Confirm an exact match of length of data + assert len(final['data1'][~np.isnan(final['data1'])]) == len(desired) + @pytest.mark.parametrize('data_list, expected', [ # Typical use, low sample to high res @@ -125,7 +154,7 @@ def test_merge_time_series(data_list, expected): # Test normal situation with ambient present ([200, 200, 400, 1000], [200, 200, 50, 50], 100, [1.0, 1.0, 275, 1000]), # Test no cleaning required - # ([200, 200, 400, 400], [210, 210, 200, 200], 90, [200, 200, 400, 400]) + ([200, 200, 400, 400], [210, 210, 200, 200], 90, [200, 200, 400, 400]) ]) def test_remove_ambient(active, ambient, min_ambient_range, expected): """ @@ -137,10 +166,6 @@ def test_remove_ambient(active, ambient, min_ambient_range, expected): result = remove_ambient(active, ambient, min_ambient_range=100) np.testing.assert_equal(result.values, expected) -# @pytest.mark.parametrize('fname', ['2024-01-31--104419.csv']) -# def test_remove_ambient_real(raw_df, fname): -# remove_ambient(raw_df['Sensor3'], raw_df['Sensor2']) - @pytest.mark.parametrize('data, coefficients, expected', [ ([1, 2, 3, 4], [2, 0], [2, 4, 6, 8]) ]) @@ -153,15 +178,13 @@ def test_apply_calibration(data, coefficients, expected): @pytest.mark.parametrize("data, depth, new_depth, resolution, agg_method, expected_data", [ # Test with negative depths - #([[2, 4, 6, 8]], [-10, -20, -30, -40], [-20, -40], None, 'mean', [[3, 7]]), + ([[2, 4, 6, 8]], [-10, -20, -30, -40], [-20, -40], None, 'mean', [[3, 7]]), # Test with column specific agg methods - #([[2, 4, 6, 8], [1, 1, 1, 1]], [-10, -20, -30, -40], [-20, -40], None, {'data0': 'mean','data1':'sum'}, [[3, 7], [2, 2]]), + ([[2, 4, 6, 8], [1, 1, 1, 1]], [-10, -20, -30, -40], [-20, -40], None, {'data0': 'mean','data1':'sum'}, [[3, 7], [2, 2]]), # Test with resolution ([[2, 4, 6, 8]], [-10, -20, -30, -40], None, 20, 'mean', [[3, 7]]), ]) def test_aggregate_by_depth(data, depth, new_depth, resolution, agg_method, expected_data): - """ - """ data_dict = {f'data{i}':d for i,d in enumerate(data)} data_dict['depth'] = depth df = pd.DataFrame.from_dict(data_dict) diff --git a/tests/test_calibration.py b/tests/test_calibration.py new file mode 100644 index 0000000..baa6389 --- /dev/null +++ b/tests/test_calibration.py @@ -0,0 +1,25 @@ +import pytest +from os.path import join +from pathlib import Path +from study_lyte.calibrations import Calibrations + + +class TestCalibrations: + @pytest.fixture(scope='function') + def calibration_json(self, data_dir): + return Path(join(data_dir, 'calibrations.json')) + + @pytest.fixture(scope='function') + def calibrations(self, calibration_json): + return Calibrations(calibration_json) + + @pytest.mark.parametrize("serial, expected", [ + ("252813070A020004", -10), + ("NONSENSE", -1), + ]) + def test_attributes(self, calibrations, serial, expected): + """""" + result = calibrations.from_serial(serial) + assert result.calibration['Sensor1'][2] == expected + + diff --git a/tests/test_profile.py b/tests/test_profile.py index 4a3ed2d..f7d07d6 100644 --- a/tests/test_profile.py +++ b/tests/test_profile.py @@ -1,5 +1,7 @@ import pytest from os.path import join +from pathlib import Path +from study_lyte.calibrations import Calibrations from study_lyte.profile import ProcessedProfileV6, LyteProfileV6, Sensor from operator import attrgetter from shapely.geometry import Point @@ -8,7 +10,7 @@ class TestLyteProfile: @pytest.fixture() def profile(self, data_dir, filename, depth_method): - return LyteProfileV6(join(data_dir, filename), calibration={'Sensor1': [1, 0]}, depth_method=depth_method) + return LyteProfileV6(join(data_dir, filename), calibration={'Sensor1': [-1, 4096]}, depth_method=depth_method) @pytest.mark.parametrize('filename, depth_method, expected', [ ('kaslo.csv', 'fused', 9539) @@ -66,17 +68,17 @@ def test_moving_time(self, profile, expected): assert pytest.approx(delta, abs=0.05) == expected @pytest.mark.parametrize('filename, depth_method, expected_points, mean_force', [ - ('kaslo.csv', 'fused', 16377, 3500) + ('kaslo.csv', 'fused', 16377, 544) ]) def test_force_profile(self, profile, filename, depth_method, expected_points, mean_force): assert pytest.approx(len(profile.force), len(profile.raw)*0.05) == expected_points assert pytest.approx(profile.force['force'].mean(), abs=150) == mean_force @pytest.mark.parametrize('filename, depth_method, expected_points, mean_pressure', [ - ('kaslo.csv', 'fused', 16377, 179) + ('kaslo.csv', 'fused', 16377, 27) ]) def test_pressure_profile(self, profile, filename, depth_method, expected_points, mean_pressure): - assert pytest.approx(len(profile.pressure), len(profile.raw)*0.05) == expected_points + assert pytest.approx(len(profile.pressure), len(profile.raw) * 0.05) == expected_points assert pytest.approx(profile.pressure['pressure'].mean(), abs=10) == mean_pressure @@ -217,6 +219,27 @@ def test_report_card(self, profile, filename, depth_method, expected): result = profile.report_card() assert True + @pytest.mark.parametrize('filename, depth_method, expected', [ + # Test assignment of the serial number from file + ("open_air.csv", 'fused', "252813070A020004"), + # Test serial number not found + ("mores_20230119.csv", 'fused', "UNKNOWN"), + ]) + def test_serial_number(self, profile,filename, depth_method, expected): + assert profile.serial_number == expected + + @pytest.mark.parametrize('filename, depth_method, expected', [ + # Test assignment of the serial number from file + ("open_air.csv", 'fused', -10), + # Test serial number not found + ("mores_20230119.csv", 'fused', -1), + ]) + def test_set_calibration(self, data_dir, profile,filename, depth_method, expected): + p = Path(join(data_dir,'calibrations.json')) + calibrations = Calibrations(p) + profile.set_calibration(calibrations) + assert profile.calibration['Sensor1'][2] == expected + class TestLegacyProfile: @pytest.fixture() @@ -260,5 +283,4 @@ def test_app(data_dir): """Functionality test""" fname = data_dir + '/ls_app.csv' profile = ProcessedProfileV6(fname) - print(profile) - assert False # TODO: Add more detailed checking \ No newline at end of file + assert False # TODO: Add more detailed checking