diff --git a/python/mspasspy/algorithms/resample.py b/python/mspasspy/algorithms/resample.py index 131dbd090..1f891c61f 100755 --- a/python/mspasspy/algorithms/resample.py +++ b/python/mspasspy/algorithms/resample.py @@ -229,6 +229,21 @@ def resample(self, mspass_object): ) mspass_object.set_npts(n_resampled) mspass_object.dt = self.dt + # Check for "sampling_rate" attribute + if mspass_object.is_defined("sampling_rate"): + sampling_rate = mspass_object["sampling_rate"] + # Check if sampling_rate is consistent with 1/dt + if abs(sampling_rate - 1.0 / mspass_object.dt) > 1e-6: + # Record inconsistency in error log (elog) + message = "sampling_rate inconsistent with 1/dt; updating to 1/dt" + mspass_object.elog.log_error("resample", + message, + ErrorSeverity.Complaint) + # Update sampling_rate to 1/dt + mspass_object["sampling_rate"] = 1.0 / mspass_object.dt + else: + # Set sampling_rate to 1/dt + mspass_object["sampling_rate"] = 1.0 / mspass_object.dt # We have to go through this conversion to avoid TypeError exceptions # i.e we can't just copy the entire vector rsdata to the data vector dv = DoubleVector(rsdata) @@ -243,6 +258,21 @@ def resample(self, mspass_object): ) mspass_object.set_npts(n_resampled) mspass_object.dt = self.dt + # Check for "sampling_rate" attribute + if mspass_object.is_defined("sampling_rate"): + sampling_rate = mspass_object["sampling_rate"] + # Check if sampling_rate is consistent with 1/dt + if abs(sampling_rate - 1.0 / mspass_object.dt) > 1e-6: + # Record inconsistency in error log (elog) + message = "sampling_rate inconsistent with 1/dt; updating to 1/dt" + mspass_object.elog.log_error("resample", + message, + ErrorSeverity.Complaint) + # Update sampling_rate to 1/dt + mspass_object["sampling_rate"] = 1.0 / mspass_object.dt + else: + # Set sampling_rate to 1/dt + mspass_object["sampling_rate"] = 1.0 / mspass_object.dt # We have to go through this conversion to avoid TypeError exceptions # i.e we can't just copy the entire vector rsdata to the data vector dm = dmatrix(rsdata) @@ -376,6 +406,21 @@ def resample(self, mspass_object): dsdata_npts = len(dsdata) mspass_object.set_npts(dsdata_npts) mspass_object.dt = self.dt + # Check for "sampling_rate" attribute + if mspass_object.is_defined("sampling_rate"): + sampling_rate = mspass_object["sampling_rate"] + # Check if sampling_rate is consistent with 1/dt + if abs(sampling_rate - 1.0 / mspass_object.dt) > 1e-6: + # Record inconsistency in error log (elog) + message = "sampling_rate inconsistent with 1/dt; updating to 1/dt" + mspass_object.elog.log_error("resample", + message, + ErrorSeverity.Complaint) + # Update sampling_rate to 1/dt + mspass_object["sampling_rate"] = 1.0 / mspass_object.dt + else: + # Set sampling_rate to 1/dt + mspass_object["sampling_rate"] = 1.0 / mspass_object.dt # We have to go through this conversion to avoid TypeError exceptions # i.e we can't just copy the entire vector rsdata to the data vector mspass_object.data = DoubleVector(dsdata) @@ -405,6 +450,21 @@ def resample(self, mspass_object): dsdata_npts = msize[1] mspass_object.set_npts(dsdata_npts) mspass_object.dt = self.dt + # Check for "sampling_rate" attribute + if mspass_object.is_defined("sampling_rate"): + sampling_rate = mspass_object["sampling_rate"] + # Check if sampling_rate is consistent with 1/dt + if abs(sampling_rate - 1.0 / mspass_object.dt) > 1e-6: + # Record inconsistency in error log (elog) + message = "sampling_rate inconsistent with 1/dt; updating to 1/dt" + mspass_object.elog.log_error("resample", + message, + ErrorSeverity.Complaint) + # Update sampling_rate to 1/dt + mspass_object["sampling_rate"] = 1.0 / mspass_object.dt + else: + # Set sampling_rate to 1/dt + mspass_object["sampling_rate"] = 1.0 / mspass_object.dt # We have to go through this conversion to avoid TypeError exceptions # i.e we can't just copy the entire vector rsdata to the data vector mspass_object.data = dmatrix(dsdata) diff --git a/python/mspasspy/util/converter.py b/python/mspasspy/util/converter.py index e2febb451..2dc75a9eb 100644 --- a/python/mspasspy/util/converter.py +++ b/python/mspasspy/util/converter.py @@ -139,6 +139,25 @@ def TimeSeries2Trace(ts): # These are required by obspy but optional in mspass. Hence, we have # to extract them with caution. Note defaults are identical to # Trace constructor + + # Check for "sampling_rate" attribute + if ts.is_defined(Keywords.sampling_rate): + sampling_rate = ts["sampling_rate"] + # Check if sampling_rate is consistent with 1/dt + if abs(sampling_rate - 1.0 / ts.dt) > 1e-6: + # Record inconsistency in error log (elog) + message = "sampling_rate inconsistent with 1/dt; updating to 1/dt" + ts.elog.log_error("TimeSeries2Trace", + message, + ErrorSeverity.Complaint) + # Update sampling_rate to 1/dt + ts["sampling_rate"] = 1.0 / ts.dt + dresult.stats["sampling_rate"] = 1.0 / ts.dt + else: + # Set sampling_rate to 1/dt + ts["sampling_rate"] = 1.0 / ts.dt + dresult.stats["sampling_rate"] = 1.0 / ts.dt + if ts.is_defined(Keywords.net): dresult.stats["network"] = ts.get_string(Keywords.net) else: @@ -167,6 +186,7 @@ def TimeSeries2Trace(ts): # are computed by Trace (i.e. endtime) or are already set above do_not_copy = [ "delta", + "sampling_rate", "npts", "starttime", "endtime", diff --git a/python/tests/algorithms/test_resample.py b/python/tests/algorithms/test_resample.py index 6253fcbc2..ec4520c38 100755 --- a/python/tests/algorithms/test_resample.py +++ b/python/tests/algorithms/test_resample.py @@ -30,59 +30,91 @@ def test_resample(): upsampler = ScipyResampler(250.0) tsup = upsampler.resample(ts) assert np.isclose(tsup.dt, 0.004) + updateSamplingRateMessage = "sampling_rate inconsistent with 1/dt; updating to 1/dt" + assert np.isclose(tsup["sampling_rate"], 250.0) and tsup.elog.get_error_log()[0].algorithm == "resample" and tsup.elog.get_error_log()[0].message == updateSamplingRateMessage # This computed npts is more robust. Otherwise changes in helper # would break it npup = int(ts_npts * 250.0 / 100.0) assert tsup.npts == npup # weird number = int(255*250/100) + + # test for the case when sampling_rate is not defined + ts = get_sin_timeseries(sampling_rate=100.0) + # have to save this because ts is overwritten by resample method + ts0 = TimeSeries(ts) + ts_npts = ts.npts + # remove sampling_rate + del ts["sampling_rate"] + assert not ts.is_defined("sampling_rate") + # upsample test for resampler + upsampler = ScipyResampler(250.0) + + tsup = upsampler.resample(ts) + assert np.isclose(tsup.dt, 0.004) + assert ts.is_defined("sampling_rate") and np.isclose(tsup["sampling_rate"], 250.0) and (not tsup.elog.get_error_log() or tsup.elog.get_error_log()[0].message == updateSamplingRateMessage) + # now repeat for downsampling with resample algorithm ts = TimeSeries(ts0) ds_resampler = ScipyResampler(5.0) + assert ts.is_defined("sampling_rate") tsds = ds_resampler.resample(ts) # Note plots of this output show the auto antialiasing works as # advertised in scipy assert np.isclose(tsds.dt, 0.2) + assert np.isclose(tsds["sampling_rate"], 5.0) and tsds.elog.get_error_log()[0].algorithm == "resample" and tsds.elog.get_error_log()[0].message == updateSamplingRateMessage assert tsds.npts == int(ts_npts * 5.0 / 100.0) # Repeat same downsampling with decimate ts = TimeSeries(ts0) decimator = ScipyDecimator(5.0) + assert ts.is_defined("sampling_rate") tsds = decimator.resample(ts) assert np.isclose(tsds.dt, 0.2) + assert np.isclose(tsds["sampling_rate"], 5.0) and tsds.elog.get_error_log()[0].algorithm == "resample" and tsds.elog.get_error_log()[0].message == updateSamplingRateMessage # the documentation doesn't tell me why by the scipy decimate # function seems to round npts up rather than use int assert tsds.npts == int(ts_npts * 5.0 / 100.0) + 1 seis = get_live_seismogram() seis0 = Seismogram(seis) + assert seis.is_defined("sampling_rate") seis = upsampler.resample(seis) assert np.isclose(seis.dt, 0.004) + assert np.isclose(seis["sampling_rate"], 250.0) and seis.elog.get_error_log()[0].algorithm == "resample" and seis.elog.get_error_log()[0].message == updateSamplingRateMessage npup = int(seis0.npts * 250.0 / 20.0) assert seis.npts == npup seis = Seismogram(seis0) + assert seis.is_defined("sampling_rate") seis = ds_resampler.resample(seis) assert np.isclose(seis.dt, 0.2) + assert np.isclose(seis["sampling_rate"], 5.0) and seis.elog.get_error_log()[0].algorithm == "resample" and seis.elog.get_error_log()[0].message == updateSamplingRateMessage assert seis.npts == int(ts_npts * 5.0 / 20.0) seis = Seismogram(seis0) + assert seis.is_defined("sampling_rate") seis = decimator.resample(seis) # again the round issue noted above dec_npts = int(seis0.npts * 5.0 / 20.0) + 1 assert np.isclose(seis.dt, 0.2) + assert np.isclose(seis["sampling_rate"], 5.0) and seis.elog.get_error_log()[0].algorithm == "resample" and seis.elog.get_error_log()[0].message == updateSamplingRateMessage assert seis.npts == dec_npts tse = get_live_timeseries_ensemble(5) tse.set_live() tse0 = TimeSeriesEnsemble(tse) + assert tse.member[0].is_defined("sampling_rate") tse = upsampler.resample(tse) npup = int(tse0.member[0].npts * 250.0 / 20.0) for d in tse.member: assert d.live assert np.isclose(d.dt, 0.004) + assert np.isclose(d["sampling_rate"], 250.0) and d.elog.get_error_log()[0].algorithm == "resample" and d.elog.get_error_log()[0].message == updateSamplingRateMessage assert d.npts == npup tse = TimeSeriesEnsemble(tse0) + assert tse.member[0].is_defined("sampling_rate") tse = ds_resampler.resample(tse) npup = int(tse0.member[0].npts * 5.0 / 20.0) for d in tse.member: assert d.live assert np.isclose(d.dt, 0.2) + assert np.isclose(d["sampling_rate"], 5.0) and d.elog.get_error_log()[0].algorithm == "resample" and d.elog.get_error_log()[0].message == updateSamplingRateMessage assert d.npts == npup tse = TimeSeriesEnsemble(tse0) @@ -91,78 +123,98 @@ def test_resample(): for d in tse.member: assert d.live assert np.isclose(d.dt, 0.2) + assert np.isclose(d["sampling_rate"], 5.0) and d.elog.get_error_log()[0].algorithm == "resample" and d.elog.get_error_log()[0].message == updateSamplingRateMessage assert d.npts == npup seis_e = get_live_seismogram_ensemble(3) seis_e0 = SeismogramEnsemble(seis_e) + assert seis_e.member[0].is_defined("sampling_rate") seis_e = upsampler.resample(seis_e) npup = int(seis_e0.member[0].npts * 250.0 / 20.0) for d in seis_e.member: assert d.live assert np.isclose(d.dt, 0.004) + assert np.isclose(d["sampling_rate"], 250.0) and d.elog.get_error_log()[0].algorithm == "resample" and d.elog.get_error_log()[0].message == updateSamplingRateMessage assert d.npts == npup seis_e = SeismogramEnsemble(seis_e0) + assert seis_e.member[0].is_defined("sampling_rate") seis_e = ds_resampler.resample(seis_e) npup = int(seis_e0.member[0].npts * 5.0 / 20.0) for d in seis_e.member: assert d.live assert np.isclose(d.dt, 0.2) + assert np.isclose(d["sampling_rate"], 5.0) and d.elog.get_error_log()[0].algorithm == "resample" and d.elog.get_error_log()[0].message == updateSamplingRateMessage assert d.npts == npup seis_e = SeismogramEnsemble(seis_e0) + assert seis_e.member[0].is_defined("sampling_rate") seis_e = decimator.resample(seis_e) npup = int(seis_e0.member[0].npts * 5.0 / 20.0) + 1 for d in seis_e.member: assert d.live assert np.isclose(d.dt, 0.2) + assert np.isclose(d["sampling_rate"], 5.0) and d.elog.get_error_log()[0].algorithm == "resample" and d.elog.get_error_log()[0].message == updateSamplingRateMessage assert d.npts == npup # Now test resample function. We define two operators # for 40 sps target resample40 = ScipyResampler(40.0) decimate40 = ScipyDecimator(40.0) - + assert ts.is_defined("sampling_rate") d = resample(ts, decimate40, resample40) assert d.dt == 0.025 + assert d["sampling_rate"] == 40.0 and d.elog.get_error_log()[0].algorithm == "resample" and d.elog.get_error_log()[0].message == updateSamplingRateMessage assert d.live assert d.npts == 104 # print(d.dt,d.live,d.npts) + assert ts0.is_defined("sampling_rate") d = resample(ts0, decimate40, resample40) # print(d.dt,d.live,d.npts) assert d.dt == 0.025 + assert d["sampling_rate"] == 40.0 and d.elog.get_error_log()[0].algorithm == "resample" and d.elog.get_error_log()[0].message == updateSamplingRateMessage assert d.live assert d.npts == 101 + assert tse0.member[0].is_defined("sampling_rate") d = resample(tse0, decimate40, resample40) # print("tse0") for d in tse0.member: # print(d.dt,d.live,d.npts) assert d.dt == 0.025 + assert d["sampling_rate"] == 40.0 and d.elog.get_error_log()[0].algorithm == "resample" and d.elog.get_error_log()[0].message == updateSamplingRateMessage assert d.live assert d.npts == 510 + assert tse.member[0].is_defined("sampling_rate") d = resample(tse, decimate40, resample40) # print('tse') for d in tse0.member: # print(d.dt,d.live,d.npts) assert d.dt == 0.025 + assert d["sampling_rate"] == 40.0 and d.elog.get_error_log()[0].algorithm == "resample" and d.elog.get_error_log()[0].message == updateSamplingRateMessage assert d.live assert d.npts == 510 + assert seis.is_defined("sampling_rate") d = resample(seis, decimate40, resample40) # print(d.dt,d.live,d.npts) + assert seis0.is_defined("sampling_rate") d = resample(seis0, decimate40, resample40) # print(d.dt,d.live,d.npts) + assert seis_e.member[0].is_defined("sampling_rate") d = resample(seis_e, decimate40, resample40) # print('seis_e') for d in seis_e.member: # print(d.dt,d.live,d.npts) assert d.dt == 0.025 + assert d["sampling_rate"] == 40.0 and d.elog.get_error_log()[0].algorithm == "resample" and d.elog.get_error_log()[0].message == updateSamplingRateMessage assert d.live assert d.npts == 512 + assert seis_e0.member[0].is_defined("sampling_rate") d = resample(seis_e0, decimate40, resample40) # print('seis_e0') for d in seis_e0.member: # print(d.dt,d.live,d.npts) assert d.dt == 0.025 + assert d["sampling_rate"] == 40.0 and d.elog.get_error_log()[0].algorithm == "resample" and d.elog.get_error_log()[0].message == updateSamplingRateMessage assert d.live assert d.npts == 510 diff --git a/python/tests/util/test_converter.py b/python/tests/util/test_converter.py index 9bb4a1589..17f4141f2 100644 --- a/python/tests/util/test_converter.py +++ b/python/tests/util/test_converter.py @@ -14,10 +14,11 @@ from unittest import mock with mock.patch.dict( - sys.modules, {"pyspark": None, "dask": None, "dask.dataframe": None} + sys.modules, {"pyspark": None, "dask": None, "dask.dataframe": None} ): from mspasspy.util.converter import Textfile2Dataframe + def test_Textfile2Dataframe_no_parallel(): pf = AntelopePf("python/tests/data/test_import.pf") attributes = Pf2AttributeNameTbl(pf, tag="wfprocess") @@ -65,7 +66,6 @@ def test_Textfile2Dataframe_no_parallel(): textfile, header_line=0, parallel=p, insert_column={"test_col": 1} ) - from mspasspy.ccore.utility import dmatrix, Metadata, AntelopePf, MsPASSError from mspasspy.ccore.seismic import DoubleVector, Seismogram, TimeSeries from mspasspy.util.converter import ( @@ -193,6 +193,7 @@ def test_Metadata2dict(): def test_TimeSeries2Trace(): + updateSamplingRateMessage = "sampling_rate inconsistent with 1/dt; updating to 1/dt" tr = TimeSeries2Trace(test_TimeSeries2Trace.ts1) assert tr.stats["delta"] == test_TimeSeries2Trace.ts1.dt assert tr.stats["sampling_rate"] == 1.0 / test_TimeSeries2Trace.ts1.dt @@ -202,6 +203,65 @@ def test_TimeSeries2Trace(): assert tr.stats["net"] == test_TimeSeries2Trace.ts1.get("net") assert tr.stats["npts"] == test_TimeSeries2Trace.ts1.get("npts") assert tr.stats["sampling_rate"] == test_TimeSeries2Trace.ts1.get("sampling_rate") + # error log should be empty or the message should not be updateSamplingRateMessage because the sampling_rate is defined and correct + assert test_TimeSeries2Trace.ts1.elog.size() == 0 or not test_TimeSeries2Trace.ts1.elog.get_error_log()[ + 0].message != updateSamplingRateMessage + # test for case when "sampling_rate" is not defined in ts1 + # create a new copy of ts1 without "sampling_rate" defined + ts_size = 255 + ts1_copy = TimeSeries() + ts1_copy.data = DoubleVector(np.random.rand(ts_size)) + ts1_copy.live = True + ts1_copy.dt = test_TimeSeries2Trace.ts1.dt + ts1_copy.t0 = 0 + ts1_copy.npts = ts_size + ts1_copy.put("net", "IU") + ts1_copy.put("npts", ts_size) + assert not ts1_copy.is_defined("sampling_rate") + tr = TimeSeries2Trace(ts1_copy) + # same test as former case with defined "sampling_rate" + assert tr.stats["delta"] == ts1_copy.dt + assert tr.stats["sampling_rate"] == 1.0 / ts1_copy.dt + assert tr.stats["npts"] == ts1_copy.npts + assert tr.stats["starttime"] == obspy.core.UTCDateTime(ts1_copy.t0) + + assert tr.stats["net"] == ts1_copy.get("net") + assert tr.stats["npts"] == ts1_copy.get("npts") + assert tr.stats["sampling_rate"] == ts1_copy.get("sampling_rate") + # error log should be empty or the message should not be updateSamplingRateMessage because the sampling_rate is not defined + assert ts1_copy.elog.size() == 0 or not ts1_copy.elog.get_error_log()[0].message != updateSamplingRateMessage + + # test for "sampling_rate" of ts1_copy + assert ts1_copy.is_defined("sampling_rate") + assert ts1_copy.get("sampling_rate") == 1.0 / ts1_copy.dt + + # test for case when "sampling_rate" is defined wrongly in ts1 + # create a new copy of ts1 with "sampling_rate" defined wrongly(a negative value) + ts_size = 255 + sampling_rate = -20.0 + ts1_copy = TimeSeries() + ts1_copy.data = DoubleVector(np.random.rand(ts_size)) + ts1_copy.live = True + ts1_copy.dt = test_TimeSeries2Trace.ts1.dt + ts1_copy.t0 = 0 + ts1_copy.npts = ts_size + ts1_copy.put("net", "IU") + ts1_copy.put("npts", ts_size) + ts1_copy.put("sampling_rate", sampling_rate) + assert ts1_copy.is_defined("sampling_rate") + tr = TimeSeries2Trace(ts1_copy) + # same test as former case with defined "sampling_rate" + assert tr.stats["delta"] == ts1_copy.dt + assert tr.stats["sampling_rate"] == 1.0 / ts1_copy.dt + assert tr.stats["npts"] == ts1_copy.npts + assert tr.stats["starttime"] == obspy.core.UTCDateTime(ts1_copy.t0) + + assert tr.stats["net"] == ts1_copy.get("net") + assert tr.stats["npts"] == ts1_copy.get("npts") + assert tr.stats["sampling_rate"] == ts1_copy.get("sampling_rate") + # message of error log should be updateSamplingRateMessage because the sampling_rate is defined wrongly and need to be updated + assert ts1_copy.elog.get_error_log()[0].algorithm == "TimeSeries2Trace" and ts1_copy.elog.get_error_log()[ + 0].message == updateSamplingRateMessage def test_Trace2TimeSeries():