diff --git a/utils/obsproc/Ghrsst2Ioda.h b/utils/obsproc/Ghrsst2Ioda.h index 0b4b7ebfa..64b5ed28b 100644 --- a/utils/obsproc/Ghrsst2Ioda.h +++ b/utils/obsproc/Ghrsst2Ioda.h @@ -2,6 +2,7 @@ #include #include // NOLINT (using C API) +#include #include #include @@ -28,10 +29,21 @@ namespace gdasapp { gdasapp::IodaVars providerToIodaVars(const std::string fileName) final { oops::Log::info() << "Processing files provided by GHRSST" << std::endl; + // Get the sst bounds from the configuration + std::string sstUnits; + fullConfig_.get("bounds.units", sstUnits); + float sstMin; + fullConfig_.get("bounds.min", sstMin); + float sstMax; + fullConfig_.get("bounds.max", sstMax); + if ( sstUnits == "C" ) { + sstMin += 273.15; + sstMax += 273.15; + } + // Open the NetCDF file in read-only mode netCDF::NcFile ncFile(fileName, netCDF::NcFile::read); - oops::Log::info() << "Reading... " << fileName << std::endl; - + oops::Log::test() << "Reading " << fileName << std::endl; // Get number of obs int dimLon = ncFile.getDim("lon").getSize(); int dimLat = ncFile.getDim("lat").getSize(); @@ -56,65 +68,64 @@ namespace gdasapp { } } - // datetime: Read Reference Time - std::vector refTime(dimTime); + // Read the reference time + std::vector refTime(dimTime); ncFile.getVar("time").getVar(refTime.data()); std::string refDate; ncFile.getVar("time").getAtt("units").getValues(refDate); + // Reformat the reference time + std::regex dateRegex(R"(\b\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\b)"); + std::smatch match; + std::regex_search(refDate, match, dateRegex); + refDate = match.str(); + std::tm tmStruct = {}; + std::istringstream ss(refDate); + ss >> std::get_time(&tmStruct, "%Y-%m-%d %H:%M:%S"); + std::ostringstream isoFormatted; + isoFormatted << std::put_time(&tmStruct, "seconds since %Y-%m-%dT%H:%M:%SZ"); + refDate = isoFormatted.str(); + // Read sst_dtime to add to the reference time - int sstdTime[dimTime][dimLat][dimLon]; // NOLINT - ncFile.getVar("sst_dtime").getVar(sstdTime); - float dtimeOffSet; - ncFile.getVar("sst_dtime").getAtt("add_offset").getValues(&dtimeOffSet); + // TODO(AMG): What's below does not read the field the same way python does + std::vector sstdTime(dimTime*dimLat*dimLon); + ncFile.getVar("sst_dtime").getVar(sstdTime.data()); float dtimeScaleFactor; ncFile.getVar("sst_dtime").getAtt("scale_factor").getValues(&dtimeScaleFactor); - oops::Log::info() << "--- sst_dtime: " << std::endl; - - // Read SST obs Value, bias, Error and quality flag - // ObsValue - short sstObsVal[dimTime][dimLat][dimLon]; // NOLINT - ncFile.getVar("sea_surface_temperature").getVar(sstObsVal); + // Read SST ObsValue + std::vector sstObsVal(dimTime*dimLat*dimLon); + ncFile.getVar("sea_surface_temperature").getVar(sstObsVal.data()); float sstOffSet; ncFile.getVar("sea_surface_temperature").getAtt("add_offset").getValues(&sstOffSet); float sstScaleFactor; ncFile.getVar("sea_surface_temperature").getAtt("scale_factor").getValues(&sstScaleFactor); - oops::Log::info() << "--- sst_ObsValue: " << std::endl; - - // Bias - uint8_t sstObsBias[dimTime][dimLat][dimLon]; - ncFile.getVar("sses_bias").getVar(sstObsBias); - float biasOffSet; - ncFile.getVar("sses_bias").getAtt("add_offset").getValues(&biasOffSet); + // Read SST Bias + std::vector sstObsBias(dimTime*dimLat*dimLon); + ncFile.getVar("sses_bias").getVar(sstObsBias.data()); float biasScaleFactor; ncFile.getVar("sses_bias").getAtt("scale_factor").getValues(&biasScaleFactor); - oops::Log::info() << "--- sst_bias: " << std::endl; - - // Error - uint8_t sstObsErr[dimTime][dimLat][dimLon]; - ncFile.getVar("sses_standard_deviation").getVar(sstObsErr); + // Read Error + std::vector sstObsErr(dimTime*dimLat*dimLon); + ncFile.getVar("sses_standard_deviation").getVar(sstObsErr.data()); float errOffSet; ncFile.getVar("sses_standard_deviation").getAtt("add_offset").getValues(&errOffSet); float errScaleFactor; ncFile.getVar("sses_standard_deviation").getAtt("scale_factor").getValues(&errScaleFactor); - oops::Log::info() << "--- sst_Error: " << std::endl; - - // preQc - uint8_t sstPreQC[dimTime][dimLat][dimLon]; + // Read preQc + signed char sstPreQC[dimTime][dimLat][dimLon]; ncFile.getVar("quality_level").getVar(sstPreQC); - oops::Log::info() << "--- sst_preQc: " << std::endl; - // Apply scaling/unit change and compute the necessary fields std::vector> mask(dimLat, std::vector(dimLon)); std::vector> sst(dimLat, std::vector(dimLon)); std::vector> obserror(dimLat, std::vector(dimLon)); std::vector> preqc(dimLat, std::vector(dimLon)); std::vector> seconds(dimLat, std::vector(dimLon)); + size_t index = 0; for (int i = 0; i < dimLat; i++) { for (int j = 0; j < dimLon; j++) { // provider's QC flag @@ -124,13 +135,11 @@ namespace gdasapp { preqc[i][j] = 5 - static_cast(sstPreQC[0][i][j]); // bias corrected sst, regressed to the drifter depth - sst[i][j] = (static_cast(sstObsVal[0][i][j]) + sstOffSet) * sstScaleFactor - - (static_cast(sstObsBias[0][i][j]) + biasOffSet) * biasScaleFactor; + sst[i][j] = static_cast(sstObsVal[index]) * sstScaleFactor + sstOffSet + - static_cast(sstObsBias[index]) * biasScaleFactor; // mask - // TODO(Somebody): pass the QC flag theashold through the config. - // currently hard-coded to only use qc=5 - if (sst[i][j] >= -3.0 && sst[i][j] <= 50.0 && preqc[i][j] ==0) { + if (sst[i][j] >= sstMin && sst[i][j] <= sstMax && preqc[i][j] ==0) { mask[i][j] = 1; } else { mask[i][j] = 0; @@ -138,27 +147,36 @@ namespace gdasapp { // obs error // TODO(Somebody): add sampled std. dev. of sst to the total obs error - obserror[i][j] = (static_cast(sstObsErr[0][i][j]) + errOffSet) * errScaleFactor; + obserror[i][j] = static_cast(sstObsErr[index]) * errScaleFactor + errOffSet; // epoch time in seconds - seconds[i][j] = static_cast((sstdTime[0][i][j] + dtimeOffSet) - * dtimeScaleFactor) - + static_cast(refTime[0]); + seconds[i][j] = static_cast(sstdTime[index]) * dtimeScaleFactor + + static_cast(refTime[0]); + index++; } } - // TODO(Guillaume): check periodic BC, use sampling std dev of sst as a proxi for obs error - // should the sst mean be weighted by the provided obs error? - std::vector> lon2d_s = - gdasapp::superobutils::subsample2D(lon2d, mask, fullConfig_); - std::vector> lat2d_s = - gdasapp::superobutils::subsample2D(lat2d, mask, fullConfig_); - std::vector> sst_s = - gdasapp::superobutils::subsample2D(sst, mask, fullConfig_); - std::vector> obserror_s = - gdasapp::superobutils::subsample2D(obserror, mask, fullConfig_); - std::vector> seconds_s = - gdasapp::superobutils::subsample2D(seconds, mask, fullConfig_); + // Superobing + // TODO(Guillaume): Save the sampling std dev of sst so it can be used + // as a proxi for obs error + std::vector> sst_s; + std::vector> lon2d_s; + std::vector> lat2d_s; + std::vector> obserror_s; + std::vector> seconds_s; + if ( fullConfig_.has("binning") ) { + sst_s = gdasapp::superobutils::subsample2D(sst, mask, fullConfig_); + lon2d_s = gdasapp::superobutils::subsample2D(lon2d, mask, fullConfig_); + lat2d_s = gdasapp::superobutils::subsample2D(lat2d, mask, fullConfig_); + obserror_s = gdasapp::superobutils::subsample2D(obserror, mask, fullConfig_); + seconds_s = gdasapp::superobutils::subsample2D(seconds, mask, fullConfig_); + } else { + sst_s = sst; + lon2d_s = lon2d; + lat2d_s = lat2d; + obserror_s = obserror; + seconds_s = seconds; + } // number of obs after subsampling int nobs = sst_s.size() * sst_s[0].size(); @@ -166,9 +184,8 @@ namespace gdasapp { // Create instance of iodaVars object gdasapp::IodaVars iodaVars(nobs, {}, {}); - // unix epoch at Jan 01 1981 00:00:00 GMT+0000 + // Reference time is Jan 01 1981 00:00:00 GMT+0000 iodaVars.referenceDate_ = refDate; - oops::Log::info() << "--- time: " << iodaVars.referenceDate_ << std::endl; // Store into eigen arrays int loc(0); @@ -184,11 +201,19 @@ namespace gdasapp { } } - // Remove + // Basic QC Eigen::Array boundsCheck = - (iodaVars.obsVal_ > -3.0 && iodaVars.obsVal_ < 50.0); + (iodaVars.obsVal_ > sstMin && iodaVars.obsVal_ < sstMax); iodaVars.trim(boundsCheck); + // Replace datime by its mean + // TODO(ASGM): Remove when the time reading is fixed + int64_t mean = iodaVars.datetime_.sum() / iodaVars.datetime_.size(); + iodaVars.datetime_.setConstant(mean); + + // Test output + iodaVars.testOutput(); + return iodaVars; }; }; // class Ghrsst2Ioda diff --git a/utils/obsproc/NetCDFToIodaConverter.h b/utils/obsproc/NetCDFToIodaConverter.h index 6942c1c0c..428e2f69d 100644 --- a/utils/obsproc/NetCDFToIodaConverter.h +++ b/utils/obsproc/NetCDFToIodaConverter.h @@ -18,6 +18,26 @@ #include "oops/util/missingValues.h" namespace gdasapp { + namespace testutils { + template + std::string checksum(const Eigen::ArrayBase& arr, const std::string varname) { + std::stringstream result; + if (arr.size() == 0) { + result << varname << " is empty" << "\n"; + } else { + auto minElement = arr.minCoeff(); + auto maxElement = arr.maxCoeff(); + auto sumElements = arr.sum(); + + result << varname << ":" << "\n"; + result << " Min: " << minElement << "\n"; + result << " Max: " << maxElement << "\n"; + result << " Sum: " << sumElements; + } + return result.str(); + } + } // namespace testutils + // A simple data structure to organize the info to provide to the ioda // writter struct IodaVars { @@ -135,6 +155,23 @@ namespace gdasapp { location_ = iodaVarsMasked.location_; oops::Log::info() << "IodaVars::IodaVars done masking." << std::endl; } + + // Testing + void testOutput() { + oops::Log::test() << referenceDate_ << std::endl; + oops::Log::test() << + gdasapp::testutils::checksum(obsVal_, "obsVal") << std::endl; + oops::Log::test() << + gdasapp::testutils::checksum(obsError_, "obsError") << std::endl; + oops::Log::test() << + gdasapp::testutils::checksum(preQc_, "preQc") << std::endl; + oops::Log::test() << + gdasapp::testutils::checksum(longitude_, "longitude") << std::endl; + oops::Log::test() << + gdasapp::testutils::checksum(latitude_, "latitude") << std::endl; + oops::Log::test() << + gdasapp::testutils::checksum(datetime_, "datetime") << std::endl; + } }; // Base class for the converters diff --git a/utils/obsproc/superob.h b/utils/obsproc/superob.h index 7ec7832f9..413527ea2 100644 --- a/utils/obsproc/superob.h +++ b/utils/obsproc/superob.h @@ -48,7 +48,7 @@ namespace gdasapp { } // Calculate the average and store it in the subsampled array - if ( count < 1 ) { + if ( count < minNumObs ) { subsampled[i][j] = static_cast(-9999); } else { subsampled[i][j] = sum / static_cast(count); diff --git a/utils/test/CMakeLists.txt b/utils/test/CMakeLists.txt index 7a6488a06..73a73385e 100644 --- a/utils/test/CMakeLists.txt +++ b/utils/test/CMakeLists.txt @@ -8,8 +8,16 @@ list( APPEND utils_test_input testinput/gdas_icecamsr2ioda.yaml ) +set( gdas_utils_test_ref + testref/ghrsst2ioda.test +) + file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/testinput) file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/testrun) +file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/obsproc/testref) +file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/obsproc/testoutput) + +CREATE_SYMLINK( ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}/obsproc ${gdas_utils_test_ref} ) CREATE_SYMLINK( ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR} ${utils_test_input} ) # copy the cpp linter script diff --git a/utils/test/testinput/gdas_ghrsst2ioda.yaml b/utils/test/testinput/gdas_ghrsst2ioda.yaml index 747247a47..703f75a81 100644 --- a/utils/test/testinput/gdas_ghrsst2ioda.yaml +++ b/utils/test/testinput/gdas_ghrsst2ioda.yaml @@ -4,8 +4,17 @@ window end: 2018-04-15T12:00:00Z variable: seaSurfaceTemperature binning: stride: 2 - min number of obs: 5 + min number of obs: 1 +bounds: + units: C + min: -3.0 + max: 50.0 output file: ghrsst_sst_mb_20210701.ioda.nc input files: - ghrsst_sst_mb_202107010000.nc4 - ghrsst_sst_mb_202107010100.nc4 + +test: + reference filename: testref/ghrsst2ioda.test + test output filename: testoutput/ghrsst2ioda.test + float relative tolerance: 1e-6 diff --git a/utils/test/testref/ghrsst2ioda.test b/utils/test/testref/ghrsst2ioda.test new file mode 100644 index 000000000..c8e895e1e --- /dev/null +++ b/utils/test/testref/ghrsst2ioda.test @@ -0,0 +1,52 @@ +Reading ghrsst_sst_mb_202107010000.nc4 +seconds since 1981-01-01T00:00:00Z +obsVal: + Min: 276.708 + Max: 276.9 + Sum: 4982.63 +obsError: + Min: 0.32 + Max: 0.32 + Sum: 5.76 +preQc: + Min: 0 + Max: 0 + Sum: 0 +longitude: + Min: 131.52 + Max: 131.71 + Sum: 2369.13 +latitude: + Min: -55.5 + Max: -55.42 + Sum: -998.28 +datetime: + Min: 1277942528 + Max: 1277942528 + Sum: 23002965504 +Reading ghrsst_sst_mb_202107010100.nc4 +seconds since 1981-01-01T00:00:00Z +obsVal: + Min: 281.547 + Max: 281.714 + Sum: 5069.06 +obsError: + Min: 0.58 + Max: 0.61 + Sum: 10.8225 +preQc: + Min: 0 + Max: 0 + Sum: 0 +longitude: + Min: 151.12 + Max: 151.31 + Sum: 2721.93 +latitude: + Min: 56.62 + Max: 56.7 + Sum: 1019.88 +datetime: + Min: 1277946624 + Max: 1277946624 + Sum: 23003039232