Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GHRSST bug fix ... an more #717

Merged
merged 12 commits into from
Nov 14, 2023
139 changes: 82 additions & 57 deletions utils/obsproc/Ghrsst2Ioda.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include <iostream>
#include <netcdf> // NOLINT (using C API)
#include <regex>
#include <string>
#include <vector>

Expand All @@ -28,10 +29,21 @@ namespace gdasapp {
gdasapp::IodaVars providerToIodaVars(const std::string fileName) final {
oops::Log::info() << "Processing files provided by GHRSST" << std::endl;

// Get the sst bounds from the configuration
std::string sstUnits;
fullConfig_.get("bounds.units", sstUnits);
float sstMin;
fullConfig_.get("bounds.min", sstMin);
float sstMax;
fullConfig_.get("bounds.max", sstMax);
if ( sstUnits == "C" ) {
guillaumevernieres marked this conversation as resolved.
Show resolved Hide resolved
sstMin += 273.15;
sstMax += 273.15;
}

// Open the NetCDF file in read-only mode
netCDF::NcFile ncFile(fileName, netCDF::NcFile::read);
oops::Log::info() << "Reading... " << fileName << std::endl;

oops::Log::test() << "Reading " << fileName << std::endl;
// Get number of obs
int dimLon = ncFile.getDim("lon").getSize();
int dimLat = ncFile.getDim("lat").getSize();
Expand All @@ -56,65 +68,64 @@ namespace gdasapp {
}
}

// datetime: Read Reference Time
std::vector<int> refTime(dimTime);
// Read the reference time
std::vector<int32_t> refTime(dimTime);
ncFile.getVar("time").getVar(refTime.data());
std::string refDate;
ncFile.getVar("time").getAtt("units").getValues(refDate);

// Reformat the reference time
std::regex dateRegex(R"(\b\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\b)");
std::smatch match;
std::regex_search(refDate, match, dateRegex);
refDate = match.str();
std::tm tmStruct = {};
std::istringstream ss(refDate);
ss >> std::get_time(&tmStruct, "%Y-%m-%d %H:%M:%S");
std::ostringstream isoFormatted;
isoFormatted << std::put_time(&tmStruct, "seconds since %Y-%m-%dT%H:%M:%SZ");
refDate = isoFormatted.str();

// Read sst_dtime to add to the reference time
int sstdTime[dimTime][dimLat][dimLon]; // NOLINT
ncFile.getVar("sst_dtime").getVar(sstdTime);
float dtimeOffSet;
ncFile.getVar("sst_dtime").getAtt("add_offset").getValues(&dtimeOffSet);
// TODO(AMG): What's below does not read the field the same way python does
std::vector<int32_t> sstdTime(dimTime*dimLat*dimLon);
guillaumevernieres marked this conversation as resolved.
Show resolved Hide resolved
ncFile.getVar("sst_dtime").getVar(sstdTime.data());
float dtimeScaleFactor;
ncFile.getVar("sst_dtime").getAtt("scale_factor").getValues(&dtimeScaleFactor);

oops::Log::info() << "--- sst_dtime: " << std::endl;

// Read SST obs Value, bias, Error and quality flag
// ObsValue
short sstObsVal[dimTime][dimLat][dimLon]; // NOLINT
ncFile.getVar("sea_surface_temperature").getVar(sstObsVal);
// Read SST ObsValue
std::vector<int16_t> sstObsVal(dimTime*dimLat*dimLon);
ncFile.getVar("sea_surface_temperature").getVar(sstObsVal.data());
float sstOffSet;
ncFile.getVar("sea_surface_temperature").getAtt("add_offset").getValues(&sstOffSet);
float sstScaleFactor;
ncFile.getVar("sea_surface_temperature").getAtt("scale_factor").getValues(&sstScaleFactor);

oops::Log::info() << "--- sst_ObsValue: " << std::endl;

// Bias
uint8_t sstObsBias[dimTime][dimLat][dimLon];
ncFile.getVar("sses_bias").getVar(sstObsBias);
float biasOffSet;
ncFile.getVar("sses_bias").getAtt("add_offset").getValues(&biasOffSet);
// Read SST Bias
std::vector<signed char> sstObsBias(dimTime*dimLat*dimLon);
ncFile.getVar("sses_bias").getVar(sstObsBias.data());
float biasScaleFactor;
ncFile.getVar("sses_bias").getAtt("scale_factor").getValues(&biasScaleFactor);

oops::Log::info() << "--- sst_bias: " << std::endl;

// Error
uint8_t sstObsErr[dimTime][dimLat][dimLon];
ncFile.getVar("sses_standard_deviation").getVar(sstObsErr);
// Read Error
std::vector<signed char> sstObsErr(dimTime*dimLat*dimLon);
ncFile.getVar("sses_standard_deviation").getVar(sstObsErr.data());
float errOffSet;
ncFile.getVar("sses_standard_deviation").getAtt("add_offset").getValues(&errOffSet);
float errScaleFactor;
ncFile.getVar("sses_standard_deviation").getAtt("scale_factor").getValues(&errScaleFactor);

oops::Log::info() << "--- sst_Error: " << std::endl;

// preQc
uint8_t sstPreQC[dimTime][dimLat][dimLon];
// Read preQc
signed char sstPreQC[dimTime][dimLat][dimLon];
ncFile.getVar("quality_level").getVar(sstPreQC);

oops::Log::info() << "--- sst_preQc: " << std::endl;

// Apply scaling/unit change and compute the necessary fields
std::vector<std::vector<int>> mask(dimLat, std::vector<int>(dimLon));
std::vector<std::vector<float>> sst(dimLat, std::vector<float>(dimLon));
std::vector<std::vector<float>> obserror(dimLat, std::vector<float>(dimLon));
std::vector<std::vector<int>> preqc(dimLat, std::vector<int>(dimLon));
std::vector<std::vector<float>> seconds(dimLat, std::vector<float>(dimLon));
size_t index = 0;
for (int i = 0; i < dimLat; i++) {
for (int j = 0; j < dimLon; j++) {
// provider's QC flag
Expand All @@ -124,51 +135,57 @@ namespace gdasapp {
preqc[i][j] = 5 - static_cast<int>(sstPreQC[0][i][j]);

// bias corrected sst, regressed to the drifter depth
sst[i][j] = (static_cast<float>(sstObsVal[0][i][j]) + sstOffSet) * sstScaleFactor
- (static_cast<float>(sstObsBias[0][i][j]) + biasOffSet) * biasScaleFactor;
sst[i][j] = static_cast<float>(sstObsVal[index]) * sstScaleFactor + sstOffSet
- static_cast<float>(sstObsBias[index]) * biasScaleFactor;

// mask
// TODO(Somebody): pass the QC flag theashold through the config.
// currently hard-coded to only use qc=5
if (sst[i][j] >= -3.0 && sst[i][j] <= 50.0 && preqc[i][j] ==0) {
if (sst[i][j] >= sstMin && sst[i][j] <= sstMax && preqc[i][j] ==0) {
mask[i][j] = 1;
} else {
mask[i][j] = 0;
}

// obs error
// TODO(Somebody): add sampled std. dev. of sst to the total obs error
obserror[i][j] = (static_cast<float>(sstObsErr[0][i][j]) + errOffSet) * errScaleFactor;
obserror[i][j] = static_cast<float>(sstObsErr[index]) * errScaleFactor + errOffSet;

// epoch time in seconds
seconds[i][j] = static_cast<int64_t>((sstdTime[0][i][j] + dtimeOffSet)
* dtimeScaleFactor)
+ static_cast<int64_t>(refTime[0]);
seconds[i][j] = static_cast<float>(sstdTime[index]) * dtimeScaleFactor
+ static_cast<float>(refTime[0]);
index++;
}
}

// TODO(Guillaume): check periodic BC, use sampling std dev of sst as a proxi for obs error
// should the sst mean be weighted by the provided obs error?
std::vector<std::vector<float>> lon2d_s =
gdasapp::superobutils::subsample2D(lon2d, mask, fullConfig_);
std::vector<std::vector<float>> lat2d_s =
gdasapp::superobutils::subsample2D(lat2d, mask, fullConfig_);
std::vector<std::vector<float>> sst_s =
gdasapp::superobutils::subsample2D(sst, mask, fullConfig_);
std::vector<std::vector<float>> obserror_s =
gdasapp::superobutils::subsample2D(obserror, mask, fullConfig_);
std::vector<std::vector<float>> seconds_s =
gdasapp::superobutils::subsample2D(seconds, mask, fullConfig_);
// Superobing
// TODO(Guillaume): Save the sampling std dev of sst so it can be used
// as a proxi for obs error
std::vector<std::vector<float>> sst_s;
std::vector<std::vector<float>> lon2d_s;
std::vector<std::vector<float>> lat2d_s;
std::vector<std::vector<float>> obserror_s;
std::vector<std::vector<float>> seconds_s;
if ( fullConfig_.has("binning") ) {
sst_s = gdasapp::superobutils::subsample2D(sst, mask, fullConfig_);
lon2d_s = gdasapp::superobutils::subsample2D(lon2d, mask, fullConfig_);
lat2d_s = gdasapp::superobutils::subsample2D(lat2d, mask, fullConfig_);
obserror_s = gdasapp::superobutils::subsample2D(obserror, mask, fullConfig_);
seconds_s = gdasapp::superobutils::subsample2D(seconds, mask, fullConfig_);
} else {
sst_s = sst;
lon2d_s = lon2d;
lat2d_s = lat2d;
obserror_s = obserror;
seconds_s = seconds;
}

// number of obs after subsampling
int nobs = sst_s.size() * sst_s[0].size();

// Create instance of iodaVars object
gdasapp::IodaVars iodaVars(nobs, {}, {});

// unix epoch at Jan 01 1981 00:00:00 GMT+0000
// Reference time is Jan 01 1981 00:00:00 GMT+0000
iodaVars.referenceDate_ = refDate;
guillaumevernieres marked this conversation as resolved.
Show resolved Hide resolved
oops::Log::info() << "--- time: " << iodaVars.referenceDate_ << std::endl;

// Store into eigen arrays
int loc(0);
Expand All @@ -184,11 +201,19 @@ namespace gdasapp {
}
}

// Remove
// Basic QC
Eigen::Array<bool, Eigen::Dynamic, 1> boundsCheck =
(iodaVars.obsVal_ > -3.0 && iodaVars.obsVal_ < 50.0);
(iodaVars.obsVal_ > sstMin && iodaVars.obsVal_ < sstMax);
iodaVars.trim(boundsCheck);

// Replace datime by its mean
// TODO(ASGM): Remove when the time reading is fixed
int64_t mean = iodaVars.datetime_.sum() / iodaVars.datetime_.size();
iodaVars.datetime_.setConstant(mean);

// Test output
iodaVars.testOutput();

return iodaVars;
};
}; // class Ghrsst2Ioda
Expand Down
37 changes: 37 additions & 0 deletions utils/obsproc/NetCDFToIodaConverter.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,26 @@
#include "oops/util/missingValues.h"

namespace gdasapp {
namespace testutils {
template <typename Derived>
std::string checksum(const Eigen::ArrayBase<Derived>& arr, const std::string varname) {
std::stringstream result;
if (arr.size() == 0) {
result << varname << " is empty" << "\n";
} else {
auto minElement = arr.minCoeff();
auto maxElement = arr.maxCoeff();
auto sumElements = arr.sum();

result << varname << ":" << "\n";
result << " Min: " << minElement << "\n";
result << " Max: " << maxElement << "\n";
result << " Sum: " << sumElements;
}
return result.str();
}
} // namespace testutils

// A simple data structure to organize the info to provide to the ioda
// writter
struct IodaVars {
Expand Down Expand Up @@ -135,6 +155,23 @@ namespace gdasapp {
location_ = iodaVarsMasked.location_;
oops::Log::info() << "IodaVars::IodaVars done masking." << std::endl;
}

// Testing
void testOutput() {
oops::Log::test() << referenceDate_ << std::endl;
oops::Log::test() <<
gdasapp::testutils::checksum(obsVal_, "obsVal") << std::endl;
oops::Log::test() <<
gdasapp::testutils::checksum(obsError_, "obsError") << std::endl;
oops::Log::test() <<
gdasapp::testutils::checksum(preQc_, "preQc") << std::endl;
oops::Log::test() <<
gdasapp::testutils::checksum(longitude_, "longitude") << std::endl;
oops::Log::test() <<
gdasapp::testutils::checksum(latitude_, "latitude") << std::endl;
oops::Log::test() <<
gdasapp::testutils::checksum(datetime_, "datetime") << std::endl;
}
};

// Base class for the converters
Expand Down
2 changes: 1 addition & 1 deletion utils/obsproc/superob.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ namespace gdasapp {
}

// Calculate the average and store it in the subsampled array
if ( count < 1 ) {
if ( count < minNumObs ) {
subsampled[i][j] = static_cast<T>(-9999);
} else {
subsampled[i][j] = sum / static_cast<T>(count);
Expand Down
8 changes: 8 additions & 0 deletions utils/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,16 @@ list( APPEND utils_test_input
testinput/gdas_icecamsr2ioda.yaml
)

set( gdas_utils_test_ref
testref/ghrsst2ioda.test
)

file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/testinput)
file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/testrun)
file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/obsproc/testref)
file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/obsproc/testoutput)

CREATE_SYMLINK( ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}/obsproc ${gdas_utils_test_ref} )
CREATE_SYMLINK( ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR} ${utils_test_input} )

# copy the cpp linter script
Expand Down
11 changes: 10 additions & 1 deletion utils/test/testinput/gdas_ghrsst2ioda.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,17 @@ window end: 2018-04-15T12:00:00Z
variable: seaSurfaceTemperature
binning:
stride: 2
min number of obs: 5
min number of obs: 1
bounds:
units: C
min: -3.0
max: 50.0
output file: ghrsst_sst_mb_20210701.ioda.nc
input files:
- ghrsst_sst_mb_202107010000.nc4
- ghrsst_sst_mb_202107010100.nc4

test:
reference filename: testref/ghrsst2ioda.test
test output filename: testoutput/ghrsst2ioda.test
float relative tolerance: 1e-6
52 changes: 52 additions & 0 deletions utils/test/testref/ghrsst2ioda.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
Reading ghrsst_sst_mb_202107010000.nc4
seconds since 1981-01-01T00:00:00Z
obsVal:
Min: 276.708
Max: 276.9
Sum: 4982.63
obsError:
Min: 0.32
Max: 0.32
Sum: 5.76
preQc:
Min: 0
Max: 0
Sum: 0
longitude:
Min: 131.52
Max: 131.71
Sum: 2369.13
latitude:
Min: -55.5
Max: -55.42
Sum: -998.28
datetime:
Min: 1277942528
Max: 1277942528
Sum: 23002965504
Reading ghrsst_sst_mb_202107010100.nc4
seconds since 1981-01-01T00:00:00Z
obsVal:
Min: 281.547
Max: 281.714
Sum: 5069.06
obsError:
Min: 0.58
Max: 0.61
Sum: 10.8225
preQc:
Min: 0
Max: 0
Sum: 0
longitude:
Min: 151.12
Max: 151.31
Sum: 2721.93
latitude:
Min: 56.62
Max: 56.7
Sum: 1019.88
datetime:
Min: 1277946624
Max: 1277946624
Sum: 23003039232
Loading