Skip to content

Commit

Permalink
Merge branch 'develop' into feature/NickE_gpsro_bufr
Browse files Browse the repository at this point in the history
  • Loading branch information
CoryMartin-NOAA authored Nov 14, 2023
2 parents be29743 + 03fb9d5 commit e71d6b4
Show file tree
Hide file tree
Showing 6 changed files with 190 additions and 59 deletions.
139 changes: 82 additions & 57 deletions utils/obsproc/Ghrsst2Ioda.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include <iostream>
#include <netcdf> // NOLINT (using C API)
#include <regex>
#include <string>
#include <vector>

Expand All @@ -28,10 +29,21 @@ namespace gdasapp {
gdasapp::IodaVars providerToIodaVars(const std::string fileName) final {
oops::Log::info() << "Processing files provided by GHRSST" << std::endl;

// Get the sst bounds from the configuration
std::string sstUnits;
fullConfig_.get("bounds.units", sstUnits);
float sstMin;
fullConfig_.get("bounds.min", sstMin);
float sstMax;
fullConfig_.get("bounds.max", sstMax);
if ( sstUnits == "C" ) {
sstMin += 273.15;
sstMax += 273.15;
}

// Open the NetCDF file in read-only mode
netCDF::NcFile ncFile(fileName, netCDF::NcFile::read);
oops::Log::info() << "Reading... " << fileName << std::endl;

oops::Log::test() << "Reading " << fileName << std::endl;
// Get number of obs
int dimLon = ncFile.getDim("lon").getSize();
int dimLat = ncFile.getDim("lat").getSize();
Expand All @@ -56,65 +68,64 @@ namespace gdasapp {
}
}

// datetime: Read Reference Time
std::vector<int> refTime(dimTime);
// Read the reference time
std::vector<int32_t> refTime(dimTime);
ncFile.getVar("time").getVar(refTime.data());
std::string refDate;
ncFile.getVar("time").getAtt("units").getValues(refDate);

// Reformat the reference time
std::regex dateRegex(R"(\b\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\b)");
std::smatch match;
std::regex_search(refDate, match, dateRegex);
refDate = match.str();
std::tm tmStruct = {};
std::istringstream ss(refDate);
ss >> std::get_time(&tmStruct, "%Y-%m-%d %H:%M:%S");
std::ostringstream isoFormatted;
isoFormatted << std::put_time(&tmStruct, "seconds since %Y-%m-%dT%H:%M:%SZ");
refDate = isoFormatted.str();

// Read sst_dtime to add to the reference time
int sstdTime[dimTime][dimLat][dimLon]; // NOLINT
ncFile.getVar("sst_dtime").getVar(sstdTime);
float dtimeOffSet;
ncFile.getVar("sst_dtime").getAtt("add_offset").getValues(&dtimeOffSet);
// TODO(AMG): What's below does not read the field the same way python does
std::vector<int32_t> sstdTime(dimTime*dimLat*dimLon);
ncFile.getVar("sst_dtime").getVar(sstdTime.data());
float dtimeScaleFactor;
ncFile.getVar("sst_dtime").getAtt("scale_factor").getValues(&dtimeScaleFactor);

oops::Log::info() << "--- sst_dtime: " << std::endl;

// Read SST obs Value, bias, Error and quality flag
// ObsValue
short sstObsVal[dimTime][dimLat][dimLon]; // NOLINT
ncFile.getVar("sea_surface_temperature").getVar(sstObsVal);
// Read SST ObsValue
std::vector<int16_t> sstObsVal(dimTime*dimLat*dimLon);
ncFile.getVar("sea_surface_temperature").getVar(sstObsVal.data());
float sstOffSet;
ncFile.getVar("sea_surface_temperature").getAtt("add_offset").getValues(&sstOffSet);
float sstScaleFactor;
ncFile.getVar("sea_surface_temperature").getAtt("scale_factor").getValues(&sstScaleFactor);

oops::Log::info() << "--- sst_ObsValue: " << std::endl;

// Bias
uint8_t sstObsBias[dimTime][dimLat][dimLon];
ncFile.getVar("sses_bias").getVar(sstObsBias);
float biasOffSet;
ncFile.getVar("sses_bias").getAtt("add_offset").getValues(&biasOffSet);
// Read SST Bias
std::vector<signed char> sstObsBias(dimTime*dimLat*dimLon);
ncFile.getVar("sses_bias").getVar(sstObsBias.data());
float biasScaleFactor;
ncFile.getVar("sses_bias").getAtt("scale_factor").getValues(&biasScaleFactor);

oops::Log::info() << "--- sst_bias: " << std::endl;

// Error
uint8_t sstObsErr[dimTime][dimLat][dimLon];
ncFile.getVar("sses_standard_deviation").getVar(sstObsErr);
// Read Error
std::vector<signed char> sstObsErr(dimTime*dimLat*dimLon);
ncFile.getVar("sses_standard_deviation").getVar(sstObsErr.data());
float errOffSet;
ncFile.getVar("sses_standard_deviation").getAtt("add_offset").getValues(&errOffSet);
float errScaleFactor;
ncFile.getVar("sses_standard_deviation").getAtt("scale_factor").getValues(&errScaleFactor);

oops::Log::info() << "--- sst_Error: " << std::endl;

// preQc
uint8_t sstPreQC[dimTime][dimLat][dimLon];
// Read preQc
signed char sstPreQC[dimTime][dimLat][dimLon];
ncFile.getVar("quality_level").getVar(sstPreQC);

oops::Log::info() << "--- sst_preQc: " << std::endl;

// Apply scaling/unit change and compute the necessary fields
std::vector<std::vector<int>> mask(dimLat, std::vector<int>(dimLon));
std::vector<std::vector<float>> sst(dimLat, std::vector<float>(dimLon));
std::vector<std::vector<float>> obserror(dimLat, std::vector<float>(dimLon));
std::vector<std::vector<int>> preqc(dimLat, std::vector<int>(dimLon));
std::vector<std::vector<float>> seconds(dimLat, std::vector<float>(dimLon));
size_t index = 0;
for (int i = 0; i < dimLat; i++) {
for (int j = 0; j < dimLon; j++) {
// provider's QC flag
Expand All @@ -124,51 +135,57 @@ namespace gdasapp {
preqc[i][j] = 5 - static_cast<int>(sstPreQC[0][i][j]);

// bias corrected sst, regressed to the drifter depth
sst[i][j] = (static_cast<float>(sstObsVal[0][i][j]) + sstOffSet) * sstScaleFactor
- (static_cast<float>(sstObsBias[0][i][j]) + biasOffSet) * biasScaleFactor;
sst[i][j] = static_cast<float>(sstObsVal[index]) * sstScaleFactor + sstOffSet
- static_cast<float>(sstObsBias[index]) * biasScaleFactor;

// mask
// TODO(Somebody): pass the QC flag theashold through the config.
// currently hard-coded to only use qc=5
if (sst[i][j] >= -3.0 && sst[i][j] <= 50.0 && preqc[i][j] ==0) {
if (sst[i][j] >= sstMin && sst[i][j] <= sstMax && preqc[i][j] ==0) {
mask[i][j] = 1;
} else {
mask[i][j] = 0;
}

// obs error
// TODO(Somebody): add sampled std. dev. of sst to the total obs error
obserror[i][j] = (static_cast<float>(sstObsErr[0][i][j]) + errOffSet) * errScaleFactor;
obserror[i][j] = static_cast<float>(sstObsErr[index]) * errScaleFactor + errOffSet;

// epoch time in seconds
seconds[i][j] = static_cast<int64_t>((sstdTime[0][i][j] + dtimeOffSet)
* dtimeScaleFactor)
+ static_cast<int64_t>(refTime[0]);
seconds[i][j] = static_cast<float>(sstdTime[index]) * dtimeScaleFactor
+ static_cast<float>(refTime[0]);
index++;
}
}

// TODO(Guillaume): check periodic BC, use sampling std dev of sst as a proxi for obs error
// should the sst mean be weighted by the provided obs error?
std::vector<std::vector<float>> lon2d_s =
gdasapp::superobutils::subsample2D(lon2d, mask, fullConfig_);
std::vector<std::vector<float>> lat2d_s =
gdasapp::superobutils::subsample2D(lat2d, mask, fullConfig_);
std::vector<std::vector<float>> sst_s =
gdasapp::superobutils::subsample2D(sst, mask, fullConfig_);
std::vector<std::vector<float>> obserror_s =
gdasapp::superobutils::subsample2D(obserror, mask, fullConfig_);
std::vector<std::vector<float>> seconds_s =
gdasapp::superobutils::subsample2D(seconds, mask, fullConfig_);
// Superobing
// TODO(Guillaume): Save the sampling std dev of sst so it can be used
// as a proxi for obs error
std::vector<std::vector<float>> sst_s;
std::vector<std::vector<float>> lon2d_s;
std::vector<std::vector<float>> lat2d_s;
std::vector<std::vector<float>> obserror_s;
std::vector<std::vector<float>> seconds_s;
if ( fullConfig_.has("binning") ) {
sst_s = gdasapp::superobutils::subsample2D(sst, mask, fullConfig_);
lon2d_s = gdasapp::superobutils::subsample2D(lon2d, mask, fullConfig_);
lat2d_s = gdasapp::superobutils::subsample2D(lat2d, mask, fullConfig_);
obserror_s = gdasapp::superobutils::subsample2D(obserror, mask, fullConfig_);
seconds_s = gdasapp::superobutils::subsample2D(seconds, mask, fullConfig_);
} else {
sst_s = sst;
lon2d_s = lon2d;
lat2d_s = lat2d;
obserror_s = obserror;
seconds_s = seconds;
}

// number of obs after subsampling
int nobs = sst_s.size() * sst_s[0].size();

// Create instance of iodaVars object
gdasapp::IodaVars iodaVars(nobs, {}, {});

// unix epoch at Jan 01 1981 00:00:00 GMT+0000
// Reference time is Jan 01 1981 00:00:00 GMT+0000
iodaVars.referenceDate_ = refDate;
oops::Log::info() << "--- time: " << iodaVars.referenceDate_ << std::endl;

// Store into eigen arrays
int loc(0);
Expand All @@ -184,11 +201,19 @@ namespace gdasapp {
}
}

// Remove
// Basic QC
Eigen::Array<bool, Eigen::Dynamic, 1> boundsCheck =
(iodaVars.obsVal_ > -3.0 && iodaVars.obsVal_ < 50.0);
(iodaVars.obsVal_ > sstMin && iodaVars.obsVal_ < sstMax);
iodaVars.trim(boundsCheck);

// Replace datime by its mean
// TODO(ASGM): Remove when the time reading is fixed
int64_t mean = iodaVars.datetime_.sum() / iodaVars.datetime_.size();
iodaVars.datetime_.setConstant(mean);

// Test output
iodaVars.testOutput();

return iodaVars;
};
}; // class Ghrsst2Ioda
Expand Down
37 changes: 37 additions & 0 deletions utils/obsproc/NetCDFToIodaConverter.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,26 @@
#include "oops/util/missingValues.h"

namespace gdasapp {
namespace testutils {
template <typename Derived>
std::string checksum(const Eigen::ArrayBase<Derived>& arr, const std::string varname) {
std::stringstream result;
if (arr.size() == 0) {
result << varname << " is empty" << "\n";
} else {
auto minElement = arr.minCoeff();
auto maxElement = arr.maxCoeff();
auto sumElements = arr.sum();

result << varname << ":" << "\n";
result << " Min: " << minElement << "\n";
result << " Max: " << maxElement << "\n";
result << " Sum: " << sumElements;
}
return result.str();
}
} // namespace testutils

// A simple data structure to organize the info to provide to the ioda
// writter
struct IodaVars {
Expand Down Expand Up @@ -135,6 +155,23 @@ namespace gdasapp {
location_ = iodaVarsMasked.location_;
oops::Log::info() << "IodaVars::IodaVars done masking." << std::endl;
}

// Testing
void testOutput() {
oops::Log::test() << referenceDate_ << std::endl;
oops::Log::test() <<
gdasapp::testutils::checksum(obsVal_, "obsVal") << std::endl;
oops::Log::test() <<
gdasapp::testutils::checksum(obsError_, "obsError") << std::endl;
oops::Log::test() <<
gdasapp::testutils::checksum(preQc_, "preQc") << std::endl;
oops::Log::test() <<
gdasapp::testutils::checksum(longitude_, "longitude") << std::endl;
oops::Log::test() <<
gdasapp::testutils::checksum(latitude_, "latitude") << std::endl;
oops::Log::test() <<
gdasapp::testutils::checksum(datetime_, "datetime") << std::endl;
}
};

// Base class for the converters
Expand Down
2 changes: 1 addition & 1 deletion utils/obsproc/superob.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ namespace gdasapp {
}

// Calculate the average and store it in the subsampled array
if ( count < 1 ) {
if ( count < minNumObs ) {
subsampled[i][j] = static_cast<T>(-9999);
} else {
subsampled[i][j] = sum / static_cast<T>(count);
Expand Down
8 changes: 8 additions & 0 deletions utils/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,16 @@ list( APPEND utils_test_input
testinput/gdas_icecamsr2ioda.yaml
)

set( gdas_utils_test_ref
testref/ghrsst2ioda.test
)

file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/testinput)
file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/testrun)
file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/obsproc/testref)
file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/obsproc/testoutput)

CREATE_SYMLINK( ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}/obsproc ${gdas_utils_test_ref} )
CREATE_SYMLINK( ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR} ${utils_test_input} )

# copy the cpp linter script
Expand Down
11 changes: 10 additions & 1 deletion utils/test/testinput/gdas_ghrsst2ioda.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,17 @@ window end: 2018-04-15T12:00:00Z
variable: seaSurfaceTemperature
binning:
stride: 2
min number of obs: 5
min number of obs: 1
bounds:
units: C
min: -3.0
max: 50.0
output file: ghrsst_sst_mb_20210701.ioda.nc
input files:
- ghrsst_sst_mb_202107010000.nc4
- ghrsst_sst_mb_202107010100.nc4

test:
reference filename: testref/ghrsst2ioda.test
test output filename: testoutput/ghrsst2ioda.test
float relative tolerance: 1e-6
52 changes: 52 additions & 0 deletions utils/test/testref/ghrsst2ioda.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
Reading ghrsst_sst_mb_202107010000.nc4
seconds since 1981-01-01T00:00:00Z
obsVal:
Min: 276.708
Max: 276.9
Sum: 4982.63
obsError:
Min: 0.32
Max: 0.32
Sum: 5.76
preQc:
Min: 0
Max: 0
Sum: 0
longitude:
Min: 131.52
Max: 131.71
Sum: 2369.13
latitude:
Min: -55.5
Max: -55.42
Sum: -998.28
datetime:
Min: 1277942528
Max: 1277942528
Sum: 23002965504
Reading ghrsst_sst_mb_202107010100.nc4
seconds since 1981-01-01T00:00:00Z
obsVal:
Min: 281.547
Max: 281.714
Sum: 5069.06
obsError:
Min: 0.58
Max: 0.61
Sum: 10.8225
preQc:
Min: 0
Max: 0
Sum: 0
longitude:
Min: 151.12
Max: 151.31
Sum: 2721.93
latitude:
Min: 56.62
Max: 56.7
Sum: 1019.88
datetime:
Min: 1277946624
Max: 1277946624
Sum: 23003039232

0 comments on commit e71d6b4

Please sign in to comment.