diff --git a/src/fdb5/CMakeLists.txt b/src/fdb5/CMakeLists.txt index 062f210f5..118d6c4f1 100644 --- a/src/fdb5/CMakeLists.txt +++ b/src/fdb5/CMakeLists.txt @@ -39,7 +39,8 @@ list( APPEND fdb5_srcs api/helpers/FDBToolRequest.cc api/helpers/FDBToolRequest.h api/helpers/DumpIterator.h - api/helpers/ListIterator.cc + api/helpers/ListElement.cc + api/helpers/ListElement.h api/helpers/ListIterator.h api/helpers/LockIterator.h api/helpers/MoveIterator.h @@ -78,6 +79,8 @@ list( APPEND fdb5_srcs database/AxisRegistry.h database/BaseArchiveVisitor.cc database/BaseArchiveVisitor.h + database/BaseKey.cc + database/BaseKey.h database/Catalogue.cc database/Catalogue.h database/DatabaseNotFoundException.cc @@ -124,6 +127,7 @@ list( APPEND fdb5_srcs database/IndexFactory.h database/Key.cc database/Key.h + database/ReadVisitor.cc database/ReadVisitor.h database/Report.cc database/Report.h @@ -444,6 +448,7 @@ list( APPEND fdb5_tools fdb-schema fdb-where fdb-info + fdb-inspect fdb-status fdb-lock fdb-unlock diff --git a/src/fdb5/api/DistFDB.cc b/src/fdb5/api/DistFDB.cc index fbbe524b5..37b28a247 100644 --- a/src/fdb5/api/DistFDB.cc +++ b/src/fdb5/api/DistFDB.cc @@ -179,11 +179,12 @@ auto DistFDB::queryInternal(const FDBToolRequest& request, const QueryFN& fn) -> } -ListIterator DistFDB::list(const FDBToolRequest& request) { +ListIterator DistFDB::list(const FDBToolRequest& request, int level) { LOG_DEBUG_LIB(LibFdb5) << "DistFDB::list() : " << request << std::endl; return queryInternal(request, - [](FDB& fdb, const FDBToolRequest& request) { - return fdb.list(request); + [level](FDB& fdb, const FDBToolRequest& request) { + bool deduplicate = false; // never deduplicate on inner calls + return fdb.list(request, deduplicate, level); }); } diff --git a/src/fdb5/api/DistFDB.h b/src/fdb5/api/DistFDB.h index 43e177f3f..60f964352 100644 --- a/src/fdb5/api/DistFDB.h +++ b/src/fdb5/api/DistFDB.h @@ -45,7 +45,7 @@ class DistFDB : public FDBBase { ListIterator inspect(const metkit::mars::MarsRequest& request) override; - ListIterator list(const FDBToolRequest& request) override; + ListIterator list(const FDBToolRequest& request, int level) override; AxesIterator axesIterator(const FDBToolRequest& request, int level=3) override { NOTIMP; } diff --git a/src/fdb5/api/FDB.cc b/src/fdb5/api/FDB.cc index 2720f46fa..c28513c5a 100644 --- a/src/fdb5/api/FDB.cc +++ b/src/fdb5/api/FDB.cc @@ -13,10 +13,17 @@ * (Project ID: 671951) www.nextgenio.eu */ +#include +#include +#include +#include + #include "eckit/config/Resource.h" +#include "eckit/exception/Exceptions.h" #include "eckit/io/DataHandle.h" #include "eckit/io/MemoryHandle.h" #include "eckit/log/Log.h" +#include "eckit/log/Timer.h" #include "eckit/message/Message.h" #include "eckit/message/Reader.h" @@ -26,6 +33,9 @@ #include "fdb5/api/FDB.h" #include "fdb5/api/FDBFactory.h" #include "fdb5/api/helpers/FDBToolRequest.h" +#include "fdb5/api/helpers/ListElement.h" +#include "fdb5/api/helpers/ListIterator.h" +#include "fdb5/database/FieldLocation.h" #include "fdb5/database/Key.h" #include "fdb5/io/HandleGatherer.h" #include "fdb5/message/MessageDecoder.h" @@ -110,12 +120,11 @@ void FDB::archive(const Key& key, const void* data, size_t length) { Key keyInternal(key); // step in archival requests from the model is just an integer. We need to include the stepunit - auto stepunit = keyInternal.find("stepunits"); - if (stepunit != keyInternal.end()) { - if (stepunit->second.size()>0 && static_cast(tolower(stepunit->second[0])) != 'h') { - auto step = keyInternal.find("step"); - if (step != keyInternal.end()) { - std::string canonicalStep = config().schema().registry().lookupType("step").toKey(step->second + static_cast(tolower(stepunit->second[0]))); + if (const auto [stepunit, found] = keyInternal.find("stepunits"); found) { + if (stepunit->second.size() > 0 && static_cast(tolower(stepunit->second[0])) != 'h') { + if (auto [step, foundStep] = keyInternal.find("step"); foundStep) { + std::string canonicalStep = config().schema().registry().lookupType("step").toKey( + step->second + static_cast(tolower(stepunit->second[0]))); keyInternal.set("step", canonicalStep); } } @@ -180,7 +189,7 @@ eckit::DataHandle* FDB::read(ListIterator& it, bool sorted) { if (it.next(el)) { // build the request representing the tensor-product of all retrieved fields metkit::mars::MarsRequest cubeRequest = el.combinedKey().request(); - std::vector elements{el}; + std::vector elements {el}; while (it.next(el)) { cubeRequest.merge(el.combinedKey().request()); @@ -188,11 +197,9 @@ eckit::DataHandle* FDB::read(ListIterator& it, bool sorted) { } // checking all retrieved fields against the hypercube, to remove duplicates - ListElementDeduplicator dedup; - metkit::hypercube::HyperCubePayloaded cube(cubeRequest, dedup); - for(auto el: elements) { - cube.add(el.combinedKey().request(), el); - } + ListElementDeduplicator deduplicator; + metkit::hypercube::HyperCubePayloaded cube(cubeRequest, deduplicator); + for (const auto& elem : elements) { cube.add(elem.combinedKey().request(), el); } if (cube.countVacant() > 0) { std::stringstream ss; @@ -203,7 +210,7 @@ eckit::DataHandle* FDB::read(ListIterator& it, bool sorted) { eckit::Log::warning() << ss.str() << std::endl; } - for (size_t i=0; i< cube.size(); i++) { + for (std::size_t i = 0; i < cube.size(); i++) { ListElement element; if (cube.find(i, element)) { result.add(element.location().dataHandle()); @@ -228,8 +235,8 @@ ListIterator FDB::inspect(const metkit::mars::MarsRequest& request) { return internal_->inspect(request); } -ListIterator FDB::list(const FDBToolRequest& request, bool deduplicate) { - return ListIterator(internal_->list(request), deduplicate); +ListIterator FDB::list(const FDBToolRequest& request, const bool deduplicate, const int level) { + return {internal_->list(request, level), deduplicate}; } DumpIterator FDB::dump(const FDBToolRequest& request, bool simple) { diff --git a/src/fdb5/api/FDB.h b/src/fdb5/api/FDB.h index 28529795c..9d66f69ac 100644 --- a/src/fdb5/api/FDB.h +++ b/src/fdb5/api/FDB.h @@ -21,6 +21,7 @@ #include #include +#include #include "eckit/distributed/Transport.h" @@ -95,7 +96,7 @@ class FDB { ListIterator inspect(const metkit::mars::MarsRequest& request); - ListIterator list(const FDBToolRequest& request, bool deduplicate=false); + ListIterator list(const FDBToolRequest& request, bool deduplicate=false, int level=3); DumpIterator dump(const FDBToolRequest& request, bool simple=false); diff --git a/src/fdb5/api/FDBFactory.h b/src/fdb5/api/FDBFactory.h index 6661a475e..7d7ae2c3e 100644 --- a/src/fdb5/api/FDBFactory.h +++ b/src/fdb5/api/FDBFactory.h @@ -75,7 +75,7 @@ class FDBBase : private eckit::NonCopyable { virtual ListIterator inspect(const metkit::mars::MarsRequest& request) = 0; - virtual ListIterator list(const FDBToolRequest& request) = 0; + virtual ListIterator list(const FDBToolRequest& request, int level) = 0; virtual DumpIterator dump(const FDBToolRequest& request, bool simple) = 0; diff --git a/src/fdb5/api/LocalFDB.cc b/src/fdb5/api/LocalFDB.cc index 8c2d680a7..97700bf1d 100644 --- a/src/fdb5/api/LocalFDB.cc +++ b/src/fdb5/api/LocalFDB.cc @@ -15,7 +15,6 @@ #include "eckit/container/Queue.h" #include "eckit/log/Log.h" -#include "eckit/message/Message.h" #include "fdb5/api/helpers/ListIterator.h" #include "fdb5/api/helpers/FDBToolRequest.h" @@ -34,7 +33,6 @@ #include "fdb5/api/local/DumpVisitor.h" #include "fdb5/api/local/ListVisitor.h" #include "fdb5/api/local/PurgeVisitor.h" -#include "fdb5/api/local/QueryVisitor.h" #include "fdb5/api/local/StatsVisitor.h" #include "fdb5/api/local/StatusVisitor.h" #include "fdb5/api/local/WipeVisitor.h" @@ -83,9 +81,9 @@ APIIterator LocalFDB::queryInternal(const FDBTo return QueryIterator(new AsyncIterator(async_worker)); } -ListIterator LocalFDB::list(const FDBToolRequest& request) { +ListIterator LocalFDB::list(const FDBToolRequest& request, const int level) { LOG_DEBUG_LIB(LibFdb5) << "LocalFDB::list() : " << request << std::endl; - return queryInternal(request); + return queryInternal(request, level); } DumpIterator LocalFDB::dump(const FDBToolRequest &request, bool simple) { @@ -127,7 +125,7 @@ ControlIterator LocalFDB::control(const FDBToolRequest& request, AxesIterator LocalFDB::axesIterator(const FDBToolRequest& request, int level) { LOG_DEBUG_LIB(LibFdb5) << "LocalFDB::axesIterator() : " << request << std::endl; - return queryInternal(request, config_, level); + return queryInternal(request, level); } void LocalFDB::flush() { diff --git a/src/fdb5/api/LocalFDB.h b/src/fdb5/api/LocalFDB.h index 05c4d3c8f..f57ffdb82 100644 --- a/src/fdb5/api/LocalFDB.h +++ b/src/fdb5/api/LocalFDB.h @@ -41,7 +41,7 @@ class LocalFDB : public FDBBase { ListIterator inspect(const metkit::mars::MarsRequest& request) override; - ListIterator list(const FDBToolRequest& request) override; + ListIterator list(const FDBToolRequest& request, int level) override; DumpIterator dump(const FDBToolRequest& request, bool simple) override; diff --git a/src/fdb5/api/RemoteFDB.cc b/src/fdb5/api/RemoteFDB.cc index c2401657f..f486f3463 100644 --- a/src/fdb5/api/RemoteFDB.cc +++ b/src/fdb5/api/RemoteFDB.cc @@ -6,10 +6,11 @@ #include "eckit/serialisation/MemoryStream.h" #include "fdb5/api/helpers/FDBToolRequest.h" +#include "fdb5/LibFdb5.h" #include "fdb5/api/RemoteFDB.h" +#include "fdb5/api/helpers/ListElement.h" #include "fdb5/database/Archiver.h" #include "fdb5/database/Inspector.h" -#include "fdb5/LibFdb5.h" #include "fdb5/remote/client/ClientConnectionRouter.h" #include "fdb5/remote/RemoteFieldLocation.h" @@ -36,6 +37,8 @@ using StatsHelper = BaseAPIHelper { + ListHelper(const int depth) : depth_(depth) { } + static fdb5::ListElement valueFromStream(eckit::Stream& s, fdb5::RemoteFDB* fdb) { fdb5::ListElement elem(s); @@ -48,11 +51,16 @@ struct ListHelper : BaseAPIHelper remoteLocation = fdb5::remote::RemoteFieldLocation(fdb->storeEndpoint(fieldLocationEndpoint), static_cast(elem.location())).make_shared(); - return fdb5::ListElement(elem.key(), remoteLocation, elem.timestamp()); + return fdb5::ListElement(elem.keys(), remoteLocation, elem.timestamp()); } std::shared_ptr remoteLocation = fdb5::remote::RemoteFieldLocation(fdb->storeEndpoint(), elem.location()).make_shared(); - return fdb5::ListElement(elem.key(), remoteLocation, elem.timestamp()); + return fdb5::ListElement(elem.keys(), remoteLocation, elem.timestamp()); } + + void encodeExtra(eckit::Stream& s) const { s << depth_; } + +private: + int depth_ {3}; }; struct AxesHelper : BaseAPIHelper { @@ -78,10 +86,10 @@ struct InspectHelper : BaseAPIHelper remoteLocation = fdb5::remote::RemoteFieldLocation(fdb->storeEndpoint(fieldLocationEndpoint), static_cast(elem.location())).make_shared(); - return fdb5::ListElement(elem.key(), remoteLocation, elem.timestamp()); + return fdb5::ListElement(elem.keys(), remoteLocation, elem.timestamp()); } std::shared_ptr remoteLocation = fdb5::remote::RemoteFieldLocation(fdb->storeEndpoint(), elem.location()).make_shared(); - return fdb5::ListElement(elem.key(), remoteLocation, elem.timestamp()); + return fdb5::ListElement(elem.keys(), remoteLocation, elem.timestamp()); } }; @@ -124,7 +132,7 @@ RemoteFDB::RemoteFDB(const eckit::Configuration& config, const std::string& name std::vector stores; std::vector fieldLocationEndpoints; - + for (size_t i=0; i> store; @@ -225,12 +233,12 @@ auto RemoteFDB::forwardApiCall(const HelperClass& helper, const FDBToolRequest& ); } -ListIterator RemoteFDB::list(const FDBToolRequest& request) { - return forwardApiCall(ListHelper(), request); +ListIterator RemoteFDB::list(const FDBToolRequest& request, const int depth) { + return forwardApiCall(ListHelper(depth), request); } -AxesIterator RemoteFDB::axesIterator(const FDBToolRequest& request, int level) { - return forwardApiCall(AxesHelper(level), request); +AxesIterator RemoteFDB::axesIterator(const FDBToolRequest& request, const int depth) { + return forwardApiCall(AxesHelper(depth), request); } ListIterator RemoteFDB::inspect(const metkit::mars::MarsRequest& request) { @@ -247,7 +255,7 @@ void RemoteFDB::print(std::ostream& s) const { // Client bool RemoteFDB::handle(remote::Message message, uint32_t requestID) { - + switch (message) { case fdb5::remote::Message::Complete: { diff --git a/src/fdb5/api/RemoteFDB.h b/src/fdb5/api/RemoteFDB.h index 7524896bc..3e5b2bf46 100644 --- a/src/fdb5/api/RemoteFDB.h +++ b/src/fdb5/api/RemoteFDB.h @@ -44,9 +44,9 @@ class RemoteFDB : public LocalFDB, public remote::Client { ListIterator inspect(const metkit::mars::MarsRequest& request) override; - ListIterator list(const FDBToolRequest& request) override; + ListIterator list(const FDBToolRequest& request, int depth) override; - AxesIterator axesIterator(const FDBToolRequest& request, int level=3) override; + AxesIterator axesIterator(const FDBToolRequest& request, int depth = 3) override; DumpIterator dump(const FDBToolRequest& request, bool simple) override { NOTIMP; } diff --git a/src/fdb5/api/SelectFDB.cc b/src/fdb5/api/SelectFDB.cc index 5e704e2ab..c6fb41b23 100644 --- a/src/fdb5/api/SelectFDB.cc +++ b/src/fdb5/api/SelectFDB.cc @@ -137,12 +137,10 @@ auto SelectFDB::queryInternal(const FDBToolRequest& request, const QueryFN& fn) return QueryIterator(new APIAggregateIterator(std::move(iterQueue))); } -ListIterator SelectFDB::list(const FDBToolRequest& request) { +ListIterator SelectFDB::list(const FDBToolRequest& request, const int level) { LOG_DEBUG_LIB(LibFdb5) << "SelectFDB::list() >> " << request << std::endl; return queryInternal(request, - [](FDB& fdb, const FDBToolRequest& request) { - return fdb.list(request); - }); + [level](FDB& fdb, const FDBToolRequest& request) { return fdb.list(request, false, level); }); } DumpIterator SelectFDB::dump(const FDBToolRequest& request, bool simple) { @@ -217,20 +215,13 @@ void SelectFDB::print(std::ostream &s) const { } bool SelectFDB::matches(const Key& key, const SelectMap &select, bool requireMissing) const { + for (const auto& [keyword, regex] : select) { + const auto [iter, found] = key.find(keyword); - for (const auto& kv : select) { + if (!found && requireMissing) { return false; } - const std::string& k(kv.first); - const eckit::Regex& re(kv.second); - - eckit::StringDict::const_iterator i = key.find(k); - if (i == key.end()) { - if (requireMissing) return false; - } else if (!re.match(i->second)) { - return false; - } + if (!regex.match(iter->second)) { return false; } } - return true; } diff --git a/src/fdb5/api/SelectFDB.h b/src/fdb5/api/SelectFDB.h index b8678d9ab..cb1c88c25 100644 --- a/src/fdb5/api/SelectFDB.h +++ b/src/fdb5/api/SelectFDB.h @@ -50,7 +50,7 @@ class SelectFDB : public FDBBase { ListIterator inspect(const metkit::mars::MarsRequest& request) override; - ListIterator list(const FDBToolRequest& request) override; + ListIterator list(const FDBToolRequest& request, int level) override; DumpIterator dump(const FDBToolRequest& request, bool simple) override; diff --git a/src/fdb5/api/fdb_c.cc b/src/fdb5/api/fdb_c.cc index eb69bc9d5..92db470f1 100644 --- a/src/fdb5/api/fdb_c.cc +++ b/src/fdb5/api/fdb_c.cc @@ -8,20 +8,22 @@ * does it submit to any jurisdiction. */ +#include "eckit/config/YAMLConfiguration.h" +#include "eckit/exception/Exceptions.h" #include "eckit/io/MemoryHandle.h" #include "eckit/message/Message.h" #include "eckit/runtime/Main.h" -#include "eckit/config/YAMLConfiguration.h" +#include "eckit/utils/Tokenizer.h" -#include "metkit/mars/MarsRequest.h" #include "metkit/mars/MarsExpension.h" -#include "eckit/utils/Tokenizer.h" +#include "metkit/mars/MarsRequest.h" -#include "fdb5/fdb5_version.h" #include "fdb5/api/FDB.h" #include "fdb5/api/helpers/FDBToolRequest.h" +#include "fdb5/api/helpers/ListElement.h" #include "fdb5/api/helpers/ListIterator.h" #include "fdb5/database/Key.h" +#include "fdb5/fdb5_version.h" #include "fdb5/api/fdb_c.h" @@ -88,50 +90,51 @@ struct fdb_request_t { }; struct fdb_split_key_t { -public: - fdb_split_key_t() : key_(nullptr), level_(-1) {} + using value_type = std::array; - void set(const std::vector& key) { - key_ = &key; - level_ = -1; + auto operator=(const value_type& keys) -> fdb_split_key_t& { + keys_ = &keys; + level_ = keys_->end(); + return *this; } - int next_metadata(const char** k, const char** v, size_t* level) { - if (key_ == nullptr) { - std::stringstream ss; - ss << "fdb_split_key_t not valid. Key not configured"; - throw eckit::UserError(ss.str(), Here()); + auto operator++() -> fdb_split_key_t& { + /// @todo the following "if" is an unfortunate consequence of a flaw in this iterator + if (level_ == keys_->end()) { + level_ = keys_->begin(); + curr_ = level_->begin(); + return *this; } - if (level_ == -1) { - if (0 < key_->size()) { - level_ = 0; - it_ = key_->at(0).begin(); - } else { - return FDB_ITERATION_COMPLETE; - } - } - while (it_ == key_->at(level_).end()) { - if (level_size()-1) { - level_++; - it_ = key_->at(level_).begin(); - } else { - return FDB_ITERATION_COMPLETE; - } + if (curr_ != level_->end()) { + ++curr_; + if (curr_ == level_->end() && level_ != keys_->end() - 1) { curr_ = (++level_)->begin(); } } + return *this; + } - *k = it_->first.c_str(); - *v = it_->second.c_str(); - if (level != nullptr) { - *level = level_; - } - it_++; + int next() { + ++(*this); + if (curr_ == level_->end()) { return FDB_ITERATION_COMPLETE; } return FDB_SUCCESS; } -private: - const std::vector* key_; - int level_; - Key::const_iterator it_; + void metadata(const char** k, const char** v, size_t* level) const { + ASSERT_MSG(keys_, "keys are missing!"); + + const auto& [key, val] = *curr_; + + *k = key.c_str(); + *v = val.c_str(); + + if (level) { *level = level_ - keys_->begin(); } + } + +private: // members + const value_type* keys_ {nullptr}; + + value_type::const_iterator level_; + + Key::const_iterator curr_; }; struct fdb_listiterator_t { @@ -147,17 +150,20 @@ struct fdb_listiterator_t { void attrs(const char** uri, size_t* off, size_t* len) { ASSERT(validEl_); - const FieldLocation& loc = el_.location(); - *uri = loc.uri().name().c_str(); - *off = loc.offset(); - *len = loc.length(); + // guard against negative values + ASSERT(0 <= el_.offset()); + ASSERT(0 <= el_.length()); + + *uri = el_.uri().name().c_str(); + *off = el_.offset(); + *len = el_.length(); } void key(fdb_split_key_t* key) { ASSERT(validEl_); ASSERT(key); - key->set(el_.key()); + *key = el_.keys(); } private: @@ -368,17 +374,18 @@ int fdb_archive_multiple(fdb_handle_t* fdb, fdb_request_t* req, const char* data }); } -int fdb_list(fdb_handle_t* fdb, const fdb_request_t* req, fdb_listiterator_t** it, bool duplicates) { - return wrapApiFunction([fdb, req, it, duplicates] { +int fdb_list(fdb_handle_t* fdb, const fdb_request_t* req, fdb_listiterator_t** it, const bool duplicates, const int depth) { + return wrapApiFunction([fdb, req, it, duplicates, depth] { ASSERT(fdb); ASSERT(it); + ASSERT(depth >= 1 && depth <= 3); std::vector minKeySet; // we consider an empty set const FDBToolRequest toolRequest( req ? req->request() : metkit::mars::MarsRequest(), req == nullptr, minKeySet); - *it = new fdb_listiterator_t(fdb->list(toolRequest, duplicates)); + *it = new fdb_listiterator_t(fdb->list(toolRequest, duplicates, depth)); }); } int fdb_retrieve(fdb_handle_t* fdb, fdb_request_t* req, fdb_datareader_t* dr) { @@ -499,7 +506,9 @@ int fdb_splitkey_next_metadata(fdb_split_key_t* it, const char** key, const char ASSERT(it); ASSERT(key); ASSERT(value); - return it->next_metadata(key, value, level); + const auto stat = it->next(); + if (stat == FDB_SUCCESS) { it->metadata(key, value, level); } + return stat; }}); } int fdb_delete_splitkey(fdb_split_key_t* key) { diff --git a/src/fdb5/api/fdb_c.h b/src/fdb5/api/fdb_c.h index acaa1186f..e010dbc0f 100644 --- a/src/fdb5/api/fdb_c.h +++ b/src/fdb5/api/fdb_c.h @@ -50,7 +50,7 @@ int fdb_version(const char** version); * \param version Return variable for version control checksum. Returned pointer valid throughout program lifetime. * \returns Return code (#FdbErrorValues) */ -int fdb_vcs_version(const char** version); +int fdb_vcs_version(const char** sha1); ///@} @@ -351,7 +351,7 @@ int fdb_archive_multiple(fdb_handle_t* fdb, fdb_request_t* req, const char* data * \param duplicates Boolean flag used to specify if duplicated ListElements are to be reported or not. * \returns Return code (#FdbErrorValues) */ -int fdb_list(fdb_handle_t* fdb, const fdb_request_t* req, fdb_listiterator_t** it, bool duplicates); +int fdb_list(fdb_handle_t* fdb, const fdb_request_t* req, fdb_listiterator_t** it, bool duplicates, int depth); /** Return all available data whose metadata matches a given user request. * \param fdb FDB instance. diff --git a/src/fdb5/api/helpers/FDBToolRequest.cc b/src/fdb5/api/helpers/FDBToolRequest.cc index 45762478c..f06d3e888 100644 --- a/src/fdb5/api/helpers/FDBToolRequest.cc +++ b/src/fdb5/api/helpers/FDBToolRequest.cc @@ -76,7 +76,7 @@ std::vector FDBToolRequest::requestsFromString(const std::string } }*/ LOG_DEBUG_LIB(LibFdb5) << "Expanded request: " << request << std::endl; - requests.emplace_back(FDBToolRequest(request, false, minimumKeys)); + requests.emplace_back(request, false, minimumKeys); } } @@ -132,4 +132,3 @@ void FDBToolRequest::checkMinimumKeys(const metkit::mars::MarsRequest& request, //---------------------------------------------------------------------------------------------------------------------- } // namespace fdb5 - diff --git a/src/fdb5/api/helpers/ListElement.cc b/src/fdb5/api/helpers/ListElement.cc new file mode 100644 index 000000000..715cfec86 --- /dev/null +++ b/src/fdb5/api/helpers/ListElement.cc @@ -0,0 +1,142 @@ +/* + * (C) Copyright 1996- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation nor + * does it submit to any jurisdiction. + */ + +#include "fdb5/api/helpers/ListElement.h" + +#include "eckit/exception/Exceptions.h" +#include "eckit/filesystem/URI.h" +#include "eckit/log/JSON.h" +#include "eckit/serialisation/Reanimator.h" +#include "eckit/serialisation/Stream.h" +#include "fdb5/database/FieldLocation.h" +#include "fdb5/database/Key.h" + +#include +#include +#include +#include + +namespace fdb5 { + +//---------------------------------------------------------------------------------------------------------------------- + +ListElement::ListElement(Key dbKey, const TimeStamp& timestamp): + keyParts_ {std::move(dbKey)}, timestamp_ {timestamp} { } + +ListElement::ListElement(Key dbKey, Key indexKey, const TimeStamp& timestamp): + keyParts_ {std::move(dbKey), std::move(indexKey)}, timestamp_ {timestamp} { } + +ListElement::ListElement(Key dbKey, Key indexKey, Key datumKey, std::shared_ptr location, + const TimeStamp& timestamp): + keyParts_ {std::move(dbKey), std::move(indexKey), std::move(datumKey)}, + loc_ {std::move(location)}, + timestamp_ {timestamp} { } + +ListElement::ListElement(const std::array& keys, std::shared_ptr location, const TimeStamp& timestamp) : + ListElement(keys[0], keys[1], keys[2], std::move(location), timestamp) { } + +ListElement::ListElement(eckit::Stream& stream) { + std::vector keys; + stream >> keys; + keyParts_[0] = std::move(keys.at(0)); + keyParts_[1] = std::move(keys.at(1)); + keyParts_[2] = std::move(keys.at(2)); + + if (!keyParts_[2].empty()) + loc_.reset(eckit::Reanimator::reanimate(stream)); + stream >> timestamp_; +} + +Key ListElement::combinedKey() const { + Key combined; + + for (const Key& partKey : keyParts_) { + for (const auto& kv : partKey) { + combined.set(kv.first, kv.second); + } + } + return combined; +} + +const FieldLocation& ListElement::location() const { + if (!loc_) { throw eckit::SeriousBug("Only datum (3-level) elements have FieldLocation.", Here()); } + return *loc_; +} + +const eckit::URI& ListElement::uri() const { + ASSERT(loc_); + return loc_->uri(); +} + +eckit::Offset ListElement::offset() const { + return loc_ ? loc_->offset() : eckit::Offset(0); +} + +eckit::Length ListElement::length() const { + return loc_ ? loc_->length() : eckit::Length(0); +} + +void ListElement::print(std::ostream& out, const bool location, const bool length, const bool timestamp, const char* sep) const { + out << keyParts_[0]; + if (!keyParts_[1].empty()) { + out << keyParts_[1]; + if (!keyParts_[2].empty()) { + out << keyParts_[2]; + if (location) { + out << sep; + if (loc_) { + out << *loc_; + } + } + } + } + if (length) { out << sep << "length=" << this->length(); } + if (timestamp) { out << sep << "timestamp=" << timestamp_; } +} + +void ListElement::json(eckit::JSON& json) const { + json << combinedKey().keyDict(); + if (loc_) { json << "length" << loc_->length(); } +} + +void ListElement::encode(eckit::Stream& stream) const { + std::vector keys; + keys.reserve(3); + keys.push_back(keyParts_[0]); + keys.push_back(keyParts_[1]); + keys.push_back(keyParts_[2]); + stream << keys; + + if (loc_) + stream << *loc_; + stream << timestamp_; +} + +//---------------------------------------------------------------------------------------------------------------------- +// friends + +std::ostream& operator<<(std::ostream& out, const ListElement& elem) { + elem.print(out, false, false, false, " "); + return out; +} + +eckit::Stream& operator<<(eckit::Stream& stream, const ListElement& elem) { + elem.encode(stream); + return stream; +} + +eckit::JSON& operator<<(eckit::JSON& json, const ListElement& elem) { + elem.json(json); + return json; +} + +//---------------------------------------------------------------------------------------------------------------------- + +} // namespace fdb5 diff --git a/src/fdb5/api/helpers/ListElement.h b/src/fdb5/api/helpers/ListElement.h new file mode 100644 index 000000000..d70a8a2fb --- /dev/null +++ b/src/fdb5/api/helpers/ListElement.h @@ -0,0 +1,91 @@ +/* + * (C) Copyright 1996- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation nor + * does it submit to any jurisdiction. + */ + +/// @author Simon Smart +/// @author Emanuele Danovaro +/// @author Metin Cakircali +/// @date October 2018 + +#pragma once + +#include "eckit/filesystem/URI.h" +#include "eckit/io/Length.h" +#include "eckit/io/Offset.h" +#include "fdb5/database/Key.h" + +#include +#include +#include + +namespace eckit { +class JSON; +class Stream; +} + +namespace fdb5 { + +class FieldLocation; + +//---------------------------------------------------------------------------------------------------------------------- + +/// Define a standard object which can be used to iterate the results of a +/// list() call on an arbitrary FDB object + +class ListElement { +public: // types + using TimeStamp = std::time_t; + +public: // methods + ListElement() = default; + ListElement(Key dbKey, const TimeStamp& timestamp); + + ListElement(Key dbKey, Key indexKey, const TimeStamp& timestamp); + + ListElement(Key dbKey, Key indexKey, Key datumKey, std::shared_ptr location, + const TimeStamp& timestamp); + + ListElement(const std::array& keys, std::shared_ptr location, const TimeStamp& timestamp); + + explicit ListElement(eckit::Stream& stream); + + const std::array& keys() const { return keyParts_; } + Key combinedKey() const; + + const FieldLocation& location() const; + std::shared_ptr sharedLocation() const { return loc_; } + const eckit::URI& uri() const; + + eckit::Offset offset() const; + eckit::Length length() const; + + const TimeStamp& timestamp() const { return timestamp_; } + + void print(std::ostream& out, bool location, bool length, bool timestamp, const char* sep) const; + +private: // methods + void encode(eckit::Stream& stream) const; + + void json(eckit::JSON& json) const; + + friend std::ostream& operator<<(std::ostream& out, const ListElement& elem); + friend eckit::Stream& operator<<(eckit::Stream& stream, const ListElement& elem); + friend eckit::JSON& operator<<(eckit::JSON& json, const ListElement& elem); + +private: // members + std::array keyParts_; + + std::shared_ptr loc_; + + TimeStamp timestamp_ {0}; +}; + +//---------------------------------------------------------------------------------------------------------------------- + +} // namespace fdb5 diff --git a/src/fdb5/api/helpers/ListIterator.cc b/src/fdb5/api/helpers/ListIterator.cc deleted file mode 100644 index 5f17d0f0b..000000000 --- a/src/fdb5/api/helpers/ListIterator.cc +++ /dev/null @@ -1,71 +0,0 @@ -/* - * (C) Copyright 1996- ECMWF. - * - * This software is licensed under the terms of the Apache Licence Version 2.0 - * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. - * In applying this licence, ECMWF does not waive the privileges and immunities - * granted to it by virtue of its status as an intergovernmental organisation nor - * does it submit to any jurisdiction. - */ - -/* - * This software was developed as part of the EC H2020 funded project NextGenIO - * (Project ID: 671951) www.nextgenio.eu - */ - -#include "fdb5/api/helpers/ListIterator.h" - -#include "eckit/log/JSON.h" - -namespace fdb5 { - -//---------------------------------------------------------------------------------------------------------------------- - -ListElement::ListElement(const std::vector& keyParts, std::shared_ptr location, time_t timestamp) : - keyParts_(keyParts), location_(location), timestamp_(timestamp) {} - -ListElement::ListElement(eckit::Stream &s) { - s >> keyParts_; - location_.reset(eckit::Reanimator::reanimate(s)); - s >> timestamp_; -} - -Key ListElement::combinedKey() const { - Key combined = keyParts_[2]; - - for (const Key& partKey : keyParts_) { - for (const auto& kv : partKey) { - combined.set(kv.first, kv.second); - } - } - return combined; -} - -void ListElement::print(std::ostream& out, bool withLocation, bool withLength, bool withTimestamp, const char* sep) const { - for (const auto& bit : keyParts_) { - out << bit; - } - if (location_) { - if (withLocation) { - out << sep << *location_; - } else if (withLength) { - out << sep << "length=" << location_->length(); - } - } - if (withTimestamp) out << sep << "timestamp=" << timestamp_; -} - -void ListElement::json(eckit::JSON& json) const { - json << combinedKey().keyDict(); - json << "length" << location_->length(); -} - -void ListElement::encode(eckit::Stream &s) const { - s << keyParts_; - s << *location_; - s << timestamp_; -} - -//---------------------------------------------------------------------------------------------------------------------- - -} // namespace fdb5 diff --git a/src/fdb5/api/helpers/ListIterator.h b/src/fdb5/api/helpers/ListIterator.h index 79788d0ef..c1ed875f2 100644 --- a/src/fdb5/api/helpers/ListIterator.h +++ b/src/fdb5/api/helpers/ListIterator.h @@ -19,75 +19,17 @@ #ifndef fdb5_ListIterator_H #define fdb5_ListIterator_H -#include #include -#include -#include -#include +#include #include "fdb5/database/Key.h" -#include "fdb5/database/FieldLocation.h" #include "fdb5/api/helpers/APIIterator.h" - -namespace eckit { - class Stream; - class JSON; -} +#include "fdb5/api/helpers/ListElement.h" namespace fdb5 { //---------------------------------------------------------------------------------------------------------------------- -/// Define a standard object which can be used to iterate the results of a -/// list() call on an arbitrary FDB object - -class ListElement { -public: // methods - - ListElement() = default; - ListElement(const std::vector& keyParts, std::shared_ptr location, time_t timestamp); - ListElement(eckit::Stream& s); - - const std::vector& key() const { return keyParts_; } - const FieldLocation& location() const { return *location_; } - const time_t& timestamp() const { return timestamp_; } - - Key combinedKey() const; - - void print(std::ostream& out, bool withLocation=false, bool withLength=false, bool withTimestamp=false, const char* sep = " ") const; - void json(eckit::JSON& json) const; - -private: // methods - - void encode(eckit::Stream& s) const; - - friend std::ostream& operator<<(std::ostream& os, const ListElement& e) { - e.print(os); - return os; - } - - friend eckit::Stream& operator<<(eckit::Stream& s, const ListElement& r) { - r.encode(s); - return s; - } - - friend eckit::JSON& operator<<(eckit::JSON& j, const ListElement& e) { - e.json(j); - return j; - } - -public: // members - - std::vector keyParts_; - -private: // members - - std::shared_ptr location_; - time_t timestamp_; -}; - -//---------------------------------------------------------------------------------------------------------------------- - using ListAggregateIterator = APIAggregateIterator; using ListAsyncIterator = APIAsyncIterator; @@ -113,16 +55,10 @@ class ListIterator : public APIIterator { ListElement tmp; while (APIIterator::next(tmp)) { if(deduplicate_) { - Key combinedKey = tmp.combinedKey(); - if (seenKeys_.find(combinedKey) == seenKeys_.end()) { - seenKeys_.emplace(std::move(combinedKey)); - std::swap(elem, tmp); - return true; - } - } else { - std::swap(elem, tmp); - return true; + if (const auto [iter, success] = seenKeys_.emplace(tmp.combinedKey()); !success) { continue; } } + std::swap(elem, tmp); + return true; } return false; } diff --git a/src/fdb5/api/local/AxesVisitor.cc b/src/fdb5/api/local/AxesVisitor.cc index 4d67c00ec..b95f4a794 100644 --- a/src/fdb5/api/local/AxesVisitor.cc +++ b/src/fdb5/api/local/AxesVisitor.cc @@ -11,43 +11,35 @@ #include "fdb5/api/local/AxesVisitor.h" #include "fdb5/database/Catalogue.h" +#include "fdb5/database/IndexAxis.h" +#include "fdb5/rules/Schema.h" +#include "fdb5/types/Type.h" -namespace fdb5 { -namespace api { -namespace local { +#include -//---------------------------------------------------------------------------------------------------------------------- - -AxesVisitor::AxesVisitor(eckit::Queue& queue, - const metkit::mars::MarsRequest& request, - const Config& config, - int level) : - QueryVisitor(queue, request), - schema_(config.schema()), - level_(level) {} - -#if 0 +namespace fdb5::api::local { -// TODO: Here we can do nice tricks to make things go muuuuuuuuuuuuuch faster... -// See improvements to the EntryVisitMechanism... & the schema +//---------------------------------------------------------------------------------------------------------------------- -bool AxesVisitor::preVisitDatabase(const eckit::URI& uri) { +AxesVisitor::AxesVisitor(eckit::Queue& queue, const metkit::mars::MarsRequest& request, int level): + QueryVisitor(queue, request), level_(level) { } +bool AxesVisitor::preVisitDatabase(const eckit::URI& uri, const Schema& schema) { // If level == 1, avoid constructing the Catalogue/Store objects, so just interrogate the URIs if (level_ == 1 && uri.scheme() == "toc") { - // TODO: This is hacky, only works with the toc backend... - if (schema_.matchFirstLevel(uri.path().baseName(), dbKey_)) { + /// @todo This is hacky, only works with the toc backend... + if (auto found = schema.matchDatabase(uri.path().baseName())) { + dbKey_ = *found; axes_.wipe(); axes_.insert(dbKey_); axes_.sort(); - queue_.emplace(AxesElement{std::move(dbKey_), std::move(axes_)}); + queue_.emplace(std::move(dbKey_), std::move(axes_)); } return false; } return true; } -#endif bool AxesVisitor::visitDatabase(const Catalogue& catalogue) { dbKey_ = catalogue.key(); @@ -58,6 +50,7 @@ bool AxesVisitor::visitDatabase(const Catalogue& catalogue) { } bool AxesVisitor::visitIndex(const Index& index) { + if (index.partialMatch(request_)) { IndexAxis tmpAxis; tmpAxis.insert(index.key()); @@ -77,6 +70,4 @@ void AxesVisitor::catalogueComplete(const fdb5::Catalogue& catalogue) { //---------------------------------------------------------------------------------------------------------------------- -} // namespace local -} // namespace api -} // namespace fdb5 +} // namespace fdb5::api::local diff --git a/src/fdb5/api/local/AxesVisitor.h b/src/fdb5/api/local/AxesVisitor.h index bbecb67d1..edab6e98e 100644 --- a/src/fdb5/api/local/AxesVisitor.h +++ b/src/fdb5/api/local/AxesVisitor.h @@ -13,13 +13,22 @@ #pragma once -#include "fdb5/api/local/QueryVisitor.h" -#include "fdb5/api/helpers/AxesIterator.h" +#include "eckit/container/Queue.h" +#include "fdb5/api/helpers/AxesIterator.h" +#include "fdb5/api/local/QueryVisitor.h" +#include "fdb5/database/IndexAxis.h" +#include "fdb5/database/Key.h" namespace fdb5 { -namespace api { -namespace local { + +class Index; +class Field; +class Schema; +class Store; +class Catalogue; + +namespace api::local { /// @note Helper classes for LocalFDB @@ -27,29 +36,31 @@ namespace local { class AxesVisitor : public QueryVisitor { public: - - AxesVisitor(eckit::Queue& queue, - const metkit::mars::MarsRequest& request, - const Config& config, - int level); + AxesVisitor(eckit::Queue& queue, const metkit::mars::MarsRequest& request, int level); bool visitIndexes() override { return true; } + bool visitEntries() override { return false; } - void catalogueComplete(const fdb5::Catalogue& catalogue) override; + + void catalogueComplete(const Catalogue& catalogue) override; + + bool preVisitDatabase(const eckit::URI& uri, const Schema& schema) override; + bool visitDatabase(const Catalogue& catalogue) override; - bool visitIndex(const Index&) override; - void visitDatum(const Field&, const Key&) override { NOTIMP; } + + bool visitIndex(const Index& index) override; + + void visitDatum(const Field& /*field*/, const Key& /*key*/) override { NOTIMP; } private: // members Key dbKey_; IndexAxis axes_; - const Schema& schema_; int level_; }; //---------------------------------------------------------------------------------------------------------------------- -} // namespace local -} // namespace api +} // namespace api::local + } // namespace fdb5 diff --git a/src/fdb5/api/local/ControlVisitor.h b/src/fdb5/api/local/ControlVisitor.h index 5a98fe001..3f5c1dba5 100644 --- a/src/fdb5/api/local/ControlVisitor.h +++ b/src/fdb5/api/local/ControlVisitor.h @@ -39,6 +39,7 @@ class ControlVisitor : public QueryVisitor { bool visitDatabase(const Catalogue& catalogue) override; bool visitIndex(const Index&) override { NOTIMP; } + void visitDatum(const Field&, const Key&) override { NOTIMP; } private: // members diff --git a/src/fdb5/api/local/DumpVisitor.h b/src/fdb5/api/local/DumpVisitor.h index 8fe4785be..146fcb603 100644 --- a/src/fdb5/api/local/DumpVisitor.h +++ b/src/fdb5/api/local/DumpVisitor.h @@ -48,8 +48,10 @@ class DumpVisitor : public QueryVisitor { catalogue.dump(out_, simple_); return true; } - bool visitIndex(const Index&) override { NOTIMP; } - void visitDatum(const Field&, const Key&) override { NOTIMP; } + + bool visitIndex(const Index& /*index*/) override { NOTIMP; } + + void visitDatum(const Field& /*field*/, const Key& /*datumKey*/) override { NOTIMP; } void visitDatum(const Field& field, const std::string& keyFingerprint) override { EntryVisitor::visitDatum(field, keyFingerprint); diff --git a/src/fdb5/api/local/ListVisitor.h b/src/fdb5/api/local/ListVisitor.h index ac9c15ccf..4e8a53faa 100644 --- a/src/fdb5/api/local/ListVisitor.h +++ b/src/fdb5/api/local/ListVisitor.h @@ -19,18 +19,26 @@ #ifndef fdb5_api_local_ListVisitor_H #define fdb5_api_local_ListVisitor_H +#include "eckit/container/Queue.h" +#include "eckit/exception/Exceptions.h" +#include "eckit/filesystem/URI.h" +#include "fdb5/api/helpers/ControlIterator.h" +#include "fdb5/api/helpers/ListElement.h" +#include "fdb5/api/local/QueryVisitor.h" #include "fdb5/database/Catalogue.h" +#include "fdb5/database/EntryVisitMechanism.h" +#include "fdb5/database/Field.h" #include "fdb5/database/Index.h" #include "fdb5/database/Key.h" -#include "fdb5/rules/Rule.h" -#include "fdb5/api/local/QueryVisitor.h" -#include "fdb5/api/helpers/ListIterator.h" +#include "fdb5/database/Store.h" +#include "fdb5/types/Type.h" #include "metkit/mars/MarsRequest.h" -namespace fdb5 { -namespace api { -namespace local { +#include +#include + +namespace fdb5::api::local { /// @note Helper classes for LocalFDB @@ -39,7 +47,21 @@ namespace local { struct ListVisitor : public QueryVisitor { public: - using QueryVisitor::QueryVisitor; + ListVisitor(eckit::Queue& queue, const metkit::mars::MarsRequest& request, int level): + QueryVisitor(queue, request), level_(level) { } + + /// @todo remove this with better logic + bool preVisitDatabase(const eckit::URI& uri, const Schema& schema) override { + // If level == 1, avoid constructing the Catalogue/Store objects, so just interrogate the URIs + if (level_ == 1 && uri.scheme() == "toc") { + /// @todo only works with the toc backend + if (auto dbKey = schema.matchDatabase(uri.path().baseName())) { + queue_.emplace(*dbKey, 0); + return false; + } + } + return true; + } /// Make a note of the current database. Subtract its key from the current /// request so we can test request is used in its entirety @@ -51,12 +73,16 @@ struct ListVisitor : public QueryVisitor { } bool ret = QueryVisitor::visitDatabase(catalogue); - ASSERT(catalogue.key().partialMatch(request_)); + + ASSERT(currentCatalogue_->key().partialMatch(request_)); // Subselect the parts of the request indexRequest_ = request_; - for (const auto& kv : catalogue.key()) { - indexRequest_.unsetValues(kv.first); + for (const auto& kv : currentCatalogue_->key()) { indexRequest_.unsetValues(kv.first); } + + if (level_ == 1) { + queue_.emplace(currentCatalogue_->key(), 0); + ret = false; } return ret; @@ -70,22 +96,25 @@ struct ListVisitor : public QueryVisitor { bool visitIndex(const Index& index) override { QueryVisitor::visitIndex(index); - if (index.partialMatch(request_)) { // Subselect the parts of the request datumRequest_ = indexRequest_; - for (const auto& kv : index.key()) { - datumRequest_.unsetValues(kv.first); - } + + for (const auto& kv : index.key()) { datumRequest_.unsetValues(kv.first); } // Take into account any rule-specific behaviour in the request datumRequest_ = rule_->registry().canonicalise(datumRequest_); - return true; // Explore contained entries + if (level_ == 2) { + queue_.emplace(currentCatalogue_->key(), currentIndex_->key(), 0); + return false; + } + + return true; // Explore contained entries } - return false; // Skip contained entries + return false; // Skip contained entries } /// Test if entry matches the current request. If so, add to the output queue. @@ -94,7 +123,8 @@ struct ListVisitor : public QueryVisitor { ASSERT(currentIndex_); if (datumKey.match(datumRequest_)) { - queue_.emplace(ListElement({currentCatalogue_->key(), currentIndex_->key(), datumKey}, field.stableLocation(), field.timestamp())); + queue_.emplace(currentCatalogue_->key(), currentIndex_->key(), datumKey, field.stableLocation(), + field.timestamp()); } } @@ -106,12 +136,11 @@ struct ListVisitor : public QueryVisitor { metkit::mars::MarsRequest indexRequest_; metkit::mars::MarsRequest datumRequest_; + const int level_; }; //---------------------------------------------------------------------------------------------------------------------- -} // namespace local -} // namespace api -} // namespace fdb5 +} // namespace fdb5::api::local #endif diff --git a/src/fdb5/api/local/MoveVisitor.h b/src/fdb5/api/local/MoveVisitor.h index 2e43ac6aa..8ea0ca99f 100644 --- a/src/fdb5/api/local/MoveVisitor.h +++ b/src/fdb5/api/local/MoveVisitor.h @@ -43,9 +43,12 @@ class MoveVisitor : public QueryVisitor { bool visitEntries() override { return false; } bool visitDatabase(const Catalogue& catalogue) override; - bool visitIndex(const Index&) override { NOTIMP; } - void visitDatum(const Field&, const Key&) override { NOTIMP; } - void visitDatum(const Field& field, const std::string& keyFingerprint) override { NOTIMP; } + + bool visitIndex(const Index& /*index*/) override { NOTIMP; } + + void visitDatum(const Field& /*field*/, const Key& /*datumKey*/) override { NOTIMP; } + + void visitDatum(const Field& /*field*/, const std::string& /*keyFingerprint*/) override { NOTIMP; } private: // members diff --git a/src/fdb5/api/local/PurgeVisitor.h b/src/fdb5/api/local/PurgeVisitor.h index 985367b25..c20dd26fb 100644 --- a/src/fdb5/api/local/PurgeVisitor.h +++ b/src/fdb5/api/local/PurgeVisitor.h @@ -46,8 +46,10 @@ class PurgeVisitor : public QueryVisitor { bool visitDatabase(const Catalogue& catalogue) override; bool visitIndex(const Index& index) override; void catalogueComplete(const Catalogue& catalogue) override; + void visitDatum(const Field& field, const std::string& keyFingerprint) override; - void visitDatum(const Field&, const Key&) override { NOTIMP; } + + void visitDatum(const Field& /*field*/, const Key& /*datumKey*/) override { NOTIMP; } private: // members diff --git a/src/fdb5/api/local/StatsVisitor.h b/src/fdb5/api/local/StatsVisitor.h index e62dbee20..c009da7cb 100644 --- a/src/fdb5/api/local/StatsVisitor.h +++ b/src/fdb5/api/local/StatsVisitor.h @@ -41,8 +41,10 @@ class StatsVisitor : public QueryVisitor { bool visitDatabase(const Catalogue& catalogue) override; bool visitIndex(const Index& index) override; void catalogueComplete(const Catalogue& catalogue) override; + void visitDatum(const Field& field, const std::string& keyFingerprint) override; - void visitDatum(const Field&, const Key&) override { NOTIMP; } + + void visitDatum(const Field& /*field*/, const Key& /*datumKey*/) override { NOTIMP; } private: // members diff --git a/src/fdb5/api/local/WipeVisitor.h b/src/fdb5/api/local/WipeVisitor.h index 7ef6f83c4..ecdaaea10 100644 --- a/src/fdb5/api/local/WipeVisitor.h +++ b/src/fdb5/api/local/WipeVisitor.h @@ -49,8 +49,10 @@ class WipeVisitor : public QueryVisitor { bool visitDatabase(const Catalogue& catalogue) override; bool visitIndex(const Index& index) override; void catalogueComplete(const Catalogue& catalogue) override; - void visitDatum(const Field&, const Key&) override { NOTIMP; } - void visitDatum(const Field& field, const std::string& keyFingerprint) override { NOTIMP; } + + void visitDatum(const Field& /*field*/, const Key& /*datumKey*/) override { NOTIMP; } + + void visitDatum(const Field& /*field*/, const std::string& /*keyFingerprint*/) override { NOTIMP; } void onDatabaseNotFound(const fdb5::DatabaseNotFoundException& e) override { throw e; } diff --git a/src/fdb5/daos/DaosCatalogueReader.cc b/src/fdb5/daos/DaosCatalogueReader.cc index 1f81bede7..b77918db8 100644 --- a/src/fdb5/daos/DaosCatalogueReader.cc +++ b/src/fdb5/daos/DaosCatalogueReader.cc @@ -110,13 +110,13 @@ bool DaosCatalogueReader::open() { } -bool DaosCatalogueReader::axis(const std::string &keyword, eckit::StringSet &s) const { +bool DaosCatalogueReader::axis(const std::string& keyword, eckit::DenseSet& s) const { bool found = false; if (current_.axes().has(keyword)) { found = true; const eckit::DenseSet& a = current_.axes().values(keyword); - s.insert(a.begin(), a.end()); + s.merge(a); } return found; diff --git a/src/fdb5/daos/DaosCatalogueReader.h b/src/fdb5/daos/DaosCatalogueReader.h index 022dd47d6..1fd03d1a3 100644 --- a/src/fdb5/daos/DaosCatalogueReader.h +++ b/src/fdb5/daos/DaosCatalogueReader.h @@ -38,7 +38,7 @@ class DaosCatalogueReader : public DaosCatalogue, public CatalogueReader { void clean() override {} void close() override {} - bool axis(const std::string &keyword, eckit::StringSet &s) const override; + bool axis(const std::string& keyword, eckit::DenseSet& s) const override; bool retrieve(const Key& key, Field& field) const override; diff --git a/src/fdb5/daos/DaosCatalogueWriter.cc b/src/fdb5/daos/DaosCatalogueWriter.cc index 56f2201a6..cf60c8172 100644 --- a/src/fdb5/daos/DaosCatalogueWriter.cc +++ b/src/fdb5/daos/DaosCatalogueWriter.cc @@ -83,19 +83,19 @@ DaosCatalogueWriter::DaosCatalogueWriter(const Key& key, const fdb5::Config& con int db_key_max_len = 512; // @todo: take from config if (hs.bytesWritten() > db_key_max_len) throw eckit::Exception("Serialised db key exceeded configured maximum db key length."); - - fdb5::DaosKeyValue{s, catalogue_kv_name}.put("key", h.data(), hs.bytesWritten()); + + fdb5::DaosKeyValue{s, catalogue_kv_name}.put("key", h.data(), hs.bytesWritten()); /// index newly created catalogue kv in main kv int db_loc_max_len = 512; // @todo: take from config std::string nstr = catalogue_kv_name.URI().asString(); - if (nstr.length() > db_loc_max_len) + if (nstr.length() > db_loc_max_len) throw eckit::Exception("Serialised db location exceeded configured maximum db location length."); main_kv.put(db_cont_, nstr.data(), nstr.length()); } - + /// @todo: record or read dbUID /// @note: performed RPCs: @@ -159,7 +159,7 @@ bool DaosCatalogueWriter::selectIndex(const Key& idxKey) { } catch (fdb5::DaosEntityNotFoundException& e) { firstIndexWrite_ = true; - + indexes_[idxKey] = Index( new fdb5::DaosIndex( idxKey, @@ -251,9 +251,8 @@ void DaosCatalogueWriter::archive(const Key& idxKey, const Key& datumKey, std::s for (Key::const_iterator i = datumKey.begin(); i != datumKey.end(); ++i) { - const std::string &keyword = i->first; - - std::string value = datumKey.canonicalValue(keyword); + const std::string& keyword = i->first; + const std::string& value = i->second; if (value.length() == 0) continue; diff --git a/src/fdb5/daos/DaosIndex.cc b/src/fdb5/daos/DaosIndex.cc index 05a017e67..587b4f398 100644 --- a/src/fdb5/daos/DaosIndex.cc +++ b/src/fdb5/daos/DaosIndex.cc @@ -35,7 +35,7 @@ namespace fdb5 { //---------------------------------------------------------------------------------------------------------------------- DaosIndex::DaosIndex(const Key& key, const Catalogue& catalogue, const fdb5::DaosName& name) : - IndexBase(key, "daosKeyValue", catalogue), + IndexBase(key, "daosKeyValue"), location_(buildIndexKvName(key, name), 0) { fdb5::DaosSession s{}; @@ -66,7 +66,7 @@ DaosIndex::DaosIndex(const Key& key, const Catalogue& catalogue, const fdb5::Dao } DaosIndex::DaosIndex(const Key& key, const Catalogue& catalogue, const fdb5::DaosKeyValueName& name, bool readAxes) : - IndexBase(key, "daosKeyValue", catalogue), + IndexBase(key, "daosKeyValue"), location_(name, 0) { if (readAxes) updateAxes(); @@ -230,7 +230,7 @@ void DaosIndex::entries(EntryVisitor &visitor) const { } } -const std::vector DaosIndex::dataURIs() const { +std::vector DaosIndex::dataURIs() const { /// @note: if daos index + daos store, this will return a uri to a DAOS array for each indexed field /// @note: if daos index + posix store, this will return a vector of unique uris to all referenced posix files diff --git a/src/fdb5/daos/DaosIndex.h b/src/fdb5/daos/DaosIndex.h index df83ed079..b6315228d 100644 --- a/src/fdb5/daos/DaosIndex.h +++ b/src/fdb5/daos/DaosIndex.h @@ -35,9 +35,9 @@ class DaosIndex : public IndexBase { void funlock() const override { NOTIMP; } private: // methods - const IndexLocation& location() const override { return location_; } - const std::vector dataURIs() const override; + + std::vector dataURIs() const override; bool dirty() const override { NOTIMP; } diff --git a/src/fdb5/database/ArchiveVisitor.cc b/src/fdb5/database/ArchiveVisitor.cc index 338897359..610dfd619 100644 --- a/src/fdb5/database/ArchiveVisitor.cc +++ b/src/fdb5/database/ArchiveVisitor.cc @@ -7,13 +7,13 @@ * granted to it by virtue of its status as an intergovernmental organisation nor * does it submit to any jurisdiction. */ -#include -#include "eckit/exception/Exceptions.h" -#include "fdb5/database/Archiver.h" #include "fdb5/database/ArchiveVisitor.h" +#include "fdb5/database/Archiver.h" #include "fdb5/database/Catalogue.h" #include "fdb5/database/Store.h" +#include + namespace fdb5 { ArchiveVisitor::ArchiveVisitor(Archiver& owner, const Key& initialFieldKey, const void *data, size_t size, const ArchiveCallback& callback) : @@ -28,15 +28,17 @@ void ArchiveVisitor::callbacks(fdb5::CatalogueWriter* catalogue, const Key& idxK catalogue->archive(idxKey, datumKey, std::move(fieldLocation)); } -bool ArchiveVisitor::selectDatum(const TypedKey& datumKey, const TypedKey& fullComputedKey) { +bool ArchiveVisitor::selectDatum(const Key& datumKey, const Key& fullKey) { - checkMissingKeys(fullComputedKey); + checkMissingKeys(fullKey); const Key idxKey = catalogue()->currentIndexKey(); - std::shared_ptr>> p = std::make_shared>>(std::promise>()); + std::shared_ptr>> p = + std::make_shared>>( + std::promise>()); store()->archive(idxKey, data_, size_, - std::bind(&ArchiveVisitor::callbacks, this, catalogue(), idxKey, datumKey.canonical(), p, std::placeholders::_1)); + std::bind(&ArchiveVisitor::callbacks, this, catalogue(), idxKey, datumKey, p, std::placeholders::_1)); callback_(initialFieldKey(), data_, size_, p->get_future()); return true; diff --git a/src/fdb5/database/ArchiveVisitor.h b/src/fdb5/database/ArchiveVisitor.h index 3d1c08f04..ed96eb9c2 100644 --- a/src/fdb5/database/ArchiveVisitor.h +++ b/src/fdb5/database/ArchiveVisitor.h @@ -19,7 +19,9 @@ #include "fdb5/api/helpers/Callback.h" #include "fdb5/database/BaseArchiveVisitor.h" -namespace metkit { class MarsRequest; } +namespace metkit::mars { +class MarsRequest; +} namespace fdb5 { @@ -34,10 +36,9 @@ class ArchiveVisitor : public BaseArchiveVisitor { ArchiveVisitor(Archiver& owner, const Key& dataKey, const void* data, size_t size, const ArchiveCallback& callback = CALLBACK_NOOP); protected: // methods + bool selectDatum(const Key& datumKey, const Key& fullKey) override; - bool selectDatum(const TypedKey& datumKey, const TypedKey& fullComputedKey) override; - - void print( std::ostream &out ) const override; + void print(std::ostream& out) const override; private: // methods diff --git a/src/fdb5/database/BaseArchiveVisitor.cc b/src/fdb5/database/BaseArchiveVisitor.cc index 9be4c2096..68341cddd 100644 --- a/src/fdb5/database/BaseArchiveVisitor.cc +++ b/src/fdb5/database/BaseArchiveVisitor.cc @@ -25,7 +25,7 @@ BaseArchiveVisitor::BaseArchiveVisitor(Archiver &owner, const Key& initialFieldK checkMissingKeysOnWrite_ = eckit::Resource("checkMissingKeysOnWrite", true); } -bool BaseArchiveVisitor::selectDatabase(const Key& dbKey, const TypedKey& fullComputedKey) { +bool BaseArchiveVisitor::selectDatabase(const Key& dbKey, const Key&) { LOG_DEBUG_LIB(LibFdb5) << "BaseArchiveVisitor::selectDatabase " << dbKey << std::endl; owner_.selectDatabase(dbKey); catalogue()->deselectIndex(); @@ -33,14 +33,12 @@ bool BaseArchiveVisitor::selectDatabase(const Key& dbKey, const TypedKey& fullCo return true; } -bool BaseArchiveVisitor::selectIndex(const Key& idxKey, const TypedKey& fullComputedKey) { +bool BaseArchiveVisitor::selectIndex(const Key& idxKey, const Key&) { return catalogue()->selectIndex(idxKey); } -void BaseArchiveVisitor::checkMissingKeys(const TypedKey& fullComputedKey) { - if (checkMissingKeysOnWrite_) { - fullComputedKey.validateKeys(initialFieldKey_); - } +void BaseArchiveVisitor::checkMissingKeys(const Key& fullKey) const { + if (checkMissingKeysOnWrite_) { fullKey.validateKeys(initialFieldKey_); } } const Schema& BaseArchiveVisitor::databaseSchema() const { diff --git a/src/fdb5/database/BaseArchiveVisitor.h b/src/fdb5/database/BaseArchiveVisitor.h index c313162f2..476a69027 100644 --- a/src/fdb5/database/BaseArchiveVisitor.h +++ b/src/fdb5/database/BaseArchiveVisitor.h @@ -18,7 +18,9 @@ #include "fdb5/database/WriteVisitor.h" -namespace metkit { class MarsRequest; } +namespace metkit::mars { +class MarsRequest; +} namespace fdb5 { @@ -36,25 +38,25 @@ class BaseArchiveVisitor : public WriteVisitor { BaseArchiveVisitor(Archiver& owner, const Key& initialFieldKey); protected: // methods + bool selectDatabase(const Key& dbKey, const Key&) override; - bool selectDatabase(const Key& dbKey, const TypedKey& fullComputedKey) override; + bool selectIndex(const Key& idxKey, const Key&) override; - bool selectIndex(const Key& idxKey, const TypedKey& fullComputedKey) override; - - virtual void checkMissingKeys(const TypedKey& fullComputedKey); + virtual void checkMissingKeys(const Key& fullKey) const; const Schema& databaseSchema() const override; fdb5::CatalogueWriter* catalogue() const; fdb5::Store* store() const; - const Key& initialFieldKey() const { return initialFieldKey_; } + const Key& initialFieldKey() const { return initialFieldKey_; } private: // members Archiver& owner_; - const Key initialFieldKey_; + Key initialFieldKey_; + bool checkMissingKeysOnWrite_; }; diff --git a/src/fdb5/database/BaseKey.cc b/src/fdb5/database/BaseKey.cc new file mode 100644 index 000000000..fcd8baa2b --- /dev/null +++ b/src/fdb5/database/BaseKey.cc @@ -0,0 +1,216 @@ +/* + * (C) Copyright 1996- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation nor + * does it submit to any jurisdiction. + */ + +#include "fdb5/database/BaseKey.h" + +#include "eckit/exception/Exceptions.h" +#include "eckit/log/CodeLocation.h" +#include "eckit/serialisation/Stream.h" +#include "eckit/types/Types.h" +#include "eckit/utils/StringTools.h" + +#include "metkit/mars/MarsRequest.h" + +#include +#include +#include +#include +#include + +namespace fdb5 { + +//---------------------------------------------------------------------------------------------------------------------- +// HELPERS + +namespace { + +class ReverseName { + using value_type = eckit::StringList; + +public: // methods + ReverseName() = delete; + ReverseName(const ReverseName&) = delete; + ReverseName& operator=(const ReverseName&) = delete; + ReverseName(ReverseName&&) = delete; + ReverseName& operator=(ReverseName&&) = delete; + ~ReverseName() = default; + + explicit ReverseName(const value_type& value) : value_ {value} { } + + auto begin() const -> value_type::const_reverse_iterator { return value_.rbegin(); } + + auto end() const -> value_type::const_reverse_iterator { return value_.rend(); } + +private: // members + const value_type& value_; +}; + +} // namespace + +//---------------------------------------------------------------------------------------------------------------------- +// ACCESSORS + +const std::string& BaseKey::get(const std::string& keyword) const { + + if (const auto [iter, found] = find(keyword); found) { return iter->second; } + + std::ostringstream oss; + oss << "Could not find [" + keyword + "] in " << *this; + throw eckit::SeriousBug(oss.str(), Here()); +} + +eckit::StringSet BaseKey::keys() const { + eckit::StringSet result; + for (const auto& pair : *this) { result.insert(pair.first); } + return result; +} + +metkit::mars::MarsRequest BaseKey::request(const std::string& verb) const { + return {verb, keys_}; +} + +//---------------------------------------------------------------------------------------------------------------------- +// MODIFIERS + +void BaseKey::clear() { + keys_.clear(); + names_.clear(); +} + +void BaseKey::set(const std::string& keyword, const std::string& value) { + /// @note this unfortunate (consequence of insertion-order problem) check is not fully safe + ASSERT(names_.size() == keys_.size()); + + if (const auto iter = keys_.find(keyword); iter != keys_.end()) { + iter->second = eckit::StringTools::lower(value); + } else { + names_.push_back(keyword); + keys_[keyword] = eckit::StringTools::lower(value); + } +} + +void BaseKey::unset(const std::string& keyword) { + keys_.erase(keyword); +} + +void BaseKey::push(const std::string& keyword, const std::string& value) { + keys_[keyword] = value; + names_.push_back(keyword); +} + +void BaseKey::pop(const std::string& keyword) { + keys_.erase(keyword); + ASSERT(names_.back() == keyword); + names_.pop_back(); +} + +void BaseKey::pushFrom(const BaseKey& other) { + for (const auto& keyword : other.names()) { + const auto& value = other.get(keyword); + push(keyword, value); + } +} + +void BaseKey::popFrom(const BaseKey& other) { + for (const auto& keyword : ReverseName(other.names())) { pop(keyword); } +} + +//---------------------------------------------------------------------------------------------------------------------- + +void BaseKey::decode(eckit::Stream& stream) { + + clear(); + + std::size_t size = 0; + std::string keyword; + std::string value; + + stream >> size; + for (std::size_t i = 0; i < size; ++i) { + stream >> keyword; + stream >> value; + keys_[keyword] = eckit::StringTools::lower(value); + } + + stream >> size; + names_.reserve(size); + for (std::size_t i = 0; i < size; ++i) { + stream >> keyword; + stream >> value; + names_.push_back(keyword); + } +} + +void BaseKey::encode(eckit::Stream& stream) const { + stream << keys_.size(); + for (const auto& [keyword, value] : *this) { stream << keyword << value; } + stream << names_.size(); + for (const auto& keyword : names_) { stream << keyword << ""; } // << type(keyword) +} + +size_t encodeString(const std::string& str) { + return (1 + 4 + str.length()); +} + +size_t BaseKey::encodeSize() const { + size_t size = 1 + 4; + for (const auto& [keyword, value] : keys_) { + size += encodeString(keyword) + encodeString(value); + } + size += (1 + 4); + for (const auto& keyword : names_) { + size += encodeString(keyword) + encodeString(""); + } + return size; +} + +//---------------------------------------------------------------------------------------------------------------------- + +BaseKey::operator eckit::StringDict() const { + /// @note this unfortunate (consequence of insertion-order problem) check is not fully safe + ASSERT(names_.size() == keys_.size()); + + eckit::StringDict result; + for (const auto& keyword : names()) { + const auto& value = get(keyword); + ASSERT(!value.empty()); + result[keyword] = value; + } + + return result; +} + +std::string BaseKey::toString() const { + std::string result; + + const char* sep = ""; + for (const auto& keyword : names()) { + const auto& value = get(keyword); + if (!value.empty()) { + result += sep + keyword + '=' + value; + sep = ","; + } + } + + return result; +} + +void BaseKey::print(std::ostream& out) const { + /// @note this unfortunate (consequence of insertion-order problem) check is not fully safe + if (names_.size() == size()) { + out << "{" << toString() << "}"; + } else { + out << keys_; + } +} + +//---------------------------------------------------------------------------------------------------------------------- + +} // namespace fdb5 diff --git a/src/fdb5/database/BaseKey.h b/src/fdb5/database/BaseKey.h new file mode 100644 index 000000000..8ed94629b --- /dev/null +++ b/src/fdb5/database/BaseKey.h @@ -0,0 +1,170 @@ +/* + * (C) Copyright 1996- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation nor + * does it submit to any jurisdiction. + */ + +/// @file BaseKey.h +/// @author Metin Cakircali +/// @date Oct 2024 + +#pragma once + +#include "eckit/types/Types.h" + +#include +#include +#include +#include +#include // std::pair + +namespace metkit::mars { +class MarsRequest; +} + +namespace fdb5 { + +//---------------------------------------------------------------------------------------------------------------------- + +class BaseKey { +public: // types + using pair_type = std::pair; + using value_type = eckit::StringDict; + using iterator = value_type::iterator; + using const_iterator = value_type::const_iterator; + using const_reverse_iterator = value_type::const_reverse_iterator; + +public: // methods + explicit BaseKey(value_type keys) : keys_ {std::move(keys)} { + /// @note the order of keys (in map<>) is not "insertion order" + for (const auto& key : *this) { names_.emplace_back(key.first); } + } + + BaseKey(std::initializer_list keys) : keys_(keys) { + for (const auto& key : *this) { names_.emplace_back(key.first); } + } + + explicit BaseKey(eckit::Stream& stream) { decode(stream); } + + // RULES + + BaseKey() = default; + BaseKey(const BaseKey& other) = default; + BaseKey& operator=(const BaseKey& other) = default; + BaseKey(BaseKey&& other) = default; + BaseKey& operator=(BaseKey&& other) = default; + virtual ~BaseKey() = default; + + virtual std::string type() const = 0; + + // ITERATORS + + iterator begin() noexcept { return keys_.begin(); } + + const_iterator begin() const noexcept { return keys_.begin(); } + + iterator end() noexcept { return keys_.end(); } + + const_iterator end() const noexcept { return keys_.end(); } + + const_reverse_iterator rbegin() const noexcept { return keys_.rbegin(); } + + const_reverse_iterator rend() const noexcept { return keys_.rend(); } + + std::pair find(const std::string& keyword) const { + const auto iter = keys_.find(keyword); + return {iter, iter != end()}; + } + + // ACCESSORS + + bool empty() const noexcept { return keys_.empty(); } + + std::size_t size() const noexcept { return keys_.size(); } + + /// @throws eckit::SeriousBug if 'keyword' is not found + const std::string& get(const std::string& keyword) const; + + /// @note returns a copy + /// @throws eckit::SeriousBug if 'keyword' is not found + std::string value(const std::string& keyword) const { return get(keyword); } + + eckit::StringDict keyDict() const { return keys_; } + + eckit::StringSet keys() const; + + metkit::mars::MarsRequest request(const std::string& verb = "retrieve") const; + + const eckit::StringList& names() const { return names_; } + + // MODIFIERS + + void clear(); + + void set(const std::string& keyword, const std::string& value); + + void unset(const std::string& keyword); + + void push(const std::string& keyword, const std::string& value); + + void pop(const std::string& keyword); + + void pushFrom(const BaseKey& other); + + void popFrom(const BaseKey& other); + + // OPERATORS + + const std::string& operator[](const std::string& keyword) const { return get(keyword); } + + bool operator<(const BaseKey& other) const { return keys_ < other.keys_; } + + bool operator!=(const BaseKey& other) const { return keys_ != other.keys_; } + + bool operator==(const BaseKey& other) const { return keys_ == other.keys_; } + + friend std::ostream& operator<<(std::ostream& stream, const BaseKey& key) { + key.print(stream); + return stream; + } + + friend eckit::Stream& operator>>(eckit::Stream& stream, BaseKey& key) { + key.decode(stream); + return stream; + } + + friend eckit::Stream& operator<<(eckit::Stream& stream, const BaseKey& key) { + key.encode(stream); + return stream; + } + + operator std::string() const { return toString(); } + + /// @note same as keyDict but throws when value.empty() + operator eckit::StringDict() const; + + size_t encodeSize() const; + +protected: // methods + void decode(eckit::Stream& stream); + + void encode(eckit::Stream& stream) const; + +private: // methods + std::string toString() const; + + void print(std::ostream& out) const; + +private: // members + value_type keys_; + + eckit::StringList names_; +}; + +//---------------------------------------------------------------------------------------------------------------------- + +} // namespace fdb5 diff --git a/src/fdb5/database/Catalogue.cc b/src/fdb5/database/Catalogue.cc index 573fe6c87..2fca10263 100644 --- a/src/fdb5/database/Catalogue.cc +++ b/src/fdb5/database/Catalogue.cc @@ -36,7 +36,7 @@ std::unique_ptr CatalogueImpl::buildStore() const { void Catalogue::visitEntries(EntryVisitor& visitor, bool sorted) { - std::vector all = indexes(sorted); + auto all = indexes(sorted); // Deferred reading indexes. // It is likely that many indexes in the same database share resources/files/etc. // To prevent repeated opening/closing (especially where a PooledFile would facilitate things) diff --git a/src/fdb5/database/Catalogue.h b/src/fdb5/database/Catalogue.h index 9c798035e..5ddf72cf8 100644 --- a/src/fdb5/database/Catalogue.h +++ b/src/fdb5/database/Catalogue.h @@ -14,10 +14,16 @@ #pragma once +#include +#include #include +#include +#include +#include +#include #include "eckit/config/LocalConfiguration.h" -#include "eckit/types/Types.h" +#include "eckit/thread/Mutex.h" #include "fdb5/api/helpers/ControlIterator.h" #include "fdb5/api/helpers/MoveIterator.h" @@ -25,13 +31,12 @@ #include "fdb5/database/Catalogue.h" #include "fdb5/database/Field.h" #include "fdb5/database/FieldLocation.h" -#include "fdb5/database/Key.h" #include "fdb5/database/Index.h" -#include "fdb5/api/helpers/ControlIterator.h" +#include "fdb5/database/Key.h" +#include "fdb5/database/MoveVisitor.h" #include "fdb5/database/PurgeVisitor.h" #include "fdb5/database/StatsReportVisitor.h" #include "fdb5/database/WipeVisitor.h" -#include "fdb5/database/MoveVisitor.h" #include "fdb5/rules/Schema.h" namespace fdb5 { @@ -42,9 +47,8 @@ typedef std::map IndexStore; class Catalogue { public: - - Catalogue() {} - virtual ~Catalogue() {} + Catalogue() = default; + virtual ~Catalogue() = default; virtual const Key& key() const = 0; virtual const Key& indexKey() const = 0; @@ -108,7 +112,7 @@ class CatalogueImpl : virtual public Catalogue { CatalogueImpl(const Key& key, ControlIdentifiers controlIdentifiers, const fdb5::Config& config) : dbKey_(key), config_(config), controlIdentifiers_(controlIdentifiers) {} - virtual ~CatalogueImpl() {} + ~CatalogueImpl() override {} const Key& key() const override { return dbKey_; } const Key& indexKey() const override { NOTIMP; } @@ -135,13 +139,11 @@ class CatalogueImpl : virtual public Catalogue { class CatalogueReader : virtual public Catalogue { public: - - CatalogueReader() {} - - virtual ~CatalogueReader() {} + ~CatalogueReader() override {} virtual DbStats stats() const = 0; - virtual bool axis(const std::string& keyword, eckit::StringSet& s) const { NOTIMP; } + + virtual bool axis(const std::string& /*keyword*/, eckit::DenseSet& /*string*/) const { NOTIMP; } virtual bool retrieve(const Key& key, Field& field) const = 0; }; @@ -149,9 +151,7 @@ class CatalogueReader : virtual public Catalogue { class CatalogueWriter : virtual public Catalogue { public: - - CatalogueWriter() {} - virtual ~CatalogueWriter() {} + ~CatalogueWriter() override {} virtual const Index& currentIndex() = 0; virtual const Key currentIndexKey(); diff --git a/src/fdb5/database/EntryVisitMechanism.cc b/src/fdb5/database/EntryVisitMechanism.cc index bc2b8d328..7d5bed950 100644 --- a/src/fdb5/database/EntryVisitMechanism.cc +++ b/src/fdb5/database/EntryVisitMechanism.cc @@ -8,17 +8,21 @@ * does it submit to any jurisdiction. */ -#include "fdb5/database/EntryVisitMechanism.h" - +#include "eckit/exception/Exceptions.h" #include "eckit/io/AutoCloser.h" +#include "eckit/log/Log.h" +#include "fdb5/LibFdb5.h" #include "fdb5/api/helpers/FDBToolRequest.h" +#include "fdb5/database/Engine.h" +#include "fdb5/database/EntryVisitMechanism.h" #include "fdb5/database/Manager.h" -#include "fdb5/database/Key.h" -#include "fdb5/LibFdb5.h" #include "fdb5/rules/Schema.h" #include "fdb5/database/Store.h" +#include +#include + using namespace eckit; @@ -33,14 +37,18 @@ class FDBVisitException : public eckit::Exception { //---------------------------------------------------------------------------------------------------------------------- +bool EntryVisitor::preVisitDatabase(const eckit::URI& /*uri*/, const Schema& /*schema*/) { + return true; +} + +EntryVisitor::EntryVisitor() : currentCatalogue_(nullptr), currentStore_(nullptr), currentIndex_(nullptr) {} + EntryVisitor::~EntryVisitor() { if (currentStore_) { delete currentStore_; } } -EntryVisitor::EntryVisitor() : currentCatalogue_(nullptr), currentStore_(nullptr), currentIndex_(nullptr) {} - Store& EntryVisitor::store() const { if (!currentStore_) { ASSERT(currentCatalogue_); @@ -70,19 +78,20 @@ void EntryVisitor::catalogueComplete(const Catalogue& catalogue) { } bool EntryVisitor::visitIndex(const Index& index) { + ASSERT(currentCatalogue_); currentIndex_ = &index; - rule_ = currentCatalogue_->schema().ruleFor(currentCatalogue_->key(), currentIndex_->key()); + rule_ = ¤tCatalogue_->schema().matchingRule(currentCatalogue_->key(), currentIndex_->key()); return true; } void EntryVisitor::visitDatum(const Field& field, const std::string& keyFingerprint) { ASSERT(currentCatalogue_); ASSERT(currentIndex_); - ASSERT(rule_); - Key key(keyFingerprint, *rule_); - visitDatum(field, key); -} + const auto datumKey = rule_->makeKey(keyFingerprint); + + visitDatum(field, datumKey); +} time_t EntryVisitor::indexTimestamp() const { return currentIndex_ == nullptr ? 0 : currentIndex_->timestamp(); @@ -104,32 +113,31 @@ void EntryVisitMechanism::visit(const FDBToolRequest& request, EntryVisitor& vis ASSERT(request.all() == request.request().empty()); - // TODO: Put minimim keys check into FDBToolRequest. + /// @todo Put minimim keys check into FDBToolRequest. LOG_DEBUG_LIB(LibFdb5) << "REQUEST ====> " << request.request() << std::endl; try { - - fdb5::Manager mg{dbConfig_}; + fdb5::Manager mg {dbConfig_}; std::vector uris(mg.visitableLocations(request.request(), request.all())); // n.b. it is not an error if nothing is found (especially in a sub-fdb). // And do the visitation for (const URI& uri : uris) { + if (!visitor.preVisitDatabase(uri, dbConfig_.schema())) { continue; } + /// @note: the schema of a URI returned by visitableLocations + /// matches the corresponding Engine type name + // fdb5::Engine& ng = fdb5::Engine::backend(uri.scheme()); LOG_DEBUG_LIB(LibFdb5) << "FDB processing URI " << uri << std::endl; std::unique_ptr catalogue; try { - - catalogue = CatalogueReaderFactory::instance().build(uri, dbConfig_); - } catch (fdb5::DatabaseNotFoundException& e) { - - visitor.onDatabaseNotFound(e); + catalogue = CatalogueReaderFactory::instance().build(uri, dbConfig_); - } + } catch (fdb5::DatabaseNotFoundException& e) { visitor.onDatabaseNotFound(e); } ASSERT(catalogue->open()); diff --git a/src/fdb5/database/EntryVisitMechanism.h b/src/fdb5/database/EntryVisitMechanism.h index 92139c8ca..713b90a13 100644 --- a/src/fdb5/database/EntryVisitMechanism.h +++ b/src/fdb5/database/EntryVisitMechanism.h @@ -20,13 +20,18 @@ #include "fdb5/database/DatabaseNotFoundException.h" #include "fdb5/database/Field.h" +namespace eckit { +class URI; +} + namespace fdb5 { class Catalogue; class Store; class FDBToolRequest; class Index; -class TypedKey; +class Rule; +class Key; //---------------------------------------------------------------------------------------------------------------------- @@ -41,6 +46,7 @@ class EntryVisitor : public eckit::NonCopyable { virtual bool visitIndexes() { return true; } virtual bool visitEntries() { return true; } + virtual bool preVisitDatabase(const eckit::URI& uri, const Schema& schema); virtual bool visitDatabase(const Catalogue& catalogue); // return true if Catalogue should be explored virtual bool visitIndex(const Index& index); // return true if index should be explored virtual void catalogueComplete(const Catalogue& catalogue); @@ -55,18 +61,17 @@ class EntryVisitor : public eckit::NonCopyable { Store& store() const; private: // methods - virtual void visitDatum(const Field& field, const Key& datumKey) = 0; protected: // members /// Non-owning - const Catalogue* currentCatalogue_ = nullptr; + const Catalogue* currentCatalogue_ {nullptr}; /// Owned store - mutable Store* currentStore_ = nullptr; + mutable Store* currentStore_ {nullptr}; /// Non-owning - const Index* currentIndex_ = nullptr; + const Index* currentIndex_ {nullptr}; /// Non-owning - const Rule* rule_ = nullptr; + const Rule* rule_ {nullptr}; }; //---------------------------------------------------------------------------------------------------------------------- diff --git a/src/fdb5/database/FieldLocation.cc b/src/fdb5/database/FieldLocation.cc index 4365c7f55..96b1ea0ac 100644 --- a/src/fdb5/database/FieldLocation.cc +++ b/src/fdb5/database/FieldLocation.cc @@ -127,7 +127,7 @@ FieldLocation::FieldLocation(const eckit::URI& uri) : uri_(uri) { const std::string keyStr = uri.query("remapKey"); if (!keyStr.empty()) { - remapKey_ = Key::parseString(keyStr); + remapKey_ = Key::parse(keyStr); } else { remapKey_ = Key(); } diff --git a/src/fdb5/database/FieldLocation.h b/src/fdb5/database/FieldLocation.h index 2e07ae4e8..3fcd8efac 100644 --- a/src/fdb5/database/FieldLocation.h +++ b/src/fdb5/database/FieldLocation.h @@ -36,8 +36,7 @@ namespace fdb5 { class FieldLocationVisitor; -class FieldLocation : public eckit::OwnedLock, public eckit::Streamable { - +class FieldLocation: public eckit::OwnedLock, public eckit::Streamable { public: // methods FieldLocation() : offset_(eckit::Offset(0)), length_(eckit::Length(0)), remapKey_(Key()) {} diff --git a/src/fdb5/database/Index.cc b/src/fdb5/database/Index.cc index a08db81a6..05e0d6f2d 100755 --- a/src/fdb5/database/Index.cc +++ b/src/fdb5/database/Index.cc @@ -18,10 +18,7 @@ namespace fdb5 { //---------------------------------------------------------------------------------------------------------------------- -IndexBase::IndexBase(const Key& key, const std::string& type, const Catalogue& catalogue) : - type_(type), - key_(key), - catalogue_(catalogue) {} +IndexBase::IndexBase(const Key& key, const std::string& type) : type_(type), key_(key) { } enum IndexBaseStreamKeys { IndexKeyUnrecognised, @@ -86,9 +83,7 @@ void IndexBase::decodeLegacy(eckit::Stream& s, const int version) { // decoding timestamp_ = 0; } - -IndexBase::IndexBase(eckit::Stream& s, const int version, const Catalogue& catalogue) : - catalogue_(catalogue) { +IndexBase::IndexBase(eckit::Stream& s, const int version) { if (version >= 3) decodeCurrent(s, version); else @@ -134,20 +129,19 @@ void IndexBase::put(const Key& key, const Field& field) { add(key, field); } -const TypesRegistry& IndexBase::registry() const { - if (!registry_) { - const Rule* rule = catalogue_.schema().ruleFor(catalogue_.key(), key_); - ASSERT(rule); - registry_ = std::ref(rule->registry()); - } - return registry_.value().get(); -} +// const TypesRegistry& IndexBase::registry() const { +// if (!registry_) { +// const auto& rule = catalogue_.schema().matchingRule(catalogue_.key(), key_); +// registry_ = std::ref(rule.registry()); +// } +// return registry_.value().get(); +// } bool IndexBase::partialMatch(const metkit::mars::MarsRequest& request) const { - if (!key_.partialMatch(request)) return false; + if (!key_.partialMatch(request)) { return false; } - if (!axes_.partialMatch(request, registry())) return false; + if (!axes_.partialMatch(request)) { return false; } return true; } @@ -156,6 +150,10 @@ bool IndexBase::mayContain(const Key& key) const { return axes_.contains(key); } +bool IndexBase::mayContainPartial(const Key& key) const { + return axes_.containsPartial(key); +} + const Key& IndexBase::key() const { return key_; } @@ -172,8 +170,7 @@ const IndexAxis& IndexBase::axes() const { class NullIndex : public IndexBase { public: // methods - - NullIndex() : IndexBase(Key{}, "null", NullCatalogue{}) {} + NullIndex() : IndexBase(Key {}, "null") { } private: // methods diff --git a/src/fdb5/database/Index.h b/src/fdb5/database/Index.h index 1d38114f8..43ae8ff2f 100755 --- a/src/fdb5/database/Index.h +++ b/src/fdb5/database/Index.h @@ -15,24 +15,20 @@ #ifndef fdb5_Index_H #define fdb5_Index_H -#include -#include -#include - -#include "eckit/eckit.h" +#include +#include +#include #include "eckit/io/Length.h" #include "eckit/io/Offset.h" -#include "eckit/memory/NonCopyable.h" -#include "eckit/types/FixedString.h" -#include "eckit/types/Types.h" #include "eckit/memory/Counted.h" +#include "eckit/types/Types.h" #include "fdb5/database/EntryVisitMechanism.h" #include "fdb5/database/Field.h" -#include "fdb5/database/IndexStats.h" #include "fdb5/database/IndexAxis.h" #include "fdb5/database/IndexLocation.h" +#include "fdb5/database/IndexStats.h" #include "fdb5/database/Indexer.h" @@ -55,15 +51,14 @@ class Schema; class IndexBase : public eckit::Counted { public: // methods - - IndexBase(const Key& key, const std::string& type, const Catalogue& catalogue); - IndexBase(eckit::Stream& s, const int version, const Catalogue& catalogue); + IndexBase(const Key& key, const std::string& type); + IndexBase(eckit::Stream& s, const int version); ~IndexBase() override; virtual const IndexLocation& location() const = 0; - virtual const std::vector dataURIs() const { NOTIMP; } + virtual std::vector dataURIs() const { NOTIMP; } virtual bool dirty() const = 0; @@ -88,10 +83,13 @@ class IndexBase : public eckit::Counted { virtual void encode(eckit::Stream& s, const int version) const; virtual void entries(EntryVisitor& visitor) const = 0; + + /// @note default args on virtual methods is not best practice; no guarantee that overrides will have same defaults virtual void dump(std::ostream& out, const char* indent, bool simple = false, bool dumpFields = false) const = 0; virtual bool partialMatch(const metkit::mars::MarsRequest& request) const; virtual bool mayContain(const Key& key) const; + virtual bool mayContainPartial(const Key& key) const; virtual IndexStats statistics() const = 0; @@ -104,37 +102,28 @@ class IndexBase : public eckit::Counted { void takeTimestamp() { time(×tamp_); } private: // methods + void encodeCurrent(eckit::Stream& s, int version) const; + void encodeLegacy(eckit::Stream& s, int version) const; - void encodeCurrent(eckit::Stream& s, const int version) const; - void encodeLegacy(eckit::Stream& s, const int version) const; - - void decodeCurrent(eckit::Stream& s, const int version); - void decodeLegacy(eckit::Stream& s, const int version); + void decodeCurrent(eckit::Stream& s, int version); + void decodeLegacy(eckit::Stream& s, int version); virtual void add(const Key& key, const Field &field) = 0; - const TypesRegistry& registry() const; - protected: // members std::string type_; /// @note Order of members is important here ... - IndexAxis axes_; ///< This Index spans along these axis - Key key_; ///< key that selected this index - time_t timestamp_; ///< timestamp when this Index was flushed + IndexAxis axes_; ///< This Index spans along these axis + Key key_; ///< key that selected this index + time_t timestamp_ {0}; ///< timestamp when this Index was flushed Indexer indexer_; friend std::ostream& operator<<(std::ostream& s, const IndexBase& o) { o.print(s); return s; } - -private: // members - - const Catalogue& catalogue_; - mutable std::optional> registry_; - }; //---------------------------------------------------------------------------------------------------------------------- @@ -153,14 +142,17 @@ class Index { const IndexLocation& location() const { return content_->location(); } - const std::vector dataURIs() const { return content_->dataURIs(); } + std::vector dataURIs() const { return content_->dataURIs(); } bool dirty() const { return content_->dirty(); } - void open() { return content_->open(); } - void reopen() { return content_->reopen(); } - void close() { return content_->close(); } - void flush() { return content_->flush(); } + void open() { content_->open(); } + + void reopen() { content_->reopen(); } + + void close() { content_->close(); } + + void flush() { content_->flush(); } void visit(IndexLocationVisitor& visitor) const { content_->visit(visitor); } @@ -187,6 +179,7 @@ class Index { bool partialMatch(const metkit::mars::MarsRequest& request) const { return content_->partialMatch(request); } bool mayContain(const Key& key) const { return content_->mayContain(key); } + bool mayContainPartial(const Key& key) const { return content_->mayContainPartial(key); } bool null() const { return null_; } diff --git a/src/fdb5/database/IndexAxis.cc b/src/fdb5/database/IndexAxis.cc index 6fba17385..866488938 100755 --- a/src/fdb5/database/IndexAxis.cc +++ b/src/fdb5/database/IndexAxis.cc @@ -16,9 +16,11 @@ #include "fdb5/database/AxisRegistry.h" #include "fdb5/database/IndexAxis.h" + #include "fdb5/database/Key.h" -#include "fdb5/types/TypesRegistry.h" #include "fdb5/types/Type.h" +#include "fdb5/types/TypesRegistry.h" +#include namespace fdb5 { @@ -154,7 +156,7 @@ IndexAxisStreamKeys indexAxiskeyId(const std::string& s) { if( it != keys.end() ) { return it->second; } - return IndexAxisKeyUnrecognised; + return IndexAxisKeyUnrecognised; } void IndexAxis::decodeCurrent(eckit::Stream &s, const int version) { @@ -239,45 +241,46 @@ void IndexAxis::dump(std::ostream &out, const char* indent) const { // out << std::endl; } -bool IndexAxis::partialMatch(const metkit::mars::MarsRequest& request, const TypesRegistry& registry) const { +bool IndexAxis::partialMatch(const metkit::mars::MarsRequest& request) const { // We partially match on a request // // --> keys that are in the request, but not the axis are OK (other parts of the request) // --> keys that are in the axis, but not the request are OK (list doesn't need to specify everything) // - // BUT keys tha correspond to the axis object, but do not match it, should result + // BUT keys that correspond to the axis object, but do not match it, should result // in the match failing (this will be the common outcome during the model run, when many // indexes exist) - for (const auto& kv : axis_) { - if (request.has(kv.first)) { - bool found = false; - for (const auto& rqval : request.values(kv.first)) { - if (kv.second->contains(rqval)) { - found = true; - break;; - } - std::string canonical_rqval = registry.lookupType(kv.first).toKey(rqval); - if (kv.second->contains(canonical_rqval)) { - found = true; - break;; - } - } - - if (!found) return false; + auto matchValues = [](const std::vector& rqValues, const eckit::DenseSet& values) { + if (rqValues.empty()) { return true; } + for (const auto& rqval : rqValues) { + if (values.contains(rqval)) { return true; } } + return false; + }; + + for (const auto& [keyword, values] : axis_) { + if (!matchValues(request.values(keyword, true), *values)) { return false; } } return true; } bool IndexAxis::contains(const Key& key) const { + for (const auto& [keyword, values] : axis_) { + if (!key.matchValues(keyword, *values)) { return false; } + } + return true; +} - for (AxisMap::const_iterator i = axis_.begin(); i != axis_.end(); ++i) { - - if (!key.match(i->first, *(i->second))) { +bool IndexAxis::containsPartial(const Key& key) const { + for (const auto& kv : key) { + auto it = axis_.find(kv.first); + if (it == axis_.end()) { return false; + } else { + if (!it->second->contains(kv.second)) { return false; } } } return true; @@ -286,19 +289,19 @@ bool IndexAxis::contains(const Key& key) const { void IndexAxis::insert(const Key& key) { ASSERT(!readOnly_); - for (const auto& k : key) { + for (const auto& [keyword, value] : key) { + + auto& axis_set = axis_[keyword]; - std::shared_ptr >& axis_set = axis_[k.first]; - if (!axis_set) - axis_set.reset(new eckit::DenseSet); + if (!axis_set) { axis_set = std::make_shared>(); } - axis_set->insert(key.canonicalValue(k.first)); + axis_set->insert(value); dirty_ = true; } } -/// @note: this method inserts key-value pairs into an axis in memory. +/// @note: this method inserts key-value pairs into an axis in memory. /// Intended for importing axis information from storage in the DAOS backend. /// Input values are required to be cannoicalised. void IndexAxis::insert(const std::string& axis, const std::vector& values) { diff --git a/src/fdb5/database/IndexAxis.h b/src/fdb5/database/IndexAxis.h index d57c3b51c..a718fb5a7 100644 --- a/src/fdb5/database/IndexAxis.h +++ b/src/fdb5/database/IndexAxis.h @@ -22,7 +22,7 @@ #include "eckit/container/DenseSet.h" #include "eckit/memory/NonCopyable.h" -#include "eckit/filesystem/PathName.h" +// #include "eckit/filesystem/PathName.h" #include "eckit/types/Types.h" namespace eckit { @@ -36,7 +36,8 @@ class MarsRequest; namespace fdb5 { class Key; -class TypesRegistry; + +// class TypesRegistry; //---------------------------------------------------------------------------------------------------------------------- @@ -74,8 +75,9 @@ class IndexAxis : private eckit::NonCopyable { void dump(std::ostream &out, const char* indent) const; - bool partialMatch(const metkit::mars::MarsRequest& request, const TypesRegistry& registry) const; + bool partialMatch(const metkit::mars::MarsRequest& request) const; bool contains(const Key& key) const; + bool containsPartial(const Key& key) const; /// Provide a means to test if the index has changed since it was last written out, and to /// mark that it has been written out. diff --git a/src/fdb5/database/Key.cc b/src/fdb5/database/Key.cc index bd1abee5c..a256716da 100644 --- a/src/fdb5/database/Key.cc +++ b/src/fdb5/database/Key.cc @@ -8,483 +8,163 @@ * does it submit to any jurisdiction. */ -#include -#include -#include +#include "fdb5/database/Key.h" + +#include "fdb5/database/BaseKey.h" +#include "fdb5/rules/Rule.h" +#include "fdb5/rules/Schema.h" +#include "fdb5/types/Type.h" -#include "eckit/config/Resource.h" #include "eckit/container/DenseSet.h" +#include "eckit/exception/Exceptions.h" +#include "eckit/types/Types.h" #include "eckit/utils/Tokenizer.h" -#include "eckit/utils/StringTools.h" - #include "metkit/mars/MarsRequest.h" -#include "fdb5/database/Key.h" -#include "fdb5/rules/Rule.h" -#include "fdb5/rules/Schema.h" -#include "fdb5/types/Type.h" +#include +#include +#include +#include +#include +#include namespace fdb5 { //---------------------------------------------------------------------------------------------------------------------- +// KEY -BaseKey::BaseKey(const std::string& fingerprint, const Rule& rule) { - eckit::Tokenizer parse(":", true); - eckit::StringList values; - parse(fingerprint, values); - - rule.fill(*this, values); -} - -void BaseKey::decode(eckit::Stream& s) { - - keys_.clear(); - names_.clear(); +Key Key::parse(const std::string& keyString) { - size_t n; + Key key; - s >> n; - std::string k; - std::string v; - for (size_t i = 0; i < n; ++i) { - s >> k; - s >> v; - keys_[k] = eckit::StringTools::lower(v); + for (const auto& bit : eckit::Tokenizer(",").tokenize(keyString)) { + const auto pair = eckit::Tokenizer("=").tokenize(bit); + ASSERT(pair.size() == 2); + key.push(pair[0], pair[1]); } - s >> n; - names_.reserve(n); - for (size_t i = 0; i < n; ++i) { - s >> k; - s >> v; // this is the type (ignoring FTM) - names_.push_back(k); - } -} - -size_t encodeString(const std::string& str) { - return (5 + str.length()); -} - -size_t BaseKey::encodeSize() const { - size_t size = 5; - for (const auto& [key_name, key_value] : keys_) { - size += encodeString(key_name) + encodeString(canonicalise(key_name, key_value)); - } - size += 5; - for (const auto& name : names_) { - size += encodeString(name) + encodeString(type(name)); - } - return size; + return key; } -void BaseKey::encode(eckit::Stream& s) const { - - s << keys_.size(); - for (const auto& [key_name, key_value] : keys_) { - s << key_name << canonicalise(key_name, key_value); - } +std::string Key::valuesToString() const { + std::ostringstream oss; - s << names_.size(); - for (eckit::StringList::const_iterator i = names_.begin(); i != names_.end(); ++i) { - s << (*i) << type(*i); + /// @note this unfortunate (consequence of insertion-order problem) check is not fully safe + if (names().size() != size()) { + oss << "names and keys size mismatch" << '\n' + << " names: " << names().size() << " " << names() << '\n' + << " keys: " << size() << " " << keyDict() << '\n'; + throw eckit::SeriousBug(oss.str()); } -} - -std::set BaseKey::keys() const { - std::set k; - - for (eckit::StringDict::const_iterator i = keys_.begin(); i != keys_.end(); ++i) { - k.insert(i->first); + const char* sep = ""; + for (const auto& keyword : names()) { + oss << sep << get(keyword); + sep = ":"; } - return k; -} - -void BaseKey::clear() { - keys_.clear(); - names_.clear(); + return oss.str(); } -void BaseKey::set(const std::string &k, const std::string &v) { +void Key::validateKeys(const Key& other, bool checkAlsoValues) const { - ASSERT(names_.size() == keys_.size()); + eckit::StringSet missing; + eckit::StringSet mismatch; - eckit::StringDict::iterator it = keys_.find(k); - if (it == keys_.end()) { - names_.push_back(k); - keys_[k] = eckit::StringTools::lower(v); - } else { - it->second = eckit::StringTools::lower(v); + for (const auto& [keyword, value] : other) { + if (const auto [iter, found] = find(keyword); found) { + if (checkAlsoValues && value != iter->second) { + mismatch.insert(keyword + '=' + value + " and " + iter->second); + } + } else { + missing.insert(keyword); + } } -} - -void BaseKey::unset(const std::string &k) { - keys_.erase(k); -} -void BaseKey::push(const std::string &k, const std::string &v) { - keys_[k] = v; - names_.push_back(k); -} - -void BaseKey::pop(const std::string &k) { - keys_.erase(k); - ASSERT(names_.back() == k); - names_.pop_back(); -} - -const std::string &BaseKey::get( const std::string &k ) const { - eckit::StringDict::const_iterator i = keys_.find(k); - if ( i == keys_.end() ) { + if (missing.size() || mismatch.size()) { std::ostringstream oss; - oss << "BaseKey::get() failed for [" + k + "] in " << *this; - throw eckit::SeriousBug(oss.str(), Here()); + if (missing.size()) { oss << "Keywords not used: " << missing << " "; } + if (mismatch.size()) { oss << "Values mismatch: " << mismatch << " "; } + oss << "for key=" << *this << " validating against=" << other; + throw eckit::SeriousBug(oss.str()); } - - return i->second; } -bool BaseKey::match(const BaseKey& other) const { +//---------------------------------------------------------------------------------------------------------------------- +// MATCH - for (const_iterator i = other.begin(); i != other.end(); ++i) { +bool Key::match(const Key& other) const { - const_iterator j = find(i->first); - if (j == end()) { - return false; - } + for (const auto& [keyword, value] : other) { - if (j->second != i->second && !i->second.empty()) { - return false; + if (const auto [iter, found] = find(keyword); found) { + if (iter->second == value && !value.empty()) { continue; } } + return false; } + return true; } +bool Key::match(const metkit::mars::MarsRequest& request) const { -bool BaseKey::match(const metkit::mars::MarsRequest& request) const { - - for (const auto& k : request.params()) { + // for (const auto& param : request.parameters()) { + for (const auto& param : request.params()) { - const_iterator j = find(k); - if (j == end()) { - return false; + if (auto [iter, found] = find(param); found) { + const auto& values = request.values(param); + if (std::find(values.begin(), values.end(), iter->second) != values.end()) { continue; } } - bool found=false; - const auto& values = request.values(k); - std::string can = canonicalise(k, j->second); - for (auto it = values.cbegin(); !found && it != values.cend(); it++) { - found = can == canonicalise(k, *it); - } - if (!found) { - return false; - } - } - - return true; -} - -bool BaseKey::match(const std::string &key, const eckit::DenseSet &values) const { - - eckit::StringDict::const_iterator i = find(key); - if (i == end()) { return false; } - // by default we use the exact request value. In case of mismatch, we try to canonicalise it - return values.find(i->second) != values.end() || values.find(canonicalise(key, i->second)) != values.end(); -} - -bool BaseKey::partialMatch(const metkit::mars::MarsRequest& request) const { - - for (const auto& kv : *this) { - - const auto& values = request.values(kv.first, /* emptyOk */ true); - - if (!values.empty()) { - if (std::find(values.begin(), values.end(), kv.second) == values.end()) { - return false; - } - } - } - return true; } -std::string BaseKey::canonicalValue(const std::string& keyword) const { - - eckit::StringDict::const_iterator it = keys_.find(keyword); - ASSERT(it != keys_.end()); - return canonicalise(keyword, it->second); -} - -std::string BaseKey::valuesToString() const { - - if(names_.size() != keys_.size()) { - std::stringstream ss; - ss << "names and keys size mismatch" << std::endl - << " names: " << names_.size() << " " << names_ << std::endl - << " keys: " << keys_.size() << " " << keys_ << std::endl; - - throw eckit::SeriousBug(ss.str()); - } - - std::ostringstream oss; - const char *sep = ""; - - for (eckit::StringList::const_iterator j = names_.begin(); j != names_.end(); ++j) { - eckit::StringDict::const_iterator i = keys_.find(*j); - ASSERT(i != keys_.end()); - - oss << sep; - oss << canonicalise(*j, i->second); - - sep = ":"; - } - return oss.str(); -} - - -const eckit::StringList& BaseKey::names() const { - return names_; -} - -std::string BaseKey::value(const std::string& key) const { - - eckit::StringDict::const_iterator it = keys_.find(key); - ASSERT(it != keys_.end()); - return it->second; -} - -const eckit::StringDict &BaseKey::keyDict() const { - return keys_; -} - -metkit::mars::MarsRequest BaseKey::request(const std::string& verb) const { - metkit::mars::MarsRequest req(verb); +bool Key::partialMatch(const metkit::mars::MarsRequest& request) const { - for (eckit::StringDict::const_iterator i = keys_.begin(); i != keys_.end(); ++i) { - req.setValue(i->first, i->second); - } - - return req; -} - - -fdb5::BaseKey::operator std::string() const { - ASSERT(names_.size() == keys_.size()); - return toString(); -} + for (const auto& [keyword, value] : *this) { -fdb5::BaseKey::operator eckit::StringDict() const -{ - eckit::StringDict res; + const auto& values = request.values(keyword, /* emptyOk */ true); - ASSERT(names_.size() == keys_.size()); + if (values.empty()) { continue; } - for (eckit::StringList::const_iterator j = names_.begin(); j != names_.end(); ++j) { - - eckit::StringDict::const_iterator i = keys_.find(*j); - - ASSERT(i != keys_.end()); - ASSERT(!(*i).second.empty()); - - res[*j] = canonicalise(*j, (*i).second); - } - - return res; -} - -void BaseKey::print(std::ostream &out) const { - if (names_.size() == keys_.size()) { - out << "{" << toString() << "}"; - } else { - out << keys_; - } -} - -std::string BaseKey::toString() const { - std::string res; - const char *sep = ""; - for (eckit::StringList::const_iterator j = names_.begin(); j != names_.end(); ++j) { - eckit::StringDict::const_iterator i = keys_.find(*j); - ASSERT(i != keys_.end()); - if (!i->second.empty()) { - res += sep + *j + '=' + i->second; - sep = ","; - } + if (std::find(values.begin(), values.end(), value) == values.end()) { return false; } } - return res; -} - - -//---------------------------------------------------------------------------------------------------------------------- - -Key::Key(const eckit::StringDict &keys) : - BaseKey(keys) {} -Key::Key(eckit::Stream& s) { - decode(s); + return true; } -Key::Key(std::initializer_list> l) : - BaseKey(l) {} +bool Key::matchValues(const std::string& keyword, const eckit::DenseSet& values) const { -Key::Key(const std::string& fingerprint, const Rule& rule) : - BaseKey(fingerprint, rule) {} - -Key Key::parseString(const std::string& s) { - - eckit::Tokenizer parse1(","); - eckit::Tokenizer parse2("="); - eckit::StringDict keys; - - eckit::StringList v; - parse1(s, v); - for (const auto& bit : v) { - eckit::StringList kv; - parse2(bit, kv); - ASSERT(kv.size() == 2); - keys.emplace(std::move(kv[0]), std::move(kv[1])); - } - - return Key{keys}; -} + if (const auto [iter, found] = find(keyword); found) { return values.find(iter->second) != values.end(); } -std::string Key::canonicalise(const std::string& keyword, const std::string& value) const { - return value; -} - -std::string Key::type(const std::string& keyword) const { - return ""; + return false; } //---------------------------------------------------------------------------------------------------------------------- +// TYPED KEY -TypedKey::TypedKey(const Key& key, const TypesRegistry& reg) : - BaseKey(key), registry_(std::cref(reg)) {} - -TypedKey::TypedKey(const TypesRegistry& reg) : - registry_(std::cref(reg)) {} - -TypedKey::TypedKey(const std::string &s, const Rule& rule) : - BaseKey(s, rule), - registry_(std::cref(rule.registry())) { -} - -TypedKey::TypedKey(const eckit::StringDict &keys, const TypesRegistry& reg) : - BaseKey(keys), registry_(std::cref(reg)) { -} - -TypedKey::TypedKey(eckit::Stream& s, const TypesRegistry& reg) : - registry_(std::cref(reg)) { - decode(s); -} - -TypedKey::TypedKey(std::initializer_list> l, const TypesRegistry& reg) : - BaseKey(l), registry_(std::cref(reg)) {} - -TypedKey TypedKey::parseString(const std::string &s, const TypesRegistry& registry) { - - eckit::Tokenizer parse1(","); - eckit::Tokenizer parse2("="); - TypedKey ret(std::move(registry)); - - eckit::StringList vals; - parse1(s, vals); - - for (const auto& bit : vals) { - eckit::StringList kv; - parse2(bit, kv); - ASSERT(kv.size() == 2); - - std::string v = ret.registry().lookupType(kv[0]).tidy(kv[1]); - - if (ret.find(kv[0]) == ret.end()) { - ret.push(kv[0], v); - } else { - ret.set(kv[0], v); - } - } - - return ret; -} - -void TypedKey::validateKeys(const BaseKey& other, bool checkAlsoValues) const { - - eckit::StringSet missing; - eckit::StringSet mismatch; - - for (BaseKey::const_iterator j = other.begin(); j != other.end(); ++j) { - const std::string& keyword = (*j).first; - BaseKey::const_iterator k = find(keyword); - if (k == keys_.end()) { - missing.insert(keyword); - } - else { - if(checkAlsoValues && !registry_.get().lookupType(keyword).match(keyword, j->second, k->second)) { - mismatch.insert((*j).first + "=" + j->second + " and " + k->second); - } - } - } - - if (missing.size() || mismatch.size()) { - std::ostringstream oss; - - if(missing.size()) { - oss << "Keywords not used: " << missing << " "; - } - - if(mismatch.size()) { - oss << "Values mismatch: " << mismatch << " "; - } - - oss << "for key " << *this << " validating " << other; - - throw eckit::SeriousBug(oss.str()); +Key TypedKey::tidy() const { + Key key; + for (const auto& keyword : names()) { + const auto& value = get(keyword); + value.empty() ? key.push(keyword, value) : key.push(keyword, registry_.lookupType(keyword).tidy(value)); } -} - -void TypedKey::registry(const TypesRegistry& reg) { - registry_ = std::cref(reg); -} - -const TypesRegistry& TypedKey::registry() const { - return registry_.get(); -} - -std::string TypedKey::canonicalise(const std::string& keyword, const std::string& value) const { - if (value.empty()) { - return value; - } else { - return registry().lookupType(keyword).toKey(value); - } -} - -std::string TypedKey::type(const std::string& keyword) const { - return registry().lookupType(keyword).type(); + return key; } Key TypedKey::canonical() const { - Key key{}; - - for (const auto& name: names_) { - auto m = keys_.find(name); - ASSERT(m != keys_.end()); - - key.set(name, canonicalise(name, m->second)); + Key key; + for (const auto& keyword : names()) { + const auto& value = get(keyword); + value.empty() ? key.push(keyword, value) : key.push(keyword, registry_.lookupType(keyword).toKey(value)); } - return key; } -eckit::Stream& operator>>(eckit::Stream& s, TypedKey& x) { - static TypesRegistry emptyTypesRegistry{}; - - x = TypedKey(s, emptyTypesRegistry); - return s; -} +//---------------------------------------------------------------------------------------------------------------------- -} // namespace fdb5 +} // namespace fdb5 diff --git a/src/fdb5/database/Key.h b/src/fdb5/database/Key.h index 523a7b015..7b077337b 100644 --- a/src/fdb5/database/Key.h +++ b/src/fdb5/database/Key.h @@ -13,29 +13,22 @@ /// @author Tiago Quintino /// @date Mar 2016 -#ifndef fdb5_Key_H -#define fdb5_Key_H +#pragma once -#include -#include -#include -#include -#include -#include - -#include "eckit/serialisation/Stream.h" -#include "eckit/types/Types.h" +#include "fdb5/database/BaseKey.h" #include "fdb5/types/TypesRegistry.h" +#include +#include +#include + namespace eckit { - class JSON; - template class DenseSet; +template +class DenseSet; } -namespace metkit { -namespace mars { - class MarsRequest; -} +namespace metkit::mars { +class MarsRequest; } namespace fdb5 { @@ -43,197 +36,68 @@ namespace fdb5 { class Rule; //---------------------------------------------------------------------------------------------------------------------- +// KEY -class BaseKey { - -public: // methods - - BaseKey() = default; - BaseKey(const BaseKey &key) = default; - - explicit BaseKey(const eckit::StringDict &keys) : keys_(keys) { - for (const auto& k : keys) { - names_.emplace_back(k.first); - } - } - BaseKey(std::initializer_list> l) : keys_(l) { - for (const auto& k : l) { - names_.emplace_back(k.first); - } - } - BaseKey(const std::string& fingerprint, const Rule& rule); +class Key : public BaseKey { +public: // factory + static Key parse(const std::string& keyString); - virtual ~BaseKey() = default; +public: // methods + using BaseKey::BaseKey; - std::set keys() const; + std::string type() const override { return "Key"; } - void set(const std::string &k, const std::string &v); - void unset(const std::string &k); + std::string valuesToString() const; - void push(const std::string &k, const std::string &v); - void pop(const std::string &k); + /// @throws When "other" doesn't contain all the keys of "this" + void validateKeys(const Key& other, bool checkAlsoValues = false) const; - const std::string& get( const std::string &k ) const; + // MATCH - void clear(); + bool match(const Key& other) const; - bool match(const BaseKey& other) const; bool match(const metkit::mars::MarsRequest& request) const; - bool match(const std::string& key, const eckit::DenseSet& values) const; - /// test that, if keys are present in the supplied request, they match the /// keys present in the key. Essentially implements a reject-filter bool partialMatch(const metkit::mars::MarsRequest& request) const; - bool operator< (const BaseKey& other) const { - return keys_ < other.keys_; - } - - bool operator!= (const BaseKey& other) const { - return keys_ != other.keys_; - } - - bool operator== (const BaseKey& other) const { - return keys_ == other.keys_; - } - - friend std::ostream& operator<<(std::ostream &s, const BaseKey& x) { - x.print(s); - return s; - } - - friend eckit::Stream& operator<<(eckit::Stream &s, const BaseKey& x) { - x.encode(s); - return s; - } - - std::string valuesToString() const; - - const eckit::StringList& names() const; - - std::string value(const std::string& keyword) const; - std::string canonicalValue(const std::string& keyword) const; - - typedef eckit::StringDict::const_iterator const_iterator; - typedef eckit::StringDict::const_reverse_iterator const_reverse_iterator; - - const_iterator begin() const { return keys_.begin(); } - const_iterator end() const { return keys_.end(); } - - const_reverse_iterator rbegin() const { return keys_.rbegin(); } - const_reverse_iterator rend() const { return keys_.rend(); } - - const_iterator find(const std::string& s) const { return keys_.find(s); } - - size_t size() const { return keys_.size(); } - - bool empty() const { return keys_.empty(); } - - const eckit::StringDict& keyDict() const; - - metkit::mars::MarsRequest request(const std::string& verb = "retrieve") const; - - operator std::string() const; - - virtual operator eckit::StringDict() const; - - size_t encodeSize() const; - -protected: // members - - //TODO add unit test for each type - virtual std::string canonicalise(const std::string& keyword, const std::string& value) const = 0; - virtual std::string type(const std::string& keyword) const = 0; - - void print( std::ostream &out ) const; - void decode(eckit::Stream& s); - void encode(eckit::Stream &s) const; - -private: // methods - - std::string toString() const; - -protected: // members - - eckit::StringDict keys_; - eckit::StringList names_; -}; - - -//---------------------------------------------------------------------------------------------------------------------- - -class Key : public BaseKey { - -public: // methods - - explicit Key() = default; - explicit Key(eckit::Stream &); - explicit Key(const eckit::StringDict &keys); - explicit Key(const std::string& fingerprint, const Rule& rule); - Key(std::initializer_list>); - - static Key parseString(const std::string& s); - - friend eckit::Stream& operator>>(eckit::Stream& s, Key& x) { - x = Key(s); - return s; - } - -private: // members - - std::string canonicalise(const std::string& keyword, const std::string& value) const override; - std::string type(const std::string& keyword) const override; - + bool matchValues(const std::string& keyword, const eckit::DenseSet& values) const; }; //---------------------------------------------------------------------------------------------------------------------- +// TYPED KEY class TypedKey : public BaseKey { +public: // methods + explicit TypedKey(const TypesRegistry& reg) : registry_ {reg} { } -public: // methods + // RULES + TypedKey(const TypedKey& other) = delete; + TypedKey& operator=(const TypedKey& other) = delete; + TypedKey(TypedKey&& other) = delete; + TypedKey& operator=(TypedKey&& other) = delete; + ~TypedKey() = default; - explicit TypedKey(const Key& key, const TypesRegistry& reg); - explicit TypedKey(const TypesRegistry& reg); - explicit TypedKey(eckit::Stream &, const TypesRegistry& reg); - explicit TypedKey(const std::string &keys, const Rule& rule); - explicit TypedKey(const eckit::StringDict &keys, const TypesRegistry& reg); - TypedKey(std::initializer_list>, const TypesRegistry& reg); + std::string type() const override { return "TypedKey"; } - static TypedKey parseString(const std::string&, const TypesRegistry& reg); + Key tidy() const; Key canonical() const; - /// @throws When "other" doesn't contain all the keys of "this" - void validateKeys(const BaseKey& other, bool checkAlsoValues = false) const; - - friend eckit::Stream& operator>>(eckit::Stream& s, TypedKey& x); - - // Registry is needed before we can stringise/canonicalise. - void registry(const TypesRegistry& reg); - [[ nodiscard ]] - const TypesRegistry& registry() const; - -private: // members - - //TODO add unit test for each type - std::string canonicalise(const std::string& keyword, const std::string& value) const override; - std::string type(const std::string& keyword) const override; - - std::reference_wrapper registry_; +private: // members + const TypesRegistry& registry_; }; //---------------------------------------------------------------------------------------------------------------------- -} // namespace fdb5 +} // namespace fdb5 namespace std { - template <> - struct hash { - size_t operator() (const fdb5::Key& key) const { - return std::hash()(key.valuesToString()); - } - }; -} -#endif +template<> +struct hash { + size_t operator()(const fdb5::Key& key) const { return std::hash()(key.valuesToString()); } +}; + +} // namespace std diff --git a/src/fdb5/database/Manager.cc b/src/fdb5/database/Manager.cc index 3d888bf84..1bda034b8 100644 --- a/src/fdb5/database/Manager.cc +++ b/src/fdb5/database/Manager.cc @@ -223,7 +223,7 @@ std::set Manager::engines(const metkit::mars::MarsRequest& rq, bool // Match all possible expansions of the first level according to the schema std::set keys; - config_.schema().matchFirstLevel(rq, keys, ""); + config_.schema().matchDatabase(rq, keys, ""); std::set expandedKeys; for (auto k = keys.begin(); k != keys.end(); ++k) { @@ -273,7 +273,7 @@ std::vector Manager::visitableLocations(const metkit::mars::MarsRequ std::set engines = Manager::engines(rq, all); - LOG_DEBUG_LIB(LibFdb5) << "Matching engines for request " << rq << " -> " << engines << std::endl; + LOG_DEBUG_LIB(LibFdb5) << "Matching engines for request " << rq << (all ? " ALL" : "") << " -> " << engines << std::endl; std::vector r; // union of all locations diff --git a/src/fdb5/database/MoveVisitor.h b/src/fdb5/database/MoveVisitor.h index e6efd693d..2d1e76e25 100644 --- a/src/fdb5/database/MoveVisitor.h +++ b/src/fdb5/database/MoveVisitor.h @@ -37,8 +37,10 @@ class MoveVisitor : public EntryVisitor { bool visitIndexes() override { return false; } bool visitEntries() override { return false; } - bool visitIndex(const Index&) override { NOTIMP; } - void visitDatum(const Field&, const Key&) override { NOTIMP; } + bool visitIndex(const Index& /*index*/) override { NOTIMP; } + + void visitDatum(const Field& /*field*/, const Key& /**/) override { NOTIMP; } + void visitDatum(const Field& /*field*/, const std::string& /*keyFingerprint*/) override { NOTIMP; } protected: // members @@ -50,4 +52,3 @@ class MoveVisitor : public EntryVisitor { //---------------------------------------------------------------------------------------------------------------------- } // namespace fdb5 - diff --git a/src/fdb5/database/MultiRetrieveVisitor.cc b/src/fdb5/database/MultiRetrieveVisitor.cc index c5de99e40..518d7924c 100644 --- a/src/fdb5/database/MultiRetrieveVisitor.cc +++ b/src/fdb5/database/MultiRetrieveVisitor.cc @@ -11,18 +11,19 @@ #include "fdb5/database/MultiRetrieveVisitor.h" #include +#include +#include +#include -#include "eckit/config/Resource.h" +#include "eckit/log/Log.h" #include "fdb5/LibFdb5.h" +#include "fdb5/api/helpers/ListElement.h" #include "fdb5/database/Catalogue.h" #include "fdb5/database/Key.h" -#include "fdb5/io/HandleGatherer.h" #include "fdb5/types/Type.h" #include "fdb5/types/TypesRegistry.h" - - namespace fdb5 { //---------------------------------------------------------------------------------------------------------------------- @@ -42,9 +43,9 @@ MultiRetrieveVisitor::~MultiRetrieveVisitor() { // From Visitor -bool MultiRetrieveVisitor::selectDatabase(const Key& dbKey, const TypedKey& fullComputedKey) { +bool MultiRetrieveVisitor::selectDatabase(const Key& dbKey, const Key& /* fullKey */) { - LOG_DEBUG_LIB(LibFdb5) << "FDB5 selectDatabase " << dbKey << std::endl; + LOG_DEBUG_LIB(LibFdb5) << "FDB5 selectDatabase " << dbKey << std::endl; /* is it the current DB ? */ @@ -87,26 +88,25 @@ bool MultiRetrieveVisitor::selectDatabase(const Key& dbKey, const TypedKey& full } } -bool MultiRetrieveVisitor::selectIndex(const Key& idxKey, const TypedKey&) { +bool MultiRetrieveVisitor::selectIndex(const Key& idxKey, const Key& /* fullKey */) { ASSERT(catalogue_); LOG_DEBUG_LIB(LibFdb5) << "selectIndex " << idxKey << std::endl; return catalogue_->selectIndex(idxKey); } -bool MultiRetrieveVisitor::selectDatum(const TypedKey& datumKey, const TypedKey& full) { +bool MultiRetrieveVisitor::selectDatum(const Key& datumKey, const Key& fullKey) { ASSERT(catalogue_); - LOG_DEBUG_LIB(LibFdb5) << "selectDatum " << datumKey << ", " << full << std::endl; + LOG_DEBUG_LIB(LibFdb5) << "selectDatum " << datumKey << ", " << fullKey << std::endl; Field field; - if (catalogue_->retrieve(datumKey.canonical(), field)) { + if (catalogue_->retrieve(datumKey, field)) { Key simplifiedKey; - for (auto k = datumKey.begin(); k != datumKey.end(); k++) { - if (!k->second.empty()) - simplifiedKey.set(k->first, k->second); + for (const auto& [keyword, value] : datumKey) { + if (!value.empty()) { simplifiedKey.push(keyword, value); } } - iterator_.emplace(ListElement({catalogue_->key(), catalogue_->indexKey(), simplifiedKey}, field.stableLocation(), field.timestamp())); + iterator_.emplace({catalogue_->key(), catalogue_->indexKey(), simplifiedKey, field.stableLocation(), field.timestamp()}); return true; } @@ -120,16 +120,16 @@ void MultiRetrieveVisitor::values(const metkit::mars::MarsRequest &request, eckit::StringList list; registry.lookupType(keyword).getValues(request, keyword, list, wind_, catalogue_); - eckit::StringSet filter; + eckit::DenseSet filter; bool toFilter = false; if (catalogue_) { toFilter = catalogue_->axis(keyword, filter); } - for(const auto& l: list) { - std::string v = registry.lookupType(keyword).toKey(l); + for (const auto& value : list) { + std::string v = registry.lookupType(keyword).toKey(value); if (!toFilter || filter.find(v) != filter.end()) { - values.push_back(l); + values.push_back(value); } } } diff --git a/src/fdb5/database/MultiRetrieveVisitor.h b/src/fdb5/database/MultiRetrieveVisitor.h index 98748f2ca..120b4418d 100644 --- a/src/fdb5/database/MultiRetrieveVisitor.h +++ b/src/fdb5/database/MultiRetrieveVisitor.h @@ -48,18 +48,18 @@ class MultiRetrieveVisitor : public ReadVisitor { // From Visitor - bool selectDatabase(const Key& dbKey, const TypedKey& fullComputedKey) override; + bool selectDatabase(const Key& dbKey, const Key& fullKey) override; - bool selectIndex(const Key& idxKey, const TypedKey& fullComputedKey) override; + bool selectIndex(const Key& idxKey, const Key& fullKey) override; - bool selectDatum(const TypedKey& datumKey, const TypedKey& fullComputedKey) override; + bool selectDatum(const Key& datumKey, const Key& fullKey) override; - virtual void values(const metkit::mars::MarsRequest& request, - const std::string& keyword, - const TypesRegistry& registry, - eckit::StringList& values) override; + void values(const metkit::mars::MarsRequest& request, + const std::string& keyword, + const TypesRegistry& registry, + eckit::StringList& values) override; - void print( std::ostream &out ) const override; + void print(std::ostream& out) const override; const Schema& databaseSchema() const override; diff --git a/src/fdb5/database/ReadVisitor.h b/src/fdb5/database/ReadVisitor.h index e7dbb6612..f568eb03e 100644 --- a/src/fdb5/database/ReadVisitor.h +++ b/src/fdb5/database/ReadVisitor.h @@ -22,16 +22,13 @@ #include "fdb5/database/Catalogue.h" #include "eckit/types/Types.h" -namespace metkit { -namespace mars { - class MarsRequest; -} +namespace metkit::mars { +class MarsRequest; } namespace fdb5 { class Key; -class TypedKey; class TypesRegistry; class Store; class Schema; @@ -46,9 +43,9 @@ class ReadVisitor : public eckit::NonCopyable { virtual ~ReadVisitor() {} - virtual bool selectDatabase(const Key& dbKey, const TypedKey& fullComputedKey) = 0; - virtual bool selectIndex(const Key& idxKey, const TypedKey& fullComputedKey) = 0; - virtual bool selectDatum(const TypedKey& datumKey, const TypedKey& fullComputedKey) = 0; + virtual bool selectDatabase(const Key& dbKey, const Key& fullKey) = 0; + virtual bool selectIndex(const Key& idxKey, const Key& fullKey) = 0; + virtual bool selectDatum(const Key& datumKey, const Key& fullKey) = 0; // Once we have selected a database, return its schema. Used for further iteration. virtual const Schema& databaseSchema() const = 0; diff --git a/src/fdb5/database/RetrieveVisitor.cc b/src/fdb5/database/RetrieveVisitor.cc index dfaea4244..67b11eb05 100644 --- a/src/fdb5/database/RetrieveVisitor.cc +++ b/src/fdb5/database/RetrieveVisitor.cc @@ -29,12 +29,9 @@ RetrieveVisitor::RetrieveVisitor(const Notifier &wind, HandleGatherer &gatherer) store_(nullptr), wind_(wind), gatherer_(gatherer) { } -RetrieveVisitor::~RetrieveVisitor() { -} - // From Visitor -bool RetrieveVisitor::selectDatabase(const Key& dbKey, const TypedKey&) { +bool RetrieveVisitor::selectDatabase(const Key& dbKey, const Key& /*fullKey*/) { if(catalogue_) { if(dbKey == catalogue_->key()) { @@ -57,22 +54,22 @@ bool RetrieveVisitor::selectDatabase(const Key& dbKey, const TypedKey&) { if (!catalogue_->open()) { eckit::Log::info() << "Database does not exists " << dbKey << std::endl; return false; - } else { - return true; } + + return true; } -bool RetrieveVisitor::selectIndex(const Key& idxKey, const TypedKey& fullComputedKey) { +bool RetrieveVisitor::selectIndex(const Key& idxKey, const Key& /*fullKey*/) { ASSERT(catalogue_); return catalogue_->selectIndex(idxKey); } -bool RetrieveVisitor::selectDatum(const TypedKey& datumKey, const TypedKey&) { +bool RetrieveVisitor::selectDatum(const Key& datumKey, const Key& /*fullKey*/) { ASSERT(catalogue_); Field field; - eckit::DataHandle *dh = nullptr; - if (catalogue_->retrieve(datumKey.canonical(), field)) { + eckit::DataHandle* dh = nullptr; + if (catalogue_->retrieve(datumKey, field)) { dh = store().retrieve(field); } @@ -90,7 +87,7 @@ void RetrieveVisitor::values(const metkit::mars::MarsRequest &request, eckit::StringList list; registry.lookupType(keyword).getValues(request, keyword, list, wind_, catalogue_); - eckit::StringSet filter; + eckit::DenseSet filter; bool toFilter = false; if (catalogue_) { toFilter = catalogue_->axis(keyword, filter); diff --git a/src/fdb5/database/RetrieveVisitor.h b/src/fdb5/database/RetrieveVisitor.h index 80a469fbc..7f7f9ab19 100644 --- a/src/fdb5/database/RetrieveVisitor.h +++ b/src/fdb5/database/RetrieveVisitor.h @@ -33,18 +33,14 @@ class RetrieveVisitor : public ReadVisitor { RetrieveVisitor(const Notifier &wind, HandleGatherer &gatherer); - ~RetrieveVisitor(); - - -private: // methods - +protected: // methods // From Visitor - bool selectDatabase(const Key& dbKey, const TypedKey& fullComputedKey) override; + bool selectDatabase(const Key& dbKey, const Key& fullKey) override; - bool selectIndex(const Key& idxKey, const TypedKey& fullComputedKey) override; + bool selectIndex(const Key& idxKey, const Key& fullKey) override; - bool selectDatum(const TypedKey& datumKey, const TypedKey& fullComputedKey) override; + bool selectDatum(const Key& datumKey, const Key& fullKey) override; void values(const metkit::mars::MarsRequest& request, const std::string& keyword, diff --git a/src/fdb5/database/Store.cc b/src/fdb5/database/Store.cc index 2c18203ef..f2dd9f707 100644 --- a/src/fdb5/database/Store.cc +++ b/src/fdb5/database/Store.cc @@ -28,11 +28,11 @@ void Store::archive(const Key& key, const void *data, eckit::Length length, std: catalogue_archive(archive(key, data, length)); } -std::unique_ptr Store::archive(const Key& key, const void *data, eckit::Length length) { +std::unique_ptr Store::archive(const Key& /*key*/, const void* /*data*/, eckit::Length /*length*/) { NOTIMP; } -bool Store::canMoveTo(const Key&, const Config&, const eckit::URI& dest) const { +bool Store::canMoveTo(const Key& /*key*/, const Config& /*config*/, const eckit::URI& /*dest*/) const { std::stringstream ss; ss << "Store type " << type() << " does not support move" << std::endl; throw eckit::UserError(ss.str(), Here()); diff --git a/src/fdb5/database/UriStore.h b/src/fdb5/database/UriStore.h index 0a4978ab6..60525cd95 100644 --- a/src/fdb5/database/UriStore.h +++ b/src/fdb5/database/UriStore.h @@ -18,11 +18,12 @@ #include "eckit/filesystem/PathName.h" #include "eckit/filesystem/URI.h" -#include "eckit/io/DataHandle.h" #include "eckit/io/Length.h" #include "eckit/io/Offset.h" #include "eckit/memory/NonCopyable.h" +#include + namespace eckit { class Stream; } diff --git a/src/fdb5/database/WipeVisitor.h b/src/fdb5/database/WipeVisitor.h index 272e15931..77bb078c8 100644 --- a/src/fdb5/database/WipeVisitor.h +++ b/src/fdb5/database/WipeVisitor.h @@ -37,7 +37,9 @@ class WipeVisitor : public EntryVisitor { ~WipeVisitor() override; bool visitEntries() override { return false; } - void visitDatum(const Field&, const Key&) override { NOTIMP; } + + void visitDatum(const Field& /*field*/, const Key& /*datumKey*/) override { NOTIMP; } + void visitDatum(const Field& /*field*/, const std::string& /*keyFingerprint*/) override { NOTIMP; } protected: // members diff --git a/src/fdb5/database/WriteVisitor.cc b/src/fdb5/database/WriteVisitor.cc index 02864b71c..7636960ae 100644 --- a/src/fdb5/database/WriteVisitor.cc +++ b/src/fdb5/database/WriteVisitor.cc @@ -14,15 +14,10 @@ namespace fdb5 { //---------------------------------------------------------------------------------------------------------------------- -WriteVisitor::WriteVisitor(std::vector &prev) : - prev_(prev), - rule_(0) { +WriteVisitor::WriteVisitor(std::vector& prev) : prev_(prev) { prev.resize(3); } -WriteVisitor::~WriteVisitor() { -} - //---------------------------------------------------------------------------------------------------------------------- } // namespace fdb5 diff --git a/src/fdb5/database/WriteVisitor.h b/src/fdb5/database/WriteVisitor.h index 6d09055b0..dd3dd3479 100644 --- a/src/fdb5/database/WriteVisitor.h +++ b/src/fdb5/database/WriteVisitor.h @@ -20,11 +20,12 @@ #include #include "eckit/memory/NonCopyable.h" -#include "eckit/types/Types.h" #include "fdb5/database/Key.h" -namespace metkit { class MarsRequest; } +namespace metkit::mars { +class MarsRequest; +} namespace fdb5 { @@ -36,14 +37,13 @@ class Schema; class WriteVisitor : public eckit::NonCopyable { public: // methods + WriteVisitor(std::vector&); - WriteVisitor(std::vector &); + virtual ~WriteVisitor() = default; - virtual ~WriteVisitor(); - - virtual bool selectDatabase(const Key& dbKey, const TypedKey& fullComputedKey) = 0; - virtual bool selectIndex(const Key& idxKey, const TypedKey& fullComputedKey) = 0; - virtual bool selectDatum(const TypedKey& datumKey, const TypedKey& fullComputedKey) = 0; + virtual bool selectDatabase(const Key& dbKey, const Key& fullKey) = 0; + virtual bool selectIndex(const Key& idxKey, const Key& fullKey) = 0; + virtual bool selectDatum(const Key& datumKey, const Key& fullKey) = 0; // Once we have selected a database, return its schema. Used for further iteration. virtual const Schema& databaseSchema() const = 0; @@ -70,8 +70,7 @@ class WriteVisitor : public eckit::NonCopyable { std::vector &prev_; - const Rule *rule_; // Last rule used - + const Rule* rule_ {nullptr}; // Last rule used }; //---------------------------------------------------------------------------------------------------------------------- diff --git a/src/fdb5/message/MessageArchiver.cc b/src/fdb5/message/MessageArchiver.cc index 0cc059cbf..84cad85c0 100644 --- a/src/fdb5/message/MessageArchiver.cc +++ b/src/fdb5/message/MessageArchiver.cc @@ -76,7 +76,7 @@ std::vector make_filter_requests(const std::string& s if(str.empty()) return {}; - std::set keys = fdb5::Key::parseString(str).keys(); //< keys to filter from that request + std::set keys = Key::parse(str).keys(); //< keys to filter from that request std::vector v = str_to_requests(str); diff --git a/src/fdb5/message/MessageDecoder.cc b/src/fdb5/message/MessageDecoder.cc index cd71d57d0..f40accce6 100755 --- a/src/fdb5/message/MessageDecoder.cc +++ b/src/fdb5/message/MessageDecoder.cc @@ -8,7 +8,6 @@ * does it submit to any jurisdiction. */ -#include #include #include "fdb5/message/MessageDecoder.h" @@ -16,8 +15,6 @@ #include "eckit/message/Reader.h" #include "eckit/message/Message.h" -#include "metkit/mars/MarsExpandContext.h" -#include "metkit/mars/MarsLanguage.h" #include "metkit/mars/Type.h" namespace fdb5 { @@ -30,13 +27,13 @@ class KeySetter : public eckit::message::MetadataGatherer { } void setValue(const std::string& key, long value) override { - if (key_.find(key) == key_.end()) { + if (const auto [iter, found] = key_.find(key); !found) { key_.set(key, std::to_string(value)); } } void setValue(const std::string& key, double value) override { - if (key_.find(key) == key_.end()) { + if (const auto [iter, found] = key_.find(key); !found) { key_.set(key, std::to_string(value)); } } diff --git a/src/fdb5/rules/MatchAlways.cc b/src/fdb5/rules/MatchAlways.cc index af0554a49..90fcd9df7 100644 --- a/src/fdb5/rules/MatchAlways.cc +++ b/src/fdb5/rules/MatchAlways.cc @@ -8,7 +8,7 @@ * does it submit to any jurisdiction. */ -#include "eckit/log/Log.h" +#include #include "fdb5/rules/MatchAlways.h" #include "fdb5/types/TypesRegistry.h" @@ -22,10 +22,6 @@ eckit::ClassSpec MatchAlways::classSpec_ = { &Matcher::classSpec(), "MatchAlways eckit::Reanimator MatchAlways::reanimator_; -MatchAlways::MatchAlways() : - Matcher() { -} - MatchAlways::MatchAlways(eckit::Stream&) : Matcher() { } @@ -33,13 +29,6 @@ MatchAlways::MatchAlways(eckit::Stream&) : void MatchAlways::encode(eckit::Stream& s) const { } -MatchAlways::~MatchAlways() { -} - -bool MatchAlways::match(const std::string&, const Key&) const { - return true; -} - void MatchAlways::dump(std::ostream &s, const std::string &keyword, const TypesRegistry ®istry) const { registry.dump(s, keyword); } diff --git a/src/fdb5/rules/MatchAlways.h b/src/fdb5/rules/MatchAlways.h index 9d699c7b6..2dd959af2 100644 --- a/src/fdb5/rules/MatchAlways.h +++ b/src/fdb5/rules/MatchAlways.h @@ -16,6 +16,7 @@ #pragma once #include +#include #include "fdb5/rules/Matcher.h" @@ -26,13 +27,12 @@ namespace fdb5 { class MatchAlways : public Matcher { public: // methods + MatchAlways() = default; - MatchAlways(); MatchAlways(eckit::Stream& s); + bool match(const std::string& /*keyword*/, const Key& /*key*/) const override { return true; } - ~MatchAlways() override; - - bool match(const std::string& keyword, const Key& key) const override; + bool match(const std::string& /*value*/) const override { return true; } void dump(std::ostream& s, const std::string& keyword, const TypesRegistry& registry) const override; diff --git a/src/fdb5/rules/MatchAny.cc b/src/fdb5/rules/MatchAny.cc index 2ecee2510..3cb6bf582 100644 --- a/src/fdb5/rules/MatchAny.cc +++ b/src/fdb5/rules/MatchAny.cc @@ -8,6 +8,8 @@ * does it submit to any jurisdiction. */ +#include + #include "fdb5/database/Key.h" #include "fdb5/rules/MatchAny.h" #include "fdb5/types/TypesRegistry.h" @@ -21,7 +23,7 @@ eckit::ClassSpec MatchAny::classSpec_ = { &Matcher::classSpec(), "MatchAny", }; eckit::Reanimator MatchAny::reanimator_; -MatchAny::MatchAny(const std::set &values) : +MatchAny::MatchAny(const std::set& values) : Matcher(), values_(values) { } @@ -46,18 +48,15 @@ void MatchAny::encode(eckit::Stream& s) const { } } -MatchAny::~MatchAny() { -} - bool MatchAny::match(const std::string &keyword, const Key& key) const { - auto i = key.find(keyword); + if (const auto [iter, found] = key.find(keyword); found) { return match(iter->second); } - if (i == key.end()) { - return false; - } + return false; +} - return (values_.find(i->second) != values_.end()); +bool MatchAny::match(const std::string& value) const { + return (values_.find(value) != values_.end()); } void MatchAny::dump(std::ostream &s, const std::string &keyword, const TypesRegistry ®istry) const { diff --git a/src/fdb5/rules/MatchAny.h b/src/fdb5/rules/MatchAny.h index 37294c9cb..d534429a1 100644 --- a/src/fdb5/rules/MatchAny.h +++ b/src/fdb5/rules/MatchAny.h @@ -17,6 +17,7 @@ #include #include +#include #include "fdb5/rules/Matcher.h" @@ -31,7 +32,7 @@ class MatchAny : public Matcher{ MatchAny(const std::set &values); MatchAny(eckit::Stream& s); - ~MatchAny() override; + bool match(const std::string& value) const override; bool match(const std::string& keyword, const Key& key) const override; diff --git a/src/fdb5/rules/MatchHidden.cc b/src/fdb5/rules/MatchHidden.cc index 8dc5fb8d6..dbb30c59a 100644 --- a/src/fdb5/rules/MatchHidden.cc +++ b/src/fdb5/rules/MatchHidden.cc @@ -8,17 +8,16 @@ * does it submit to any jurisdiction. */ -#include "eckit/log/Log.h" +#include +#include +#include #include "fdb5/rules/MatchHidden.h" #include "fdb5/database/Key.h" -#include "eckit/types/Types.h" #include "fdb5/types/TypesRegistry.h" namespace fdb5 { -static std::string empty; - //---------------------------------------------------------------------------------------------------------------------- eckit::ClassSpec MatchHidden::classSpec_ = { &Matcher::classSpec(), "MatchHidden", }; @@ -26,20 +25,16 @@ eckit::ClassSpec MatchHidden::classSpec_ = { &Matcher::classSpec(), "MatchHidden eckit::Reanimator MatchHidden::reanimator_; -MatchHidden::MatchHidden(const std::string &def) : - Matcher() { - default_.push_back(def); -} +MatchHidden::MatchHidden(std::string def): default_ {std::move(def)} { } + +MatchHidden::MatchHidden(eckit::Stream& stream) : Matcher() { -MatchHidden::MatchHidden(eckit::Stream& s) : - Matcher() { - size_t numValues; std::string value; - s >> numValues; + stream >> numValues; for (size_t i=0; i < numValues; i++) { - s >> value; + stream >> value; default_.push_back(value); } } @@ -51,17 +46,6 @@ void MatchHidden::encode(eckit::Stream& s) const { } } -MatchHidden::~MatchHidden() { -} - -bool MatchHidden::match(const std::string&, const Key&) const { - return true; -} - -bool MatchHidden::optional() const { - return true; -} - const std::string &MatchHidden::value(const Key&, const std::string&) const { return default_[0]; } diff --git a/src/fdb5/rules/MatchHidden.h b/src/fdb5/rules/MatchHidden.h index 115dc916c..b0f339fb6 100644 --- a/src/fdb5/rules/MatchHidden.h +++ b/src/fdb5/rules/MatchHidden.h @@ -18,6 +18,7 @@ #include #include +#include #include "fdb5/rules/Matcher.h" @@ -28,13 +29,13 @@ namespace fdb5 { class MatchHidden : public Matcher{ public: // methods + MatchHidden(std::string def); - MatchHidden(const std::string &def); MatchHidden(eckit::Stream& s); - ~MatchHidden() override; + bool match(const std::string& /*value*/) const override { return true; } - bool match(const std::string& keyword, const Key& key) const override; + bool match(const std::string& /*keyword*/, const Key& /*key*/) const override { return true; } void dump(std::ostream& s, const std::string& keyword, const TypesRegistry& registry) const override; @@ -42,15 +43,16 @@ class MatchHidden : public Matcher{ static const eckit::ClassSpec& classSpec() { return classSpec_; } private: // methods + void encode(eckit::Stream& stream) const override; - void encode(eckit::Stream&) const override; + bool optional() const override { return true; } - bool optional() const override; const std::string &value(const Key&, const std::string& keyword) const override; const std::vector& values(const metkit::mars::MarsRequest& rq, const std::string& keyword) const override; - void print( std::ostream& out ) const override; const std::string& defaultValue() const override; + void print(std::ostream& out) const override; + private: // members static eckit::ClassSpec classSpec_; diff --git a/src/fdb5/rules/MatchOptional.cc b/src/fdb5/rules/MatchOptional.cc index 8f90170b2..42ca6c9f2 100644 --- a/src/fdb5/rules/MatchOptional.cc +++ b/src/fdb5/rules/MatchOptional.cc @@ -8,10 +8,10 @@ * does it submit to any jurisdiction. */ -#include "fdb5/rules/MatchOptional.h" +#include +#include -#include "eckit/log/Log.h" -#include "eckit/types/Types.h" +#include "fdb5/rules/MatchOptional.h" #include "metkit/mars/MarsRequest.h" @@ -20,8 +20,6 @@ namespace fdb5 { -static std::string empty; - //---------------------------------------------------------------------------------------------------------------------- eckit::ClassSpec MatchOptional::classSpec_ = { &Matcher::classSpec(), "MatchOptional", }; @@ -29,14 +27,11 @@ eckit::ClassSpec MatchOptional::classSpec_ = { &Matcher::classSpec(), "MatchOpti eckit::Reanimator MatchOptional::reanimator_; -MatchOptional::MatchOptional(const std::string &def) : - Matcher() { - default_.push_back(def); -} +MatchOptional::MatchOptional(std::string def): default_ {std::move(def)} { } MatchOptional::MatchOptional(eckit::Stream& s) : Matcher() { - + size_t numValues; std::string value; @@ -54,7 +49,8 @@ void MatchOptional::encode(eckit::Stream& s) const { } } -MatchOptional::~MatchOptional() { +bool MatchOptional::match(const std::string& /*value*/) const { + return true; } bool MatchOptional::match(const std::string&, const Key&) const { @@ -65,20 +61,17 @@ bool MatchOptional::optional() const { return true; } -void MatchOptional::fill(BaseKey& key, const std::string &keyword, const std::string& value) const { +void MatchOptional::fill(Key& key, const std::string& keyword, const std::string& value) const { if (!value.empty()) { key.push(keyword, value); } } -const std::string &MatchOptional::value(const Key& key, const std::string &keyword) const { - Key::const_iterator i = key.find(keyword); +const std::string& MatchOptional::value(const Key& key, const std::string& keyword) const { - if (i == key.end()) { - return default_[0]; - } + if (const auto [iter, found] = key.find(keyword); found) { return iter->second; } - return key.get(keyword); + return default_[0]; } const std::vector& MatchOptional::values(const metkit::mars::MarsRequest& rq, const std::string& keyword) const { diff --git a/src/fdb5/rules/MatchOptional.h b/src/fdb5/rules/MatchOptional.h index 837d88e00..b9ce513d1 100644 --- a/src/fdb5/rules/MatchOptional.h +++ b/src/fdb5/rules/MatchOptional.h @@ -25,14 +25,14 @@ namespace fdb5 { //---------------------------------------------------------------------------------------------------------------------- -class MatchOptional : public Matcher{ +class MatchOptional : public Matcher { public: // methods + MatchOptional(std::string def); - MatchOptional(const std::string &def); MatchOptional(eckit::Stream& s); - ~MatchOptional() override; + bool match(const std::string& value) const override; bool match(const std::string& keyword, const Key& key) const override; @@ -48,9 +48,9 @@ class MatchOptional : public Matcher{ bool optional() const override; const std::string& value(const Key&, const std::string& keyword) const override; const std::vector& values(const metkit::mars::MarsRequest& rq, const std::string& keyword) const override; - void print( std::ostream& out ) const override; + void print(std::ostream& out) const override; const std::string& defaultValue() const override; - void fill(BaseKey& key, const std::string& keyword, const std::string& value) const override; + void fill(Key& key, const std::string& keyword, const std::string& value) const override; private: // members diff --git a/src/fdb5/rules/MatchValue.cc b/src/fdb5/rules/MatchValue.cc index 63a2b46bc..4fb1d0f8b 100644 --- a/src/fdb5/rules/MatchValue.cc +++ b/src/fdb5/rules/MatchValue.cc @@ -8,6 +8,10 @@ * does it submit to any jurisdiction. */ +#include +#include +#include + #include "fdb5/database/Key.h" #include "fdb5/rules/MatchValue.h" #include "fdb5/types/TypesRegistry.h" @@ -16,47 +20,42 @@ namespace fdb5 { //---------------------------------------------------------------------------------------------------------------------- -eckit::ClassSpec MatchValue::classSpec_ = { &Matcher::classSpec(), "MatchValue", }; +eckit::ClassSpec MatchValue::classSpec_ = {&Matcher::classSpec(), "MatchValue"}; eckit::Reanimator MatchValue::reanimator_; +//---------------------------------------------------------------------------------------------------------------------- + +MatchValue::MatchValue(std::string value) : value_ {std::move(value)} { } -MatchValue::MatchValue(const std::string &value) : - Matcher(), - value_(value) { +MatchValue::MatchValue(eckit::Stream& stream) { + stream >> value_; } -MatchValue::MatchValue(eckit::Stream& s) : - Matcher() { - - s >> value_; +void MatchValue::encode(eckit::Stream& out) const { + out << value_; } -void MatchValue::encode(eckit::Stream& s) const { - s << value_; -} -MatchValue::~MatchValue() { +bool MatchValue::match(const std::string& value) const { + return value == value_; } -bool MatchValue::match(const std::string &keyword, const Key& key) const { - auto i = key.find(keyword); +bool MatchValue::match(const std::string& keyword, const Key& key) const { - if (i == key.end()) { - return false; - } + if (const auto [iter, found] = key.find(keyword); found) { return match(iter->second); } - return ( i->second == value_ ); + return false; } -void MatchValue::dump(std::ostream &s, const std::string &keyword, const TypesRegistry ®istry) const { +void MatchValue::dump(std::ostream& s, const std::string& keyword, const TypesRegistry& registry) const { registry.dump(s, keyword); s << "=" << value_; } -void MatchValue::print(std::ostream &out) const { +void MatchValue::print(std::ostream& out) const { out << "MatchValue[value=" << value_ << "]"; } //---------------------------------------------------------------------------------------------------------------------- -} // namespace fdb5 +} // namespace fdb5 diff --git a/src/fdb5/rules/MatchValue.h b/src/fdb5/rules/MatchValue.h index b8663f918..e83a5bded 100644 --- a/src/fdb5/rules/MatchValue.h +++ b/src/fdb5/rules/MatchValue.h @@ -25,39 +25,39 @@ namespace fdb5 { //---------------------------------------------------------------------------------------------------------------------- -class MatchValue : public Matcher{ +class MatchValue : public Matcher { -public: // methods +public: // methods + MatchValue(std::string value); - MatchValue(const std::string &value); - MatchValue(eckit::Stream& s); + MatchValue(eckit::Stream& stream); - ~MatchValue() override; + bool match(const std::string& value) const override; - bool match(const std::string &keyword, const Key& key) const override; + bool match(const std::string& keyword, const Key& key) const override; - void dump(std::ostream &s, const std::string &keyword, const TypesRegistry ®istry) const override; + void dump(std::ostream& out, const std::string& keyword, const TypesRegistry& registry) const override; + // streamable const eckit::ReanimatorBase& reanimator() const override { return reanimator_; } - static const eckit::ClassSpec& classSpec() { return classSpec_; } + static const eckit::ClassSpec& classSpec() { return classSpec_; } -private: // methods +private: // methods + void encode(eckit::Stream& out) const override; - void encode(eckit::Stream&) const override; + void print(std::ostream& out) const override; - void print( std::ostream &out ) const override; +private: // members + std::string value_; -private: // members + // streamable - static eckit::ClassSpec classSpec_; + static eckit::ClassSpec classSpec_; static eckit::Reanimator reanimator_; - - std::string value_; - }; //---------------------------------------------------------------------------------------------------------------------- -} // namespace fdb5 +} // namespace fdb5 #endif diff --git a/src/fdb5/rules/Matcher.cc b/src/fdb5/rules/Matcher.cc index 15a56ef76..a9b1c98b5 100644 --- a/src/fdb5/rules/Matcher.cc +++ b/src/fdb5/rules/Matcher.cc @@ -12,53 +12,40 @@ #include "metkit/mars/MarsRequest.h" -#include "fdb5/rules/Matcher.h" #include "fdb5/database/Key.h" +#include "fdb5/rules/Matcher.h" namespace fdb5 { //---------------------------------------------------------------------------------------------------------------------- -eckit::ClassSpec Matcher::classSpec_ = { &eckit::Streamable::classSpec(), "Matcher", }; - -Matcher::Matcher() { -} +eckit::ClassSpec Matcher::classSpec_ = {&eckit::Streamable::classSpec(), "Matcher"}; -Matcher::Matcher(eckit::Stream&) { -} +Matcher::Matcher(eckit::Stream& stream) { } -void Matcher::encode(eckit::Stream& s) const { -} +void Matcher::encode(eckit::Stream& out) const { } -Matcher::~Matcher() { -} - -bool Matcher::optional() const { - return false; -} - -const std::string &Matcher::value(const Key& key, const std::string &keyword) const { +const std::string& Matcher::value(const Key& key, const std::string& keyword) const { return key.get(keyword); } -const std::vector &Matcher::values(const metkit::mars::MarsRequest& rq, const std::string &keyword) const { +const std::vector& Matcher::values(const metkit::mars::MarsRequest& rq, const std::string& keyword) const { return rq.values(keyword); } -void Matcher::fill(BaseKey& key, const std::string &keyword, const std::string& value) const { +void Matcher::fill(Key& key, const std::string& keyword, const std::string& value) const { key.push(keyword, value); } - -const std::string &Matcher::defaultValue() const { +const std::string& Matcher::defaultValue() const { NOTIMP; } -std::ostream &operator<<(std::ostream &s, const Matcher &x) { +std::ostream& operator<<(std::ostream& s, const Matcher& x) { x.print(s); return s; } //---------------------------------------------------------------------------------------------------------------------- -} // namespace fdb5 +} // namespace fdb5 diff --git a/src/fdb5/rules/Matcher.h b/src/fdb5/rules/Matcher.h index 14b6f9627..e243ed621 100644 --- a/src/fdb5/rules/Matcher.h +++ b/src/fdb5/rules/Matcher.h @@ -22,15 +22,13 @@ #include "eckit/serialisation/Streamable.h" class MarsTask; -namespace metkit { -namespace mars { - class MarsRequest; -} + +namespace metkit::mars { +class MarsRequest; } namespace fdb5 { -class BaseKey; class Key; class TypesRegistry; @@ -38,44 +36,43 @@ class TypesRegistry; class Matcher : public eckit::Streamable { -public: // methods - - Matcher(); - Matcher(eckit::Stream& s); +public: // methods + Matcher() = default; - virtual ~Matcher(); + Matcher(eckit::Stream& stream); - virtual bool optional() const; + virtual bool optional() const { return false; } - virtual const std::string &value(const Key& , const std::string &keyword) const; + virtual const std::string& value(const Key&, const std::string& keyword) const; virtual const std::vector& values(const metkit::mars::MarsRequest& rq, const std::string& keyword) const; - virtual const std::string &defaultValue() const; + virtual const std::string& defaultValue() const; - virtual bool match(const std::string &keyword, const Key& key) const = 0; - virtual void fill(BaseKey& key, const std::string &keyword, const std::string& value) const; + virtual bool match(const std::string& value) const = 0; + virtual bool match(const std::string& keyword, const Key& key) const = 0; + virtual void fill(Key& key, const std::string& keyword, const std::string& value) const; + virtual void dump(std::ostream& s, const std::string& keyword, const TypesRegistry& registry) const = 0; - virtual void dump(std::ostream &s, const std::string &keyword, const TypesRegistry ®istry) const = 0; + friend std::ostream& operator<<(std::ostream& s, const Matcher& x); - friend std::ostream &operator<<(std::ostream &s, const Matcher &x); + // streamable - static const eckit::ClassSpec& classSpec() { return classSpec_; } + static const eckit::ClassSpec& classSpec() { return classSpec_; } -private: // methods +private: // methods + void encode(eckit::Stream& out) const override; - void encode(eckit::Stream&) const override; + virtual void print(std::ostream& out) const = 0; - virtual void print( std::ostream &out ) const = 0; +private: // members + // streamable -private: // members - - static eckit::ClassSpec classSpec_; + static eckit::ClassSpec classSpec_; static eckit::Reanimator reanimator_; - }; //---------------------------------------------------------------------------------------------------------------------- -} // namespace fdb5 +} // namespace fdb5 #endif diff --git a/src/fdb5/rules/Predicate.cc b/src/fdb5/rules/Predicate.cc index 6d7b08ad6..e19a7382e 100644 --- a/src/fdb5/rules/Predicate.cc +++ b/src/fdb5/rules/Predicate.cc @@ -8,59 +8,63 @@ * does it submit to any jurisdiction. */ +#include +#include +#include + +#include "eckit/serialisation/Stream.h" + #include "metkit/mars/MarsRequest.h" #include "fdb5/database/Key.h" -#include "fdb5/rules/Predicate.h" #include "fdb5/rules/Matcher.h" +#include "fdb5/rules/Predicate.h" namespace fdb5 { //---------------------------------------------------------------------------------------------------------------------- -eckit::ClassSpec Predicate::classSpec_ = { &eckit::Streamable::classSpec(), "Predicate", }; +eckit::ClassSpec Predicate::classSpec_ = {&eckit::Streamable::classSpec(), "Predicate"}; eckit::Reanimator Predicate::reanimator_; -Predicate::Predicate(const std::string &keyword, Matcher *matcher) : - matcher_(matcher), - keyword_(keyword) { -} +//---------------------------------------------------------------------------------------------------------------------- -Predicate::Predicate(eckit::Stream& s) { - s >> keyword_; - matcher_.reset(eckit::Reanimator::reanimate(s)); -} +Predicate::Predicate(std::string keyword, Matcher* matcher) : keyword_ {std::move(keyword)}, matcher_ {matcher} { } -void Predicate::encode(eckit::Stream& s) const { - s << keyword_; - s << *matcher_; +Predicate::Predicate(eckit::Stream& stream) { + stream >> keyword_; + matcher_.reset(eckit::Reanimator::reanimate(stream)); } -Predicate::~Predicate() { +//---------------------------------------------------------------------------------------------------------------------- + +void Predicate::encode(eckit::Stream& out) const { + out << keyword_; + out << *matcher_; } bool Predicate::match(const Key& key) const { return matcher_->match(keyword_, key); } -void Predicate::dump(std::ostream &s, const TypesRegistry ®istry) const { - matcher_->dump(s, keyword_, registry); +bool Predicate::match(const std::string& value) const { + return matcher_->match(value); } -void Predicate::print(std::ostream &out) const { - out << "Predicate[keyword=" << keyword_ << ",matcher=" << *matcher_ << "]"; +void Predicate::dump(std::ostream& out, const TypesRegistry& registry) const { + matcher_->dump(out, keyword_, registry); } -std::string Predicate::keyword() const { - return keyword_; +void Predicate::print(std::ostream& out) const { + out << "Predicate[keyword=" << keyword_ << ",matcher=" << *matcher_ << "]"; } bool Predicate::optional() const { return matcher_->optional(); } -const std::string &Predicate::value(const Key& key) const { +const std::string& Predicate::value(const Key& key) const { return matcher_->value(key, keyword_); } @@ -68,20 +72,19 @@ const std::vector& Predicate::values(const metkit::mars::MarsReques return matcher_->values(rq, keyword_); } -void Predicate::fill(BaseKey& key, const std::string& value) const { +void Predicate::fill(Key& key, const std::string& value) const { matcher_->fill(key, keyword_, value); } -const std::string &Predicate::defaultValue() const { +const std::string& Predicate::defaultValue() const { return matcher_->defaultValue(); } -std::ostream &operator<<(std::ostream &s, const Predicate &x) { - x.print(s); - return s; +std::ostream& operator<<(std::ostream& out, const Predicate& predicate) { + predicate.print(out); + return out; } - //---------------------------------------------------------------------------------------------------------------------- -} // namespace fdb5 +} // namespace fdb5 diff --git a/src/fdb5/rules/Predicate.h b/src/fdb5/rules/Predicate.h index 84ae390e1..6836aa8ae 100644 --- a/src/fdb5/rules/Predicate.h +++ b/src/fdb5/rules/Predicate.h @@ -17,68 +17,79 @@ #define fdb5_Predicate_H #include -#include #include +#include +#include #include "eckit/serialisation/Streamable.h" -#include "eckit/serialisation/Reanimator.h" -namespace metkit { class MarsRequest; } +#include "fdb5/rules/Matcher.h" + +namespace metkit::mars { +class MarsRequest; +} namespace fdb5 { class Key; -class BaseKey; -class Matcher; +// class Matcher; class TypesRegistry; //---------------------------------------------------------------------------------------------------------------------- class Predicate : public eckit::Streamable { -public: // methods +public: // methods + Predicate(std::string keyword, Matcher* matcher); - Predicate(const std::string &keyword, Matcher *matcher); - Predicate(eckit::Stream& s); + explicit Predicate(eckit::Stream& stream); - ~Predicate(); + /// @note this calls find() on key; prefer match(value) bool match(const Key& key) const; - void dump( std::ostream &s, const TypesRegistry ®istry ) const; - void fill(BaseKey& key, const std::string& value) const; + bool match(const std::string& value) const; + + void dump(std::ostream& out, const TypesRegistry& registry) const; + + void fill(Key& key, const std::string& value) const; + + const std::string& value(const Key& key) const; - const std::string &value(const Key& key) const; const std::vector& values(const metkit::mars::MarsRequest& rq) const; - const std::string &defaultValue() const; + + const std::string& defaultValue() const; bool optional() const; - std::string keyword() const; + const std::string& keyword() const { return keyword_; } - const eckit::ReanimatorBase& reanimator() const override { return reanimator_; } - static const eckit::ClassSpec& classSpec() { return classSpec_; } + // streamable -private: // methods + const eckit::ReanimatorBase& reanimator() const override { return reanimator_; } - friend std::ostream &operator<<(std::ostream &s, const Predicate &x); + static const eckit::ClassSpec& classSpec() { return classSpec_; } - void encode(eckit::Stream& s) const override; +private: // methods + void encode(eckit::Stream& out) const override; - void print( std::ostream &out ) const; + void print(std::ostream& out) const; -private: // members + friend std::ostream& operator<<(std::ostream& out, const Predicate& predicate); - static eckit::ClassSpec classSpec_; - static eckit::Reanimator reanimator_; +private: // members + std::string keyword_; std::unique_ptr matcher_; - std::string keyword_; + // streamable + + static eckit::ClassSpec classSpec_; + static eckit::Reanimator reanimator_; }; //---------------------------------------------------------------------------------------------------------------------- -} // namespace fdb5 +} // namespace fdb5 #endif diff --git a/src/fdb5/rules/Rule.cc b/src/fdb5/rules/Rule.cc index c87baaa2f..c12edb97e 100644 --- a/src/fdb5/rules/Rule.cc +++ b/src/fdb5/rules/Rule.cc @@ -8,407 +8,323 @@ * does it submit to any jurisdiction. */ -#include "fdb5/rules/Rule.h" - -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include "eckit/config/Resource.h" +#include "eckit/exception/Exceptions.h" +#include "eckit/serialisation/Reanimator.h" +#include "eckit/types/Types.h" +#include "eckit/utils/Tokenizer.h" #include "metkit/mars/MarsRequest.h" -#include "fdb5/rules/Predicate.h" -#include "fdb5/rules/Schema.h" +#include "fdb5/LibFdb5.h" +#include "fdb5/database/BaseKey.h" #include "fdb5/database/Key.h" #include "fdb5/database/ReadVisitor.h" #include "fdb5/database/WriteVisitor.h" +#include "fdb5/rules/Predicate.h" +#include "fdb5/rules/Rule.h" +#include "fdb5/rules/Schema.h" #include "fdb5/types/Type.h" - +#include "fdb5/types/TypesRegistry.h" namespace fdb5 { //---------------------------------------------------------------------------------------------------------------------- +// GRAPH -eckit::ClassSpec Rule::classSpec_ = { &eckit::Streamable::classSpec(), "Rule", }; +namespace { -eckit::Reanimator Rule::reanimator_; +class RuleGraph { + struct RuleNode { -Rule::Rule(const Schema &schema, - size_t line, - std::vector &predicates, std::vector &rules, - const std::map &types): - schema_(schema), line_(line) { - std::swap(predicates, predicates_); - std::swap(rules, rules_); - for (std::map::const_iterator i = types.begin(); i != types.end(); ++i) { - registry_.addType(i->first, i->second); - } -} + RuleNode(const RuleNode&) = delete; + RuleNode& operator=(const RuleNode&) = delete; + RuleNode(RuleNode&&) = delete; + RuleNode& operator=(RuleNode&&) = delete; + ~RuleNode() = default; -Rule::Rule(eckit::Stream& s): - Rule(Schema(""), s) { - NOTIMP; -} + explicit RuleNode(const std::string& keyword) : keyword_ {keyword} { } -Rule::Rule(const Schema &schema, eckit::Stream& s): - schema_(schema), registry_(s) { + const std::string& keyword_; - size_t numPredicates; - size_t numRules; + eckit::StringList values_; + }; - s >> line_; - s >> numPredicates; - for (size_t i=0; i < numPredicates; i++) { - predicates_.push_back(eckit::Reanimator::reanimate(s)); - } +public: // types + using value_type = std::list; + using reference = eckit::StringList&; + using const_iterator = value_type::const_iterator; - s >> numRules; - for (size_t i=0; i < numRules; i++) { - rules_.push_back(new Rule(schema, s)); - } -} +public: // methods + reference push(const std::string& keyword) { return nodes_.emplace_back(keyword).values_; } -void Rule::encode(eckit::Stream& s) const { - - registry_.encode(s); + std::size_t size() const { return nodes_.size(); } - s << line_; - s << predicates_.size(); - for (const Predicate* predicate : predicates_) { - s << *predicate; - } - s << rules_.size(); - for (const Rule* rule : rules_) { - rule->encode(s); - } -} + std::vector makeKeys() const { + std::set seen; + std::vector keys; -Rule::~Rule() { - for (std::vector::iterator i = predicates_.begin(); i != predicates_.end(); ++i ) { - delete *i; - } + if (!nodes_.empty()) { + Key key; + visit(nodes_.begin(), key, seen, keys); + } - for (std::vector::iterator i = rules_.begin(); i != rules_.end(); ++i ) { - delete *i; + return keys; } -} -void Rule::expand( const metkit::mars::MarsRequest &request, - std::vector::const_iterator cur, - size_t depth, - std::vector &keys, - TypedKey& fullComputedKey, - ReadVisitor &visitor) const { - - ASSERT(depth < 3); - - if (cur == predicates_.end()) { + void canonicalise(const TypesRegistry& registry) { + for (auto& [keyword, values] : nodes_) { + const auto& type = registry.lookupType(keyword); + for (auto& value : values) { + if (!value.empty()) { value = type.toKey(value); } + } + } + } - keys[depth].registry(registry()); +private: // methods + // Recursive DFS (depth-first search) to generate all possible keys + void visit(const_iterator iter, Key& key, std::set& seen, std::vector& keys) const { - // TODO: join these 2 methods - if (rules_.empty()) { - ASSERT(depth == 2); /// we have 3 levels ATM - if (!visitor.selectDatum( keys[2], fullComputedKey)) { - return; // This it not useful + if (iter == nodes_.end()) { + auto it = seen.find(key); + if (it == seen.end()) { + seen.insert(key); + keys.push_back(key); } - } else { + return; + } - switch (depth) { - case 0: - if (!visitor.selectDatabase(keys[0].canonical(), fullComputedKey)) { - return; - }; - - // Here we recurse on the database's schema (rather than the master schema) - ASSERT(keys[0] == fullComputedKey); - visitor.databaseSchema().expandSecond(request, visitor, keys[0].canonical()); - return; - - case 1: - if (!visitor.selectIndex(keys[1].canonical(), fullComputedKey)) { - return; - } - break; - - default: - ASSERT(depth == 0 || depth == 1); - break; - } + const auto& [keyword, values] = *iter; - for (std::vector::const_iterator i = rules_.begin(); i != rules_.end(); ++i ) { - (*i)->expand(request, visitor, depth + 1, keys, fullComputedKey); - } + auto next = iter; + ++next; + + for (const auto& value : values) { + key.push(keyword, value); + visit(next, key, seen, keys); + key.pop(keyword); } - return; } - std::vector::const_iterator next = cur; - ++next; - - const std::string &keyword = (*cur)->keyword(); +private: // members + value_type nodes_; +}; - eckit::StringList values; - visitor.values(request, keyword, registry_, values); +} // namespace - // eckit::Log::info() << "keyword " << keyword << " values " << values << std::endl; +//---------------------------------------------------------------------------------------------------------------------- +// RULE - TypedKey& k = keys[depth]; +eckit::ClassSpec RuleDatum::classSpec_ = {&Rule::classSpec(), "RuleDatum"}; +eckit::Reanimator RuleDatum::reanimator_; - if (values.empty() && (*cur)->optional()) { - values.push_back((*cur)->defaultValue()); - } +eckit::ClassSpec RuleIndex::classSpec_ = {&Rule::classSpec(), "RuleIndex"}; +eckit::Reanimator RuleIndex::reanimator_; - for (eckit::StringList::const_iterator i = values.begin(); i != values.end(); ++i) { +eckit::ClassSpec RuleDatabase::classSpec_ = {&Rule::classSpec(), "RuleDatabase"}; +eckit::Reanimator RuleDatabase::reanimator_; - k.push(keyword, *i); - fullComputedKey.push(keyword, *i); +//---------------------------------------------------------------------------------------------------------------------- - if ((*cur)->match(k.canonical())) - expand(request, next, depth, keys, fullComputedKey, visitor); +Rule::Rule(const std::size_t line, Predicates& predicates, const eckit::StringDict& types) + : line_ {line}, predicates_ {std::move(predicates)} { + for (const auto& [keyword, type] : types) { registry_.addType(keyword, type); } +} - fullComputedKey.pop(keyword); - k.pop(keyword); +void Rule::decode(eckit::Stream& stream) { + size_t numPred = 0; + registry_.decode(stream); + stream >> line_; + stream >> numPred; + + predicates_.reserve(numPred); + for (size_t i = 0; i < numPred; ++i) { + predicates_.emplace_back(eckit::Reanimator::reanimate(stream)); } - } -void Rule::expand(const metkit::mars::MarsRequest &request, ReadVisitor &visitor, size_t depth, std::vector &keys, TypedKey& fullComputedKey) const { - ASSERT(keys.size() == 3); - expand(request, predicates_.begin(), depth, keys, fullComputedKey, visitor); +void Rule::encode(eckit::Stream& out) const { + registry_.encode(out); + out << line_; + out << predicates_.size(); + for (const auto& pred : predicates_) { out << *pred; } } -void Rule::expand( const Key& initialFieldKey, - std::vector::const_iterator cur, - size_t depth, - std::vector &keys, - TypedKey& fullComputedKey, - WriteVisitor &visitor) const { +//---------------------------------------------------------------------------------------------------------------------- +// MATCHING KEYS + +std::optional Rule::findMatchingKey(const Key& field) const { + + if (field.size() < predicates_.size()) { return {}; } + + TypedKey key(registry_); - static bool matchFirstFdbRule = eckit::Resource("matchFirstFdbRule", true); + for (const auto& pred : predicates_) { - if (matchFirstFdbRule && visitor.rule()) { - return; + /// @note the key is constructed from the predicate + if (!pred->match(field)) { return {}; } + + const auto& keyword = pred->keyword(); + + key.push(keyword, pred->value(field)); } - ASSERT(depth < 3); + return key.canonical(); +} - if (cur == predicates_.end()) { +std::optional Rule::findMatchingKey(const eckit::StringList& values) const { - keys[depth].registry(registry()); + if (predicates_.empty()) { return {}; } - if (rules_.empty()) { - ASSERT(depth == 2); /// we have 3 levels ATM - if (visitor.rule() != 0) { - std::ostringstream oss; - oss << "More than one rule matching " - << keys[0] << ", " - << keys[1] << ", " - << keys[2] << " " - << topRule() << " and " - << visitor.rule()->topRule(); - throw eckit::SeriousBug(oss.str()); - } - visitor.rule(this); - visitor.selectDatum( keys[2], fullComputedKey); - } else { + ASSERT(values.size() >= predicates_.size()); - switch (depth) { - case 0: - if (keys[0] != visitor.prev_[0] /*|| keys[0].registry() != visitor.prev_[0].registry()*/) { - visitor.selectDatabase(keys[0].canonical(), fullComputedKey); - visitor.prev_[0] = keys[0].canonical(); - visitor.prev_[1] = Key{}; - } - - // Here we recurse on the database's schema (rather than the master schema) - visitor.databaseSchema().expandSecond(initialFieldKey, visitor, keys[0].canonical()); - return; - - case 1: - if (keys[1] != visitor.prev_[1] /*|| keys[1].registry() != visitor.prev_[1].registry()*/) { - visitor.selectIndex(keys[1].canonical(), fullComputedKey); - visitor.prev_[1] = keys[1].canonical(); - } - break; - - default: - ASSERT(depth == 0 || depth == 1); - break; - } + TypedKey key(registry_); - for (std::vector::const_iterator i = rules_.begin(); i != rules_.end(); ++i ) { - (*i)->expand(initialFieldKey, visitor, depth + 1, keys, fullComputedKey); - } - } - return; - } + for (auto iter = predicates_.begin(); iter != predicates_.end(); ++iter) { + const auto& pred = *iter; - std::vector::const_iterator next = cur; - ++next; + const auto& keyword = pred->keyword(); - const std::string &keyword = (*cur)->keyword(); - const std::string &value = (*cur)->value(initialFieldKey); - TypedKey& k = keys[depth]; + /// @note 1-1 order between predicates and values + const auto& value = values.at(iter - predicates_.begin()); - k.push(keyword, value); - fullComputedKey.push(keyword, value); + if (!pred->match(value)) { return {}; } - if ((*cur)->match(k.canonical())) { - expand(initialFieldKey, next, depth, keys, fullComputedKey, visitor); + key.push(keyword, value); } - fullComputedKey.pop(keyword); - k.pop(keyword); + return key.canonical(); } -void Rule::expand(const Key& initialFieldKey, WriteVisitor &visitor, size_t depth, std::vector &keys, TypedKey& fullComputedKey) const { - ASSERT(keys.size() == 3); - expand(initialFieldKey, predicates_.begin(), depth, keys, fullComputedKey, visitor); -} +std::optional Rule::findMatchingKey(const Key& field, const char* missing) const { + + Key key; -void Rule::expandFirstLevel(const metkit::mars::MarsRequest& rq, std::vector::const_iterator cur, TypedKey& result, bool& found) const { + for (const auto& pred : predicates_) { - if (cur == predicates_.end()) { - found = true; - return; + const auto& keyword = pred->keyword(); + + if (const auto [iter, found] = field.find(keyword); found) { + if (pred->match(iter->second)) { + key.push(keyword, iter->second); + } else { + return {}; + } + } else { + key.push(keyword, missing); + } } - std::vector::const_iterator next = cur; - ++next; + return key; +} - const std::string& keyword = (*cur)->keyword(); - const std::vector& values = (*cur)->values(rq); +std::vector Rule::findMatchingKeys(const metkit::mars::MarsRequest& request, const char* missing) const { - // Gives a unique expansion --> only considers the first of the values suggested. - // TODO: Consider the broader case. + RuleGraph graph; - for (const std::string& value : values) { + for (const auto& pred : predicates_) { - result.push(keyword, value); + const auto& keyword = pred->keyword(); - if ((*cur)->match(result.canonical())) { - expandFirstLevel(rq, next, result, found); - } + auto& node = graph.push(keyword); - if (!found) { - result.pop(keyword); + if (!request.has(keyword)) { + node.emplace_back(missing); } else { - return; + const auto& values = pred->values(request); + + for (const auto& value : values) { + if (pred->match(value)) { node.emplace_back(value); } + } + + if (node.empty()) { break; } } } -} -void Rule::expandFirstLevel(const metkit::mars::MarsRequest& request, TypedKey& result, bool& done) const { - expandFirstLevel(request, predicates_.begin(), result, done); + /// @todo activate this + graph.canonicalise(registry_); + + return graph.makeKeys(); } +std::vector Rule::findMatchingKeys(const metkit::mars::MarsRequest& request) const { -void Rule::matchFirstLevel( const Key& dbKey, std::vector::const_iterator cur, Key& tmp, std::set& result, const char* missing) const { + RuleGraph graph; - if (cur == predicates_.end()) { - if (tmp.match(dbKey)) { - result.insert(tmp); - } - return; - } + for (const auto& pred : predicates_) { - std::vector::const_iterator next = cur; - ++next; + const auto& keyword = pred->keyword(); - const std::string &keyword = (*cur)->keyword(); + const auto& values = pred->values(request); - if (dbKey.find(keyword) == dbKey.end()) { - tmp.push(keyword, missing); - matchFirstLevel(dbKey, next, tmp, result, missing); - } else { - const std::string &value = (*cur)->value(dbKey); + /// @note do we want to allow empty values? + // if (values.empty() && pred->optional()) { values.push_back(pred->defaultValue()); } - tmp.push(keyword, value); + auto& node = graph.push(keyword); - if ((*cur)->match(tmp)) { - matchFirstLevel(dbKey, next, tmp, result, missing); + for (const auto& value : values) { + if (pred->match(value)) { node.emplace_back(value); } } + + if (node.empty()) { return {}; } } - tmp.pop(keyword); + graph.canonicalise(registry_); + return graph.makeKeys(); } -void Rule::matchFirstLevel(const Key& dbKey, std::set& result, const char* missing) const { - Key tmp{}; - matchFirstLevel(dbKey, predicates_.begin(), tmp, result, missing); -} +std::vector Rule::findMatchingKeys(const metkit::mars::MarsRequest& request, ReadVisitor& visitor) const { + RuleGraph graph; -void Rule::matchFirstLevel(const metkit::mars::MarsRequest& request, std::vector::const_iterator cur, Key& tmp, std::set& result, const char* missing) const { + for (const auto& pred : predicates_) { - if (cur == predicates_.end()) { -// if (tmp.match(request)) { - result.insert(tmp); -// } - return; - } + const auto& keyword = pred->keyword(); - std::vector::const_iterator next = cur; - ++next; + // performance optimization to avoid calling values() on visitor + if (!pred->optional() && request.countValues(keyword) == 0) { return {}; } - const std::string& keyword = (*cur)->keyword(); + eckit::StringList values; + visitor.values(request, keyword, registry_, values); - if (request.has(keyword)) { + if (values.empty() && pred->optional()) { values.push_back(pred->defaultValue()); } - const std::vector& values = (*cur)->values(request); + auto& node = graph.push(keyword); - for (const std::string& value : values) { - tmp.push(keyword, value); - if ((*cur)->match(tmp)) { - matchFirstLevel(request, next, tmp, result, missing); - } - tmp.pop(keyword); + for (const auto& value : values) { + if (pred->match(value)) { node.emplace_back(value); } } - } else { - tmp.push(keyword, missing); - matchFirstLevel(request, next, tmp, result, missing); - tmp.pop(keyword); + + if (node.empty()) { return {}; } } -} -void Rule::matchFirstLevel(const metkit::mars::MarsRequest& request, std::set& result, const char* missing) const { - Key tmp{}; - matchFirstLevel(request, predicates_.begin(), tmp, result, missing); + graph.canonicalise(registry_); + + return graph.makeKeys(); } +//---------------------------------------------------------------------------------------------------------------------- bool Rule::match(const Key& key) const { - for (std::vector::const_iterator i = predicates_.begin(); i != predicates_.end(); ++i ) { - if (!(*i)->match(key)) { - return false; - } + for (const auto& pred : predicates_) { + if (!pred->match(key)) { return false; } } return true; } -// Find the first rule that matches a list of keys -const Rule* Rule::ruleFor(const std::vector &keys, size_t depth) const { - - if (depth == keys.size()) { - return this; - } - - if (match(keys[depth])) { - - for (std::vector::const_iterator i = rules_.begin(); i != rules_.end(); ++i ) { - const Rule *r = (*i)->ruleFor(keys, depth + 1); - if (r) { - return r; - } - } - } - return 0; -} - -void Rule::fill(BaseKey& key, const eckit::StringList& values) const { - +bool Rule::tryFill(Key& key, const eckit::StringList& values) const { // See FDB-103. This is a hack to work around the indexing abstraction // being leaky. // @@ -426,18 +342,18 @@ void Rule::fill(BaseKey& key, const eckit::StringList& values) const { // --> HACK. // --> Stick a plaster over the symptom. - ASSERT(values.size() >= predicates_.size()); // Should be equal, except for quantile (FDB-103) + ASSERT(values.size() >= predicates_.size()); // Should be equal, except for quantile (FDB-103) ASSERT(values.size() <= predicates_.size() + 1); auto it_value = values.begin(); - auto it_pred = predicates_.begin(); + auto it_pred = predicates_.begin(); for (; it_pred != predicates_.end() && it_value != values.end(); ++it_pred, ++it_value) { if (values.size() == (predicates_.size() + 1) && (*it_pred)->keyword() == "quantile") { std::string actualQuantile = *it_value; ++it_value; - ASSERT(it_value != values.end()); + if (it_value == values.end()) { return false; } actualQuantile += std::string(":") + (*it_value); (*it_pred)->fill(key, actualQuantile); } else { @@ -446,90 +362,263 @@ void Rule::fill(BaseKey& key, const eckit::StringList& values) const { } // Check that everything is exactly consumed - ASSERT(it_value == values.end()); - ASSERT(it_pred == predicates_.end()); + if (it_value != values.end()) { return false; } + if (it_pred != predicates_.end()) { return false; } + return true; +} + +void Rule::fill(Key& key, const eckit::StringList& values) const { + // FDB-103 - see comment in fill re quantile + ASSERT(tryFill(key, values)); +} + +Key Rule::makeKey(const std::string& keyFingerprint) const { + Key key; + + /// @note assumed keyFingerprint is canonical + const auto values = eckit::Tokenizer(":", true).tokenize(keyFingerprint); + + fill(key, values); + + return key; } -void Rule::dump(std::ostream &s, size_t depth) const { - s << "["; - const char *sep = ""; - for (std::vector::const_iterator i = predicates_.begin(); i != predicates_.end(); ++i ) { - s << sep; - (*i)->dump(s, registry_); +//---------------------------------------------------------------------------------------------------------------------- + +void Rule::dump(std::ostream& out) const { + out << "["; + + const char* sep = ""; + for (const auto& pred : predicates_) { + out << sep; + pred->dump(out, registry_); sep = ","; } - for (std::vector::const_iterator i = rules_.begin(); i != rules_.end(); ++i ) { - (*i)->dump(s, depth + 1); + dumpChildren(out); + + out << "]"; +} + +void Rule::updateParent(const Rule* parent) { + parent_ = parent; + if (parent) { registry_.updateParent(parent_->registry_); } +} + +const TypesRegistry& Rule::registry() const { + return registry_; +} + +void Rule::print(std::ostream& out) const { + out << type() << "[line=" << line_ << "]"; +} + +bool Rule::isTopRule() const { + return parent_ == nullptr; +} + +const Rule& Rule::topRule() const { + return parent_ ? parent_->topRule() : *this; +} + +void Rule::check(const Key& key) const { + for (const auto& pred : predicates_) { + + const auto& keyword = pred->keyword(); + + if (const auto [iter, found] = key.find(keyword); found) { + const auto& value = iter->second; + const auto& tidyValue = registry().lookupType(keyword).tidy(value); + if (value != tidyValue) { + std::ostringstream oss; + oss << "Rule check - metadata not valid (not in canonical form) - found: "; + oss << keyword << "=" << value << " - expecting " << tidyValue << '\n'; + throw eckit::UserError(oss.str(), Here()); + } + } } - s << "]"; + + if (parent_) { parent_->check(key); } } -size_t Rule::depth() const { - size_t result = 0; - for (std::vector::const_iterator i = rules_.begin(); i != rules_.end(); ++i ) { - result = std::max(result, (*i)->depth()); +std::ostream& operator<<(std::ostream& out, const Rule& rule) { + rule.print(out); + return out; +} + +//---------------------------------------------------------------------------------------------------------------------- +// RULE DATUM + +RuleDatum::RuleDatum(eckit::Stream& stream) : Rule() { + decode(stream); + size_t numRules; + stream >> numRules; + ASSERT(numRules == 0); +} + + +void RuleDatum::encode(eckit::Stream& out) const { + Rule::encode(out); + out << 0ul; +} + +void RuleDatum::expand(const metkit::mars::MarsRequest& request, ReadVisitor& visitor, Key& full) const { + + for (const auto& key : findMatchingKeys(request, visitor)) { + + full.pushFrom(key); + + visitor.selectDatum(key, full); + + full.popFrom(key); } - return result + 1; } -void Rule::updateParent(const Rule *parent) { - parent_ = parent; - // if (parent && (®istry_ != &parent->registry_)) { - if (parent) { - registry_.updateParent(parent_->registry_); +bool RuleDatum::expand(const Key& field, WriteVisitor& visitor, Key& full) const { + + if (const auto key = findMatchingKey(field)) { + + full.pushFrom(*key); + + if (visitor.rule()) { + std::ostringstream oss; + oss << "More than one rule matching " << full << " " << topRule() << " and " << visitor.rule()->topRule(); + throw eckit::SeriousBug(oss.str()); + } + + if (visitor.selectDatum(*key, full)) { + visitor.rule(this); + static const bool matchFirstFdbRule = eckit::Resource("matchFirstFdbRule", true); + if (matchFirstFdbRule) { return true; } + } + + full.popFrom(*key); + } + + return false; +} + +//---------------------------------------------------------------------------------------------------------------------- +// RULE INDEX + +RuleIndex::RuleIndex(const std::size_t line, Predicates& predicates, const eckit::StringDict& types, Children& rules) + : Rule(line, predicates, types), rules_ {std::move(rules)} { } + +RuleIndex::RuleIndex(eckit::Stream& stream) : Rule() { + decode(stream); + + size_t numRules; + stream >> numRules; + rules_.reserve(numRules); + + for (size_t i=0; i < numRules; i++) { + rules_.emplace_back(new RuleDatum(stream)); } - for (std::vector::iterator i = rules_.begin(); i != rules_.end(); ++i ) { - // if (&(*i)->registry_ != ®istry_) - (*i)->updateParent(this); +} + +void RuleIndex::encode(eckit::Stream& out) const { + Rule::encode(out); + out << rules_.size(); + for (const auto& rule : rules_) { + rule->encode(out); } } -const TypesRegistry& Rule::registry() const { - return registry_; +void RuleIndex::updateParent(const Rule* parent) { + Rule::updateParent(parent); + for (auto& rule : rules_) { rule->updateParent(this); } } -void Rule::print(std::ostream &out) const { - out << "Rule[line=" << line_ ; - out << "]"; +void RuleIndex::expand(const metkit::mars::MarsRequest& request, ReadVisitor& visitor, Key& full) const { + + for (const auto& key : findMatchingKeys(request, visitor)) { + + full.pushFrom(key); + + if (visitor.selectIndex(key, full)) { + for (const auto& rule : rules_) { rule->expand(request, visitor, full); } + } + + full.popFrom(key); + } +} + +bool RuleIndex::expand(const Key& field, WriteVisitor& visitor, Key& full) const { + + if (const auto key = findMatchingKey(field)) { + + full.pushFrom(*key); + + if (visitor.selectIndex(*key, full)) { + for (const auto& rule : rules_) { + if (rule->expand(field, visitor, full)) { return true; } + } + } + + full.popFrom(*key); + } + + return false; } -const Rule &Rule::topRule() const { - if (parent_) { - return parent_->topRule(); - } else { - return *this; +//---------------------------------------------------------------------------------------------------------------------- +// RULE DATABASE + +RuleDatabase::RuleDatabase(const std::size_t line, Predicates& predicates, const eckit::StringDict& types, Children& rules) + : Rule(line, predicates, types), rules_ {std::move(rules)} { } + +RuleDatabase::RuleDatabase(eckit::Stream& stream) : Rule() { + decode(stream); + + size_t numRules; + stream >> numRules; + rules_.reserve(numRules); + + for (size_t i=0; i < numRules; i++) { + rules_.emplace_back(new RuleIndex(stream)); } } -const Schema &Rule::schema() const { - return schema_; +void RuleDatabase::encode(eckit::Stream& out) const { + Rule::encode(out); + out << rules_.size(); + for (const auto& rule : rules_) { + rule->encode(out); + } } -void Rule::check(const Key& key) const { - for (const auto& pred : predicates_ ) { - auto k = key.find(pred->keyword()); - if (k != key.end()) { - const std::string& value = (*k).second; - const Type& type = registry_.lookupType(pred->keyword()); - if (value != type.tidy(value)) { - std::stringstream ss; - ss << "Rule check - metadata not valid (not in canonical form) - found: "; - ss << pred->keyword() << "=" << value << " - expecting " << type.tidy(value) << std::endl; - throw eckit::UserError(ss.str(), Here()); +void RuleDatabase::updateParent(const Rule* /* parent */) { + for (auto& rule : rules_) { rule->updateParent(this); } +} + +void RuleDatabase::expand(const metkit::mars::MarsRequest& request, ReadVisitor& visitor) const { + + for (auto& key : findMatchingKeys(request, visitor)) { + + if (visitor.selectDatabase(key, key)) { + // (important) using the database's schema + for (const auto& rule : visitor.databaseSchema().matchingRule(key).rules()) { + rule->expand(request, visitor, key); } } } - if (parent_ != nullptr) { - parent_->check(key); - } } -std::ostream &operator<<(std::ostream &s, const Rule &x) { - x.print(s); - return s; +bool RuleDatabase::expand(const Key& field, WriteVisitor& visitor) const { + + if (auto key = findMatchingKey(field)) { + + if (visitor.selectDatabase(*key, *key)) { + // (important) using the database's schema + for (const auto& rule : visitor.databaseSchema().matchingRule(*key).rules()) { + if (rule->expand(field, visitor, *key)) { return true; } + } + } + } + + return false; } //---------------------------------------------------------------------------------------------------------------------- -} // namespace fdb5 +} // namespace fdb5 diff --git a/src/fdb5/rules/Rule.h b/src/fdb5/rules/Rule.h index fc22c53cd..70382f485 100644 --- a/src/fdb5/rules/Rule.h +++ b/src/fdb5/rules/Rule.h @@ -16,19 +16,20 @@ #ifndef fdb5_Rule_H #define fdb5_Rule_H +#include #include -#include #include +#include +#include +#include "eckit/serialisation/Reanimator.h" #include "eckit/serialisation/Streamable.h" #include "eckit/types/Types.h" + #include "fdb5/types/TypesRegistry.h" -#include "eckit/serialisation/Reanimator.h" -namespace metkit { -namespace mars { - class MarsRequest; -} +namespace metkit::mars { +class MarsRequest; } namespace fdb5 { @@ -37,116 +38,216 @@ class Schema; class Predicate; class ReadVisitor; class WriteVisitor; -class BaseKey; class Key; -class TypedKey; //---------------------------------------------------------------------------------------------------------------------- class Rule : public eckit::Streamable { + friend class Schema; + +public: // types + using Predicates = std::vector>; + +public: // methods + Rule(std::size_t line, Predicates& predicates, const eckit::StringDict& types); + + explicit Rule(eckit::Stream& stream); -public: // methods - /// Takes ownership of vectors - Rule(const Schema &schema, - size_t line, - std::vector &predicates, - std::vector &rules, - const std::map &types - ); - Rule(eckit::Stream& s); - Rule(const Schema &schema, eckit::Stream& s); + virtual const char* type() const = 0; - ~Rule(); + virtual void updateParent(const Rule* parent); + + /// @todo this different from the other findMatchingKey in that it throws and fixes quantile values + /// can we merge them ? + Key makeKey(const std::string& keyFingerprint) const; bool match(const Key& key) const; - eckit::StringList keys(size_t level) const; + void check(const Key& key) const; - void dump(std::ostream &s, size_t depth = 0) const; + void dump(std::ostream& out) const; - void expand(const metkit::mars::MarsRequest &request, - ReadVisitor &Visitor, - size_t depth, - std::vector &keys, - TypedKey& fullComputedKey) const; + const Rule& parent() const; + const Rule& topRule() const; + bool isTopRule() const; - void expand(const Key& initialFieldKey, - WriteVisitor &Visitor, - size_t depth, - std::vector &keys, - TypedKey& fullComputedKey) const; + const TypesRegistry& registry() const; - const Rule* ruleFor(const std::vector &keys, size_t depth) const; - void fill(BaseKey& key, const eckit::StringList& values) const; + void encode(eckit::Stream& out) const override; +protected: // methods - size_t depth() const; - void updateParent(const Rule *parent); + Rule() = default; - const Rule &topRule() const; + void decode(eckit::Stream& stream); - const Schema &schema() const; - const TypesRegistry& registry() const; + std::optional findMatchingKey(const Key& field) const; - const eckit::ReanimatorBase& reanimator() const override { return reanimator_; } - static const eckit::ClassSpec& classSpec() { return classSpec_; } + std::vector findMatchingKeys(const metkit::mars::MarsRequest& request, ReadVisitor& visitor) const; - void check(const Key& key) const; +private: // methods + virtual void dumpChildren(std::ostream& out) const = 0; + + std::optional findMatchingKey(const eckit::StringList& values) const; -private: // methods + std::optional findMatchingKey(const Key& field, const char* missing) const; - void expand(const metkit::mars::MarsRequest &request, - std::vector::const_iterator cur, - size_t depth, - std::vector &keys, - TypedKey& fullComputedKey, - ReadVisitor &Visitor) const; + std::vector findMatchingKeys(const metkit::mars::MarsRequest& request) const; - void expand(const Key& initialFieldKey, - std::vector::const_iterator cur, - size_t depth, - std::vector &keys, - TypedKey& fullComputedKey, - WriteVisitor &Visitor) const; + std::vector findMatchingKeys(const metkit::mars::MarsRequest& request, const char* missing) const; - void expandFirstLevel(const metkit::mars::MarsRequest& request, std::vector::const_iterator cur, TypedKey& result, bool& done) const; - void expandFirstLevel(const metkit::mars::MarsRequest& request, TypedKey& result, bool& done) const; + bool tryFill(Key& key, const eckit::StringList& values) const; - void matchFirstLevel(const Key& dbKey, std::vector::const_iterator cur, Key& tmp, std::set& result, const char* missing) const; - void matchFirstLevel(const Key& dbKey, std::set& result, const char* missing) const ; - void matchFirstLevel(const metkit::mars::MarsRequest& request, std::vector::const_iterator cur, Key& tmp, std::set& result, const char* missing) const; - void matchFirstLevel(const metkit::mars::MarsRequest& request, std::set& result, const char* missing) const ; + void fill(Key& key, const eckit::StringList& values) const; + void print(std::ostream& out) const; - void keys(size_t level, size_t depth, eckit::StringList &result, eckit::StringSet &seen) const; + friend std::ostream& operator<<(std::ostream& out, const Rule& rule); - friend std::ostream &operator<<(std::ostream &s, const Rule &x); +protected: // members + const Rule* parent_ {nullptr}; - void encode(eckit::Stream& s) const override; + std::size_t line_ {0}; - void print( std::ostream &out ) const; + Predicates predicates_; -private: // members + TypesRegistry registry_; + + // streamable static eckit::ClassSpec classSpec_; - static eckit::Reanimator reanimator_; +}; - const Schema& schema_; - const Rule* parent_; +//---------------------------------------------------------------------------------------------------------------------- +// RULE DATUM - std::vector predicates_; - std::vector rules_; +class RuleDatum : public Rule { +public: // methods + using Rule::Rule; - TypesRegistry registry_; + explicit RuleDatum(eckit::Stream& stream); - friend class Schema; - size_t line_; + void expand(const metkit::mars::MarsRequest& request, ReadVisitor& visitor, Key& full) const; + + bool expand(const Key& field, WriteVisitor& visitor, Key& full) const; + + const char* type() const override { return "RuleDatum"; } + + // streamable + + const eckit::ReanimatorBase& reanimator() const override { return reanimator_; } + + static const eckit::ClassSpec& classSpec() { return classSpec_; } + + void encode(eckit::Stream& out) const override; +private: // methods + void dumpChildren(std::ostream& /* out */) const override { } + +private: // members + // streamable + + static eckit::ClassSpec classSpec_; + static eckit::Reanimator reanimator_; }; +//---------------------------------------------------------------------------------------------------------------------- +// RULE INDEX + +class RuleIndex : public Rule { +public: // types + using Children = std::vector>; + +public: // methods + RuleIndex(std::size_t line, Predicates& predicates, const eckit::StringDict& types, Children& rules); + + explicit RuleIndex(eckit::Stream& stream); + + void expand(const metkit::mars::MarsRequest& request, ReadVisitor& visitor, Key& full) const; + + bool expand(const Key& field, WriteVisitor& visitor, Key& full) const; + + void updateParent(const Rule* parent) override; + + const Children& rules() const { return rules_; } + + const char* type() const override { return "RuleIndex"; } + + // streamable + + const eckit::ReanimatorBase& reanimator() const override { return reanimator_; } + + static const eckit::ClassSpec& classSpec() { return classSpec_; } + + void encode(eckit::Stream& out) const override; + +private: // methods + + void dumpChildren(std::ostream& out) const override { + for (const auto& rule : rules_) { rule->dump(out); } + } + +private: // members + Children rules_; + + // streamable + + static eckit::ClassSpec classSpec_; + static eckit::Reanimator reanimator_; +}; + +//---------------------------------------------------------------------------------------------------------------------- +// RULE DATABASE + +class RuleDatabase : public Rule { +public: // types + using Children = std::vector>; + +public: // methods + RuleDatabase(std::size_t line, Predicates& predicates, const eckit::StringDict& types, Children& rules); + + explicit RuleDatabase(eckit::Stream& stream); + + void expand(const metkit::mars::MarsRequest& request, ReadVisitor& visitor) const; + + bool expand(const Key& field, WriteVisitor& visitor) const; + + void updateParent(const Rule* parent) override; + + const Children& rules() const { return rules_; } + + const char* type() const override { return "RuleDatabase"; } + + // streamable + + const eckit::ReanimatorBase& reanimator() const override { return reanimator_; } + + static const eckit::ClassSpec& classSpec() { return classSpec_; } + + void encode(eckit::Stream& out) const override; + +private: // methods + + void dumpChildren(std::ostream& out) const override { + for (const auto& rule : rules_) { rule->dump(out); } + } + +private: // members + Children rules_; + + // streamable + + static eckit::ClassSpec classSpec_; + static eckit::Reanimator reanimator_; +}; + +//---------------------------------------------------------------------------------------------------------------------- + +using RuleList = std::vector>; + //---------------------------------------------------------------------------------------------------------------------- -} // namespace fdb5 +} // namespace fdb5 #endif diff --git a/src/fdb5/rules/Schema.cc b/src/fdb5/rules/Schema.cc index a2dbe9a6f..e7e1e0d46 100644 --- a/src/fdb5/rules/Schema.cc +++ b/src/fdb5/rules/Schema.cc @@ -8,18 +8,32 @@ * does it submit to any jurisdiction. */ +#include #include +#include #include #include +#include +#include +#include +#include +#include +#include +#include #include "eckit/exception/Exceptions.h" +#include "eckit/filesystem/PathName.h" +#include "eckit/log/Log.h" +#include "eckit/utils/Tokenizer.h" #include "fdb5/LibFdb5.h" #include "fdb5/database/Key.h" #include "fdb5/database/WriteVisitor.h" -#include "fdb5/rules/Rule.h" +#include "fdb5/rules/Predicate.h" #include "fdb5/rules/Schema.h" #include "fdb5/rules/SchemaParser.h" +#include "fdb5/types/Type.h" +#include "fdb5/types/TypesRegistry.h" namespace fdb5 { @@ -29,34 +43,38 @@ eckit::ClassSpec Schema::classSpec_ = { &eckit::Streamable::classSpec(), "Schema eckit::Reanimator Schema::reanimator_; +//---------------------------------------------------------------------------------------------------------------------- + Schema::Schema() = default; -Schema::Schema(const eckit::PathName &path) { +Schema::Schema(const eckit::PathName& path) { load(path); } -Schema::Schema(std::istream& s) { - load(s); +Schema::Schema(std::istream& stream) { + load(stream); } -Schema::Schema(eckit::Stream& s) : - registry_(s) { - - size_t numRules; - s >> path_; - s >> numRules; - for (size_t i=0; i < numRules; i++) { - rules_.push_back(new Rule(*this, s)); + +Schema::Schema(eckit::Stream& stream) : registry_ {stream} { + + size_t numRules = 0; + stream >> path_; + stream >> numRules; + rules_.reserve(numRules); + for (size_t i = 0; i < numRules; i++) { + rules_.emplace_back(new RuleDatabase(stream)); } check(); } -void Schema::encode(eckit::Stream& s) const { - registry_.encode(s); - s << path_; - s << rules_.size(); - for (const Rule* rule : rules_) { - rule->encode(s); +void Schema::encode(eckit::Stream& stream) const { + registry_.encode(stream); + // stream << registry_; + stream << path_; + stream << rules_.size(); + for (const auto& rule : rules_) { + rule->encode(stream); } } @@ -64,103 +82,90 @@ Schema::~Schema() { clear(); } -const Rule* Schema::ruleFor(const Key& dbKey, const Key& idxKey) const { - std::vector keys; - keys.push_back(dbKey); - keys.push_back(idxKey); +//---------------------------------------------------------------------------------------------------------------------- + +const RuleDatum& Schema::matchingRule(const Key& dbKey, const Key& idxKey) const { - for (const Rule* rule : rules_) { - const Rule* r = rule->ruleFor(keys , 0); - if (r) { - return r; + for (const auto& dbRule : rules_) { + if (!dbRule->match(dbKey)) { continue; } + for (const auto& idxRule : dbRule->rules()) { + if (!idxRule->match(idxKey)) { continue; } + /// @note returning first datum. could there be multiple datum per index ? + for (const auto& datumRule : idxRule->rules()) { return *datumRule; } } } - return 0; + + std::ostringstream msg; + msg << "No rule is matching dbKey=" << dbKey << " and idxKey=" << idxKey; + throw eckit::SeriousBug(msg.str(), Here()); } -void Schema::expand(const metkit::mars::MarsRequest &request, ReadVisitor &visitor) const { - TypedKey fullComputedKey{registry()}; - std::vector keys(3, TypedKey{{}, registry()}); +const RuleDatabase& Schema::matchingRule(const Key& dbKey) const { - for (Rule* r : rules_) { - r->expand(request, visitor, 0, keys, fullComputedKey); + for (const auto& rule : rules_) { + if (rule->match(dbKey)) { return *rule; } } -} -void Schema::expand(const Key& field, WriteVisitor &visitor) const { - TypedKey fullComputedKey{registry()}; - std::vector keys(3, TypedKey{{}, registry()}); + std::ostringstream msg; + msg << "No rule is matching dbKey=" << dbKey; + throw eckit::SeriousBug(msg.str(), Here()); +} - visitor.rule(0); // reset to no rule so we verify that we pick at least one +//---------------------------------------------------------------------------------------------------------------------- - for (Rule* r : rules_) { - r->expand(field, visitor, 0, keys, fullComputedKey); - } +void Schema::expand(const metkit::mars::MarsRequest& request, ReadVisitor& visitor) const { + for (const auto& rule : rules_) { rule->expand(request, visitor); } } -void Schema::expandSecond(const metkit::mars::MarsRequest& request, ReadVisitor& visitor, const Key& dbKey) const { +std::vector Schema::expandDatabase(const metkit::mars::MarsRequest& request) const { + std::vector result; - const Rule* dbRule = nullptr; - for (const Rule* rule : rules_) { - if (rule->match(dbKey)) { - dbRule = rule; - break; - } + for (const auto& rule : rules_) { + const auto keys = rule->findMatchingKeys(request); + result.insert(result.end(), keys.begin(), keys.end()); } - ASSERT(dbRule); - - std::vector keys(3, TypedKey{{}, registry()}); - TypedKey fullComputedKey = keys[0] = TypedKey{dbKey, registry()}; - for (std::vector:: const_iterator i = dbRule->rules_.begin(); i != dbRule->rules_.end(); ++i) { - (*i)->expand(request, visitor, 1, keys, fullComputedKey); - } + return result; } -void Schema::expandSecond(const Key& field, WriteVisitor& visitor, const Key& dbKey) const { +void Schema::expand(const Key& field, WriteVisitor& visitor) const { - const Rule* dbRule = nullptr; - for (const Rule* rule : rules_) { - if (rule->match(dbKey)) { - dbRule = rule; - break; - } - } - ASSERT(dbRule); - - std::vector keys(3, TypedKey{{}, registry()}); - TypedKey fullComputedKey = keys[0] = TypedKey{dbKey, registry()}; + visitor.rule(nullptr); // reset to no rule so we verify that we pick at least one - for (std::vector:: const_iterator i = dbRule->rules_.begin(); i != dbRule->rules_.end(); ++i) { - (*i)->expand(field, visitor, 1, keys, fullComputedKey); + for (const auto& rule : rules_) { + if (rule->expand(field, visitor)) { break; } } } -bool Schema::expandFirstLevel(const metkit::mars::MarsRequest& request, TypedKey& result) const { - bool found = false; - for (const Rule* rule : rules_) { - rule->expandFirstLevel(request, result, found); - if (found) { - result.registry(rule->registry()); - break; - } +//---------------------------------------------------------------------------------------------------------------------- + +void Schema::matchDatabase(const Key& dbKey, std::set& result, const char* missing) const { + for (const auto& rule : rules_) { + if (auto key = rule->findMatchingKey(dbKey, missing)) { result.insert(std::move(*key)); } } - return found; } -void Schema::matchFirstLevel(const Key& dbKey, std::set &result, const char* missing) const { - for (const Rule* rule : rules_) { - rule->matchFirstLevel(dbKey, result, missing); +void Schema::matchDatabase(const metkit::mars::MarsRequest& request, std::set& result, const char* missing) const { + for (const auto& rule : rules_) { + const auto keys = rule->findMatchingKeys(request, missing); + result.insert(keys.begin(), keys.end()); } } -void Schema::matchFirstLevel(const metkit::mars::MarsRequest& request, std::set& result, const char* missing) const { - for (const Rule* rule : rules_) { - rule->matchFirstLevel(request, result, missing); +std::optional Schema::matchDatabase(const std::string& fingerprint) const { + + const auto values = eckit::Tokenizer(":", true).tokenize(fingerprint); + + for (const auto& rule : rules_) { + if (auto found = rule->findMatchingKey(values)) { return found; } } + + return {}; } -void Schema::load(const eckit::PathName &path, bool replace) { +//---------------------------------------------------------------------------------------------------------------------- + +void Schema::load(const eckit::PathName& path, const bool replace) { path_ = path; @@ -172,59 +177,53 @@ void Schema::load(const eckit::PathName &path, bool replace) { ex.dumpStackTrace(); throw ex; } + load(in, replace); } -void Schema::load(std::istream& s, bool replace) { - - if (replace) { - clear(); - } +void Schema::load(std::istream& s, const bool replace) { - SchemaParser parser(s); + if (replace) { clear(); } - parser.parse(*this, rules_, registry_); + SchemaParser(s).parse(rules_, registry_); check(); } +//---------------------------------------------------------------------------------------------------------------------- + void Schema::clear() { - for (std::vector::iterator i = rules_.begin(); i != rules_.end(); ++i ) { - delete *i; - } + rules_.clear(); } -void Schema::dump(std::ostream &s) const { +void Schema::dump(std::ostream& s) const { registry_.dump(s); - for (std::vector::const_iterator i = rules_.begin(); i != rules_.end(); ++i ) { - (*i)->dump(s); - s << std::endl; + for (const auto& rule : rules_) { + rule->dump(s); + s << '\n'; } } void Schema::check() { - for (Rule* rule : rules_) { - /// @todo print offending rule in meaningful message - ASSERT(rule->depth() == 3); + for (auto& rule : rules_) { rule->registry_.updateParent(registry_); - rule->updateParent(0); + rule->updateParent(nullptr); } } -void Schema::print(std::ostream &out) const { +void Schema::print(std::ostream& out) const { out << "Schema[path=" << path_ << "]"; } -const Type &Schema::lookupType(const std::string &keyword) const { +const Type& Schema::lookupType(const std::string& keyword) const { return registry_.lookupType(keyword); } - bool Schema::empty() const { return rules_.empty(); } -const std::string &Schema::path() const { +const std::string& Schema::path() const { return path_; } @@ -232,12 +231,13 @@ const TypesRegistry& Schema::registry() const { return registry_; } -std::ostream &operator<<(std::ostream &s, const Schema &x) { - x.print(s); - return s; +std::ostream& operator<<(std::ostream& out, const Schema& schema) { + schema.print(out); + return out; } //---------------------------------------------------------------------------------------------------------------------- +// REGISTRY SchemaRegistry& SchemaRegistry::instance() { static SchemaRegistry me; @@ -251,18 +251,22 @@ const Schema& SchemaRegistry::add(const eckit::PathName& path, Schema* schema) { } const Schema& SchemaRegistry::get(const eckit::PathName& path) { - std::lock_guard lock(m_); - auto it = schemas_.find(path); - if (it != schemas_.end()) { - return *it->second; + std::lock_guard lock(m_); + + auto iter = schemas_.find(path); + + if (iter == schemas_.end()) { + bool done = false; + + std::tie(iter, done) = schemas_.emplace(path, std::make_unique(path)); + + ASSERT(done); } - Schema* p = new Schema(path); - ASSERT(p); - schemas_[path] = std::unique_ptr(p); - return *schemas_[path]; + ASSERT(iter->second); + return *iter->second; } //---------------------------------------------------------------------------------------------------------------------- -} // namespace fdb5 +} // namespace fdb5 diff --git a/src/fdb5/rules/Schema.h b/src/fdb5/rules/Schema.h index db56cc946..37f732c88 100644 --- a/src/fdb5/rules/Schema.h +++ b/src/fdb5/rules/Schema.h @@ -20,16 +20,17 @@ #include #include #include +#include #include #include #include #include "eckit/filesystem/PathName.h" -#include "eckit/serialisation/Streamable.h" #include "eckit/serialisation/Reanimator.h" +#include "eckit/serialisation/Streamable.h" #include "fdb5/config/Config.h" -#include "fdb5/types/TypesRegistry.h" +#include "fdb5/rules/Rule.h" namespace metkit::mars { class MarsRequest; @@ -38,78 +39,90 @@ class MarsRequest; namespace fdb5 { class Key; -class Rule; class ReadVisitor; class WriteVisitor; -class Schema; +class TypesRegistry; //---------------------------------------------------------------------------------------------------------------------- class Schema : public eckit::Streamable { -public: // methods - +public: // methods Schema(); - Schema(const eckit::PathName &path); - Schema(std::istream& s); - Schema(eckit::Stream& s); + Schema(const eckit::PathName& path); + Schema(std::istream& stream); + Schema(eckit::Stream& stream); + + ~Schema() override; + + // expand + + void expand(const Key& field, WriteVisitor& visitor) const; - ~Schema(); + void expand(const metkit::mars::MarsRequest& request, ReadVisitor& visitor) const; - void expand(const Key& field, WriteVisitor &visitor) const; - void expand(const metkit::mars::MarsRequest &request, ReadVisitor &visitor) const; + std::vector expandDatabase(const metkit::mars::MarsRequest& request) const; - // Each database has its own internal schema. So expand() above results in - // expandFurther being called on the relevant schema from the DB, to start - // iterating on that schemas rules. - void expandSecond(const Key& field, WriteVisitor &visitor, const Key& dbKey) const; - void expandSecond(const metkit::mars::MarsRequest& request, ReadVisitor &visitor, const Key& dbKey) const; + // match - bool expandFirstLevel(const metkit::mars::MarsRequest& request, TypedKey& result) const ; - void matchFirstLevel(const Key& dbKey, std::set &result, const char* missing) const ; - void matchFirstLevel(const metkit::mars::MarsRequest& request, std::set& result, const char* missing) const ; + void matchDatabase(const metkit::mars::MarsRequest& request, std::set& result, const char* missing) const; - const Rule* ruleFor(const Key& dbKey, const Key& idxKey) const; + void matchDatabase(const Key& dbKey, std::set& result, const char* missing) const; + + std::optional matchDatabase(const std::string& fingerprint) const; + + /// @throws eckit::SeriousBug if no rule is found + const RuleDatabase& matchingRule(const Key& dbKey) const; + + /// @throws eckit::SeriousBug if no rule is found + const RuleDatum& matchingRule(const Key& dbKey, const Key& idxKey) const; + + void load(const eckit::PathName& path, bool replace = false); - void load(const eckit::PathName &path, bool replace = false); void load(std::istream& s, bool replace = false); - void dump(std::ostream &s) const; + // accessors + + void dump(std::ostream& s) const; bool empty() const; - const Type &lookupType(const std::string &keyword) const; + const Type& lookupType(const std::string& keyword) const; - const std::string &path() const; + const std::string& path() const; const TypesRegistry& registry() const; - + + // streamable + const eckit::ReanimatorBase& reanimator() const override { return reanimator_; } - static const eckit::ClassSpec& classSpec() { return classSpec_; } -private: // methods + static const eckit::ClassSpec& classSpec() { return classSpec_; } - void clear(); +private: // methods void check(); - friend std::ostream &operator<<(std::ostream &s, const Schema &x); - - void encode(eckit::Stream& s) const override; + void clear(); - void print( std::ostream &out ) const; + void encode(eckit::Stream& stream) const override; -private: // members + void print(std::ostream& out) const; - static eckit::ClassSpec classSpec_; - static eckit::Reanimator reanimator_; + friend std::ostream& operator<<(std::ostream& out, const Schema& schema); friend void Config::overrideSchema(const eckit::PathName& schemaPath, Schema* schema); +private: // members TypesRegistry registry_; - - std::vector rules_; + + RuleList rules_; + std::string path_; + // streamable + + static eckit::ClassSpec classSpec_; + static eckit::Reanimator reanimator_; }; //---------------------------------------------------------------------------------------------------------------------- @@ -119,6 +132,7 @@ class Schema : public eckit::Streamable { class SchemaRegistry { public: static SchemaRegistry& instance(); + const Schema& add(const eckit::PathName& path, Schema* schema); const Schema& get(const eckit::PathName& path); @@ -129,6 +143,6 @@ class SchemaRegistry { //---------------------------------------------------------------------------------------------------------------------- -} // namespace fdb5 +} // namespace fdb5 #endif diff --git a/src/fdb5/rules/SchemaParser.cc b/src/fdb5/rules/SchemaParser.cc index b15bf72ae..5755cd932 100644 --- a/src/fdb5/rules/SchemaParser.cc +++ b/src/fdb5/rules/SchemaParser.cc @@ -13,16 +13,16 @@ /// @author Tiago Quintino /// @date April 2016 - #include "fdb5/rules/SchemaParser.h" -#include "fdb5/rules/Rule.h" -#include "fdb5/rules/Predicate.h" #include "fdb5/rules/MatchAlways.h" #include "fdb5/rules/MatchAny.h" -#include "fdb5/rules/MatchValue.h" -#include "fdb5/rules/MatchOptional.h" #include "fdb5/rules/MatchHidden.h" -#include "fdb5/types/TypesRegistry.h" +#include "fdb5/rules/MatchOptional.h" +#include "fdb5/rules/MatchValue.h" +#include "fdb5/rules/Predicate.h" +#include "fdb5/rules/Rule.h" + +#include namespace fdb5 { @@ -62,7 +62,7 @@ std::string SchemaParser::parseIdent(bool value, bool emptyOK) { } } -Predicate *SchemaParser::parsePredicate(std::map &types) { +std::unique_ptr SchemaParser::parsePredicate(eckit::StringDict& types) { std::set values; std::string k = parseIdent(false, false); @@ -78,7 +78,7 @@ Predicate *SchemaParser::parsePredicate(std::map &type if (c == '?') { consume(c); - return new Predicate(k, new MatchOptional(parseIdent(true, true))); + return std::make_unique(k, new MatchOptional(parseIdent(true, true))); } if (c == '-') { @@ -87,7 +87,7 @@ Predicate *SchemaParser::parsePredicate(std::map &type // Register ignore type types[k] = "Ignore"; } - return new Predicate(k, new MatchHidden(parseIdent(true, true))); + return std::make_unique(k, new MatchHidden(parseIdent(true, true))); } if (c != ',' && c != '[' && c != ']') { @@ -102,99 +102,146 @@ Predicate *SchemaParser::parsePredicate(std::map &type } switch (values.size()) { - case 0: - return new Predicate(k, new MatchAlways()); - break; - - case 1: - return new Predicate(k, new MatchValue(*values.begin())); - break; - - default: - return new Predicate(k, new MatchAny(values)); - break; + case 0: return std::make_unique(k, new MatchAlways()); break; + case 1: return std::make_unique(k, new MatchValue(*values.begin())); break; + default: return std::make_unique(k, new MatchAny(values)); break; } } -void SchemaParser::parseTypes(std::map &types) { +void SchemaParser::parseTypes(eckit::StringDict& types) { for (;;) { - std::string name = parseIdent(false, true); + const auto name = parseIdent(false, true); if (name.empty()) { break; } consume(':'); - std::string type = parseIdent(false, false); + const auto type = parseIdent(false, false); consume(';'); ASSERT(types.find(name) == types.end()); types[name] = type; } } -Rule *SchemaParser::parseRule(const Schema &owner) { - std::vector predicates; - std::vector rules; - std::map types; +std::unique_ptr SchemaParser::parseDatum() { + Rule::Predicates predicates; + eckit::StringDict types; consume('['); - size_t line = line_ + 1; + const std::size_t line = line_ + 1; char c = peek(); if (c == ']') { consume(c); - return new Rule(owner, line, predicates, rules, types); + return std::make_unique(line, predicates, types); } + for (;;) { + + c = peek(); + + predicates.emplace_back(parsePredicate(types)); + while ((c = peek()) == ',') { + consume(c); + predicates.emplace_back(parsePredicate(types)); + } + + c = peek(); + if (c == ']') { + consume(c); + return std::make_unique(line, predicates, types); + } + } +} + +std::unique_ptr SchemaParser::parseIndex() { + Rule::Predicates predicates; + eckit::StringDict types; + RuleIndex::Children rules; + + consume('['); + + const std::size_t line = line_ + 1; + + char c = peek(); + if (c == ']') { + consume(c); + return std::make_unique(line, predicates, types, rules); + } for (;;) { - char c = peek(); + c = peek(); - if ( c == '[') { - while ( c == '[') { - rules.push_back(parseRule(owner)); - c = peek(); - } + if (c == '[') { + rules.emplace_back(parseDatum()); } else { - predicates.push_back(parsePredicate(types)); - while ( (c = peek()) == ',') { + predicates.emplace_back(parsePredicate(types)); + while ((c = peek()) == ',') { consume(c); - predicates.push_back(parsePredicate(types)); + predicates.emplace_back(parsePredicate(types)); } } c = peek(); if (c == ']') { consume(c); - return new Rule(owner, line, predicates, rules, types); + return std::make_unique(line, predicates, types, rules); } + } +} + +std::unique_ptr SchemaParser::parseDatabase() { + Rule::Predicates predicates; + eckit::StringDict types; + RuleDatabase::Children rules; + + consume('['); + const std::size_t line = line_ + 1; + char c = peek(); + if (c == ']') { + consume(c); + return std::make_unique(line, predicates, types, rules); } -} -SchemaParser::SchemaParser(std::istream &in) : StreamParser(in, true) { + for (;;) { + + c = peek(); + + if (c == '[') { + rules.emplace_back(parseIndex()); + } else { + predicates.emplace_back(parsePredicate(types)); + while ((c = peek()) == ',') { + consume(c); + predicates.emplace_back(parsePredicate(types)); + } + } + + c = peek(); + if (c == ']') { + consume(c); + return std::make_unique(line, predicates, types, rules); + } + } } -void SchemaParser::parse(const Schema &owner, - std::vector &result, TypesRegistry ®istry) { - char c; - std::map types; +void SchemaParser::parse(RuleList& result, TypesRegistry& registry) { + eckit::StringDict types; parseTypes(types); - for (std::map::const_iterator i = types.begin(); i != types.end(); ++i) { - registry.addType(i->first, i->second); - } + for (const auto& [keyword, type] : types) { registry.addType(keyword, type); } + + char c; + while ((c = peek()) == '[') { result.emplace_back(parseDatabase()); } - while ((c = peek()) == '[') { - result.push_back(parseRule(owner)); - } if (c) { throw StreamParser::Error(std::string("Error parsing rules: remaining char: ") + c); } } - //---------------------------------------------------------------------------------------------------------------------- -} // namespace eckit +} // namespace fdb5 diff --git a/src/fdb5/rules/SchemaParser.h b/src/fdb5/rules/SchemaParser.h index cae777316..ddf249a40 100644 --- a/src/fdb5/rules/SchemaParser.h +++ b/src/fdb5/rules/SchemaParser.h @@ -14,39 +14,42 @@ #ifndef fdb5_SchemaParser_h #define fdb5_SchemaParser_h +#include +#include +#include + #include "eckit/parser/StreamParser.h" #include "eckit/types/Types.h" -namespace fdb5 { +#include "fdb5/rules/Rule.h" -class Schema; -class Rule; -class Predicate; -class TypesRegistry; +namespace fdb5 { //---------------------------------------------------------------------------------------------------------------------- class SchemaParser : public eckit::StreamParser { -public: // methods +public: // methods + SchemaParser(std::istream& in) : StreamParser(in, true) { } - SchemaParser(std::istream &in); + void parse(RuleList& result, TypesRegistry& registry); - void parse(const Schema &owner, std::vector &, TypesRegistry ®istry); +private: // methods + std::string parseIdent(bool value, bool emptyOK); -private: // methods + std::unique_ptr parseDatum(); - std::string parseIdent(bool value, bool emptyOK); + std::unique_ptr parseIndex(); - Rule *parseRule(const Schema &owner); + std::unique_ptr parseDatabase(); - Predicate *parsePredicate(std::map &types); - void parseTypes(std::map &); + std::unique_ptr parsePredicate(eckit::StringDict& types); + void parseTypes(eckit::StringDict& types); }; //---------------------------------------------------------------------------------------------------------------------- -} // namespace eckit +} // namespace fdb5 #endif diff --git a/src/fdb5/toc/AdoptVisitor.cc b/src/fdb5/toc/AdoptVisitor.cc index 386da2f19..07b360586 100644 --- a/src/fdb5/toc/AdoptVisitor.cc +++ b/src/fdb5/toc/AdoptVisitor.cc @@ -30,14 +30,14 @@ AdoptVisitor::AdoptVisitor(Archiver& owner, const Key& initialFieldKey, const Pa ASSERT(length_ > Length(0)); } -bool AdoptVisitor::selectDatum(const TypedKey& datumKey, const TypedKey& fullComputedKey) { - checkMissingKeys(fullComputedKey); +bool AdoptVisitor::selectDatum(const Key& datumKey, const Key& fullKey) { + checkMissingKeys(fullKey); CatalogueWriter* cat = catalogue(); ASSERT(cat); if (cat->type() == TocEngine::typeName()) { - cat->index(datumKey.canonical(), eckit::URI("file", path_), offset_, length_); + cat->index(datumKey, eckit::URI("file", path_), offset_, length_); return true; } return false; diff --git a/src/fdb5/toc/AdoptVisitor.h b/src/fdb5/toc/AdoptVisitor.h index fe2f33726..8e49a0e84 100644 --- a/src/fdb5/toc/AdoptVisitor.h +++ b/src/fdb5/toc/AdoptVisitor.h @@ -25,7 +25,6 @@ namespace metkit { class MarsRequest; } namespace fdb5 { class Archiver; -class TypedKey; //---------------------------------------------------------------------------------------------------------------------- @@ -40,8 +39,7 @@ class AdoptVisitor : public BaseArchiveVisitor { eckit::Length length); protected: // methods - - bool selectDatum(const TypedKey& datumKey, const TypedKey& fullComputedKey) override; + bool selectDatum(const Key& datumKey, const Key& fullKey) override; void print( std::ostream& out ) const override; diff --git a/src/fdb5/toc/RootManager.cc b/src/fdb5/toc/RootManager.cc index cef3a7fc7..9de9757f4 100644 --- a/src/fdb5/toc/RootManager.cc +++ b/src/fdb5/toc/RootManager.cc @@ -153,25 +153,22 @@ class DbPathNamer { } var = false; - j = k.find(word); - if(j != k.end()) { - if(!missing) { - result += (*j).second; - } - else { - if((*j).second == missing || (*j).second.empty()) { - result += keyregex_.find(word)->second; // we know it exists because it is ensured in match() - } - else - { + if (const auto [iter, found] = k.find(word); found) { + if (!missing) { + result += iter->second; + } else { + if (iter->second == missing || iter->second.empty()) { + // we know it exists because it is ensured in match() + result += keyregex_.find(word)->second; + } else { result += (*j).second; } } - } - else { - std::ostringstream os; - os << "FDB RootManager substituteVars: cannot find a value for '" << word << "' in " < RootManager::possibleDbPathNames(const Key& key, const std::ostringstream oss; const char *sep = ""; - - for (auto& k : key.names()) { - auto& v = key.get(k); + for (const auto& k : key.names()) { + const auto& v = key.get(k); oss << sep; - oss << (v == missing || v.empty() ? missing : key.canonicalValue(k)); + oss << (v == missing || v.empty() ? missing : v); sep = ":"; } result.push_back(oss.str()); @@ -668,7 +664,7 @@ std::vector RootManager::visitableRoots(const Key& key) { std::vector RootManager::visitableRoots(const metkit::mars::MarsRequest& request) { std::set keys; - config_.schema().matchFirstLevel(request, keys, ""); + config_.schema().matchDatabase(request, keys, ""); return visitableRoots(keys); } diff --git a/src/fdb5/toc/TocCatalogueReader.cc b/src/fdb5/toc/TocCatalogueReader.cc index 0649c6873..947761d0d 100644 --- a/src/fdb5/toc/TocCatalogueReader.cc +++ b/src/fdb5/toc/TocCatalogueReader.cc @@ -9,14 +9,14 @@ */ #include +#include #include "eckit/log/Log.h" #include "fdb5/LibFdb5.h" +#include "fdb5/database/Key.h" #include "fdb5/toc/TocCatalogueReader.h" #include "fdb5/toc/TocIndex.h" -#include "fdb5/toc/TocStats.h" -#include "fdb5/toc/RootManager.h" namespace fdb5 { @@ -24,19 +24,16 @@ namespace fdb5 { TocCatalogueReader::TocCatalogueReader(const Key& dbKey, const fdb5::Config& config) : TocCatalogue(dbKey, config) { - loadIndexesAndRemap(); } -TocCatalogueReader::TocCatalogueReader(const eckit::URI& uri, const fdb5::Config& config) : - TocCatalogue(uri.path(), ControlIdentifiers{}, config) { - loadIndexesAndRemap(); -} +TocCatalogueReader::TocCatalogueReader(const eckit::URI& uri, const fdb5::Config& config): + TocCatalogue(uri.path(), ControlIdentifiers {}, config) { } TocCatalogueReader::~TocCatalogueReader() { LOG_DEBUG_LIB(LibFdb5) << "Closing DB " << *dynamic_cast(this) << std::endl; } -void TocCatalogueReader::loadIndexesAndRemap() { +void TocCatalogueReader::loadIndexesAndRemap() const { std::vector remapKeys; /// @todo: this should throw DatabaseNotFoundException if the toc file is not found std::vector indexes = loadIndexes(false, nullptr, nullptr, &remapKeys); @@ -57,16 +54,14 @@ bool TocCatalogueReader::selectIndex(const Key& idxKey) { currentIndexKey_ = idxKey; matching_.clear(); - for (auto idx = indexes_.begin(); idx != indexes_.end(); ++idx) { - if (idx->first.key() == idxKey) { - matching_.push_back(&(*idx)); - } + for (const auto& pair : mappedIndexes()) { + if (pair.first.key() == idxKey) { matching_.emplace_back(&pair); } } LOG_DEBUG_LIB(LibFdb5) << "TocCatalogueReader::selectIndex " << idxKey << ", found " << matching_.size() << " matche(s)" << std::endl; - return (matching_.size() != 0); + return !matching_.empty(); } void TocCatalogueReader::deselectIndex() { @@ -87,13 +82,13 @@ bool TocCatalogueReader::open() { return true; } -bool TocCatalogueReader::axis(const std::string &keyword, eckit::StringSet &s) const { +bool TocCatalogueReader::axis(const std::string& keyword, eckit::DenseSet& s) const { bool found = false; - for (auto m = matching_.begin(); m != matching_.end(); ++m) { - if ((*m)->first.axes().has(keyword)) { + for (const auto* pair : matching_) { + const auto& index = pair->first; + if (index.axes().has(keyword)) { found = true; - const eckit::DenseSet& a = (*m)->first.axes().values(keyword); - s.insert(a.begin(), a.end()); + s.merge(index.axes().values(keyword)); } } return found; @@ -109,14 +104,37 @@ bool TocCatalogueReader::retrieve(const Key& key, Field& field) const { LOG_DEBUG_LIB(LibFdb5) << "Trying to retrieve key " << key << std::endl; LOG_DEBUG_LIB(LibFdb5) << "Scanning indexes " << matching_.size() << std::endl; - for (const auto& m : matching_) { - const Index& idx(m->first); - Key remapKey = m->second; + const MatchList* matching = nullptr; + + const auto& names = key.names(); + for (const auto& name : names) { + Key tmpKey = key; + tmpKey.unset(name); + + // if (std::map::const_iterator iter = keyMatching_.find(tmpKey); iter != keyMatching_.end()) { + if (const auto& iter = keyMatching_.find(tmpKey); iter != keyMatching_.end()) { + matching = &(iter->second); + break; + } + + // make refined list + for (const auto* pair : matching_) { + if (pair->first.mayContainPartial(tmpKey)) { keyMatching_[tmpKey].emplace_back(pair); } + } + } - if (idx.mayContain(key)) { - const_cast(idx).open(); - if (idx.get(key, remapKey, field)) { - return true; + if (matching) { + for (const auto* pair : *matching) { + const auto& [index, remapKey] = *pair; + const_cast(index).open(); + if (index.get(key, remapKey, field)) { return true; } + } + } else { + for (const auto* pair : matching_) { + const auto& [index, remapKey] = *pair; + if (index.mayContain(key)) { + const_cast(index).open(); + if (index.get(key, remapKey, field)) { return true; } } } } @@ -130,10 +148,8 @@ void TocCatalogueReader::print(std::ostream &out) const { std::vector TocCatalogueReader::indexes(bool sorted) const { std::vector returnedIndexes; - returnedIndexes.reserve(indexes_.size()); - for (auto idx = indexes_.begin(); idx != indexes_.end(); ++idx) { - returnedIndexes.emplace_back(idx->first); - } + returnedIndexes.reserve(mappedIndexes().size()); + for (const auto& pair : mappedIndexes()) { returnedIndexes.emplace_back(pair.first); } // If required, sort the indexes by file, and location within the file, for efficient iteration. if (sorted) { diff --git a/src/fdb5/toc/TocCatalogueReader.h b/src/fdb5/toc/TocCatalogueReader.h index be0c17a29..ff766cb2e 100644 --- a/src/fdb5/toc/TocCatalogueReader.h +++ b/src/fdb5/toc/TocCatalogueReader.h @@ -16,6 +16,16 @@ #ifndef fdb5_TocCatalogueReader_H #define fdb5_TocCatalogueReader_H +#include +#include +#include +#include +#include + +#include "fdb5/database/Catalogue.h" +#include "fdb5/database/Field.h" +#include "fdb5/database/Index.h" +#include "fdb5/database/Key.h" #include "fdb5/toc/TocCatalogue.h" namespace fdb5 { @@ -25,6 +35,10 @@ namespace fdb5 { /// DB that implements the FDB on POSIX filesystems class TocCatalogueReader : public TocCatalogue, public CatalogueReader { +private: // types + using IndexKey = std::pair; + using MapList = std::vector; + using MatchList = std::vector; public: // methods @@ -36,9 +50,8 @@ class TocCatalogueReader : public TocCatalogue, public CatalogueReader { std::vector indexes(bool sorted) const override; DbStats stats() const override { return TocHandler::stats(); } -private: // methods - - void loadIndexesAndRemap(); +private: // methods + void loadIndexesAndRemap() const; bool selectIndex(const Key& idxKey) override; void deselectIndex() override; @@ -46,23 +59,35 @@ class TocCatalogueReader : public TocCatalogue, public CatalogueReader { void flush(size_t archivedFields) override {} void clean() override {} void close() override; - - bool axis(const std::string &keyword, eckit::StringSet &s) const override; + + bool axis(const std::string& keyword, eckit::DenseSet& s) const override; bool retrieve(const Key& key, Field& field) const override; void print( std::ostream &out ) const override; + template + static auto& getOrMapIndexes(T& toc) { + if (toc.indexes_.empty()) { toc.loadIndexesAndRemap(); } + return toc.indexes_; + } + + auto mappedIndexes() -> MapList& { return getOrMapIndexes(*this); } + + auto mappedIndexes() const -> const MapList& { return getOrMapIndexes(*this); } + private: // members // Indexes matching current key. If there is a key remapping for a mounted // SubToc, then this is stored alongside - std::vector*> matching_; + MatchList matching_; + + // A lookup for further refined details, if we can go beyond the current set of matching indexes + mutable std::map keyMatching_; // All indexes // If there is a key remapping for a mounted SubToc, this is stored alongside - std::vector> indexes_; - + mutable MapList indexes_; }; //---------------------------------------------------------------------------------------------------------------------- diff --git a/src/fdb5/toc/TocCatalogueWriter.cc b/src/fdb5/toc/TocCatalogueWriter.cc index 0099af0b2..f13e9b019 100644 --- a/src/fdb5/toc/TocCatalogueWriter.cc +++ b/src/fdb5/toc/TocCatalogueWriter.cc @@ -156,11 +156,10 @@ void TocCatalogueWriter::reconsolidateIndexesAndTocs() { ~ConsolidateIndexVisitor() override {} private: void visitDatum(const Field& field, const Key& datumKey) override { - // TODO: Do a sneaky schema.expand() here, prepopulated with the current DB/index/Rule, + /// @todo Do a sneaky schema.expand() here, prepopulated with the current DB/index/Rule, // to extract the full key, including optional values. const TocFieldLocation& location(static_cast(field.location())); writer_.index(datumKey, location.uri(), location.offset(), location.length()); - } void visitDatum(const Field& field, const std::string& keyFingerprint) override { EntryVisitor::visitDatum(field, keyFingerprint); @@ -255,8 +254,8 @@ void TocCatalogueWriter::overlayDB(const Catalogue& otherCat, const std::set& dbs) c eckit::StdDir d(path.c_str()); - // Once readdir_r finally gets deprecated and removed, we may need to + // Once readdir_r finally gets deprecated and removed, we may need to // protecting readdir() as not yet guarranteed thread-safe by POSIX // technically it should only be needed on a per-directory basis // this should be a resursive mutex - // AutoLock lock(mutex_); + // AutoLock lock(mutex_); for(;;) { @@ -115,13 +115,13 @@ bool TocEngine::canHandle(const eckit::URI& uri, const Config& config) const static void matchKeyToDB(const Key& key, std::set& keys, const char* missing, const Config& config) { const Schema& schema = config.schema(); - schema.matchFirstLevel(key, keys, missing); + schema.matchDatabase(key, keys, missing); } static void matchRequestToDB(const metkit::mars::MarsRequest& rq, std::set& keys, const char* missing, const Config& config) { const Schema& schema = config.schema(); - schema.matchFirstLevel(rq, keys, missing); + schema.matchDatabase(rq, keys, missing); } static constexpr const char* regexForMissingValues = "[^:/]*"; @@ -221,6 +221,7 @@ std::vector TocEngine::databases(const metkit::mars::MarsRequest& re std::vector result; for (eckit::PathName path : databasesMatchRegex) { try { + /// @todo we don't have to open tocs to check if they match the request if (path.exists()) { if (!path.isDir()) path = path.dirName(); diff --git a/src/fdb5/toc/TocHandler.cc b/src/fdb5/toc/TocHandler.cc index 90947c209..33cf428ba 100644 --- a/src/fdb5/toc/TocHandler.cc +++ b/src/fdb5/toc/TocHandler.cc @@ -188,12 +188,15 @@ TocHandler::TocHandler(const eckit::PathName& path, const Key& parentKey, Memory } for (const auto& kv : parentKey) { - auto it = key.find(kv.first); - if (it == key.end()) { + const auto [it, found] = key.find(kv.first); + + if (!found) { std::stringstream ss; ss << "Keys insufficiently matching for mount: " << key << " : " << parentKey; throw UserError(ss.str(), Here()); - } else if (kv.second != it->second) { + } + + if (kv.second != it->second) { remapKey_.set(kv.first, kv.second); } } diff --git a/src/fdb5/toc/TocIndex.cc b/src/fdb5/toc/TocIndex.cc index bedcb4312..4b8228c1d 100644 --- a/src/fdb5/toc/TocIndex.cc +++ b/src/fdb5/toc/TocIndex.cc @@ -8,14 +8,12 @@ * does it submit to any jurisdiction. */ -#include "eckit/log/BigNum.h" - -#include "fdb5/LibFdb5.h" -#include "fdb5/toc/TocStats.h" #include "fdb5/toc/TocIndex.h" +#include "fdb5/LibFdb5.h" #include "fdb5/toc/BTreeIndex.h" #include "fdb5/toc/FieldRef.h" #include "fdb5/toc/TocFieldLocation.h" +#include "fdb5/toc/TocStats.h" namespace fdb5 { @@ -47,26 +45,34 @@ class TocIndexCloser { /// before the type_ members of Index, but Indexs WILL be constructed before /// the members of TocIndex -TocIndex::TocIndex(const Key& key, const Catalogue& catalogue, const eckit::PathName &path, off_t offset, Mode mode, const std::string& type ) : - UriStoreWrapper(path.dirName()), - IndexBase(key, type, catalogue), - btree_(nullptr), - dirty_(false), - mode_(mode), - location_(path, offset), - preloadBTree_(false) { -} - -TocIndex::TocIndex(eckit::Stream &s, const Catalogue& catalogue, const int version, const eckit::PathName &directory, const eckit::PathName &path, - off_t offset, bool preloadBTree): - UriStoreWrapper(directory, s), - IndexBase(s, version, catalogue), - btree_(nullptr), - dirty_(false), - mode_(TocIndex::READ), - location_(path, offset), - preloadBTree_(preloadBTree) { -} +TocIndex::TocIndex(const Key& key, + const Catalogue& catalogue, + const eckit::PathName& path, + off_t offset, + Mode mode, + const std::string& type) + : UriStoreWrapper(path.dirName()), + IndexBase(key, type), + btree_(nullptr), + dirty_(false), + mode_(mode), + location_(path, offset), + preloadBTree_(false) { } + +TocIndex::TocIndex(eckit::Stream& s, + const Catalogue& catalogue, + const int version, + const eckit::PathName& directory, + const eckit::PathName& path, + off_t offset, + bool preloadBTree) + : UriStoreWrapper(directory, s), + IndexBase(s, version), + btree_(nullptr), + dirty_(false), + mode_(TocIndex::READ), + location_(path, offset), + preloadBTree_(preloadBTree) { } TocIndex::~TocIndex() { close(); @@ -200,7 +206,7 @@ std::string TocIndex::defaulType() { return BTreeIndex::defaulType(); } -const std::vector TocIndex::dataURIs() const { +std::vector TocIndex::dataURIs() const { return uris_.paths(); } diff --git a/src/fdb5/toc/TocIndex.h b/src/fdb5/toc/TocIndex.h index d112c6c4d..eede6dbd9 100644 --- a/src/fdb5/toc/TocIndex.h +++ b/src/fdb5/toc/TocIndex.h @@ -85,11 +85,11 @@ class TocIndex : void flock() const override; void funlock() const override; - -private: // methods +private: // methods const IndexLocation& location() const override { return location_; } - const std::vector dataURIs() const override; + + std::vector dataURIs() const override; bool dirty() const override; diff --git a/src/fdb5/toc/TocStats.h b/src/fdb5/toc/TocStats.h index 80700fc61..764a04834 100644 --- a/src/fdb5/toc/TocStats.h +++ b/src/fdb5/toc/TocStats.h @@ -161,7 +161,8 @@ class TocStatsReportVisitor : public virtual StatsReportVisitor { bool visitDatabase(const Catalogue& catalogue) override; void visitDatum(const Field& field, const std::string& keyFingerprint) override; - void visitDatum(const Field& field, const Key& datumKey) override { NOTIMP; } + + void visitDatum(const Field& /*field*/, const Key& /*datumKey*/) override { NOTIMP; } // This visitor is only legit for one DB - so don't reset database void catalogueComplete(const Catalogue& catalogue) override; diff --git a/src/fdb5/tools/FDBVisitTool.cc b/src/fdb5/tools/FDBVisitTool.cc index 04c6cce9d..ae7651cb3 100644 --- a/src/fdb5/tools/FDBVisitTool.cc +++ b/src/fdb5/tools/FDBVisitTool.cc @@ -14,8 +14,9 @@ #include "eckit/option/VectorOption.h" #include "eckit/option/CmdArgs.h" -#include "metkit/mars/MarsParser.h" #include "metkit/mars/MarsExpension.h" +#include "metkit/mars/MarsParser.h" +#include "metkit/mars/MarsRequest.h" #include "fdb5/tools/FDBVisitTool.h" @@ -105,7 +106,7 @@ std::vector FDBVisitTool::requests(const std::string& verb) cons if (all_) { ASSERT(requests_.empty()); - requests.emplace_back(FDBToolRequest(metkit::mars::MarsRequest{}, all_, minimumKeys_)); + requests.emplace_back(metkit::mars::MarsRequest {}, all_, minimumKeys_); } else { for (const std::string& request_string : requests_) { diff --git a/src/fdb5/tools/fdb-copy.cc b/src/fdb5/tools/fdb-copy.cc index f4e927dac..92ab90ac6 100644 --- a/src/fdb5/tools/fdb-copy.cc +++ b/src/fdb5/tools/fdb-copy.cc @@ -27,10 +27,10 @@ using namespace eckit::option; class FDBCopy : public fdb5::FDBTool { - virtual void execute(const CmdArgs &args); - virtual void usage(const std::string &tool) const; + void execute(const CmdArgs& args) override; + void usage(const std::string& tool) const override; - public: +public: FDBCopy(int argc, char **argv): fdb5::FDBTool(argc, argv) { options_.push_back(new SimpleOption("verbose", "Print verbose output")); options_.push_back(new SimpleOption("raw", "Process the MARS request without expansion")); @@ -101,7 +101,7 @@ void FDBCopy::execute(const CmdArgs& args) { fdb5::HandleGatherer handles(sort); fdb5::FDB fdbRead(readConfig); - + for (const auto& request : requests) { eckit::Log::info() << request << std::endl; handles.add(fdbRead.retrieve(request)); @@ -112,9 +112,8 @@ void FDBCopy::execute(const CmdArgs& args) { fdb5::MessageArchiver fdbWriter(fdb5::Key(), false, verbose, writeConfig); fdbWriter.archive(*dh); } - + int main(int argc, char **argv) { FDBCopy app(argc, argv); return app.start(); } - diff --git a/src/fdb5/tools/fdb-hide.cc b/src/fdb5/tools/fdb-hide.cc index f9849ef54..682027dc1 100644 --- a/src/fdb5/tools/fdb-hide.cc +++ b/src/fdb5/tools/fdb-hide.cc @@ -8,62 +8,53 @@ * does it submit to any jurisdiction. */ +#include "eckit/exception/Exceptions.h" #include "eckit/option/CmdArgs.h" #include "eckit/option/SimpleOption.h" +#include "fdb5/LibFdb5.h" #include "fdb5/api/helpers/FDBToolRequest.h" #include "fdb5/config/Config.h" -#include "fdb5/database/Key.h" -#include "fdb5/LibFdb5.h" +#include "fdb5/database/Catalogue.h" #include "fdb5/rules/Schema.h" #include "fdb5/toc/TocCatalogueWriter.h" #include "fdb5/toc/TocEngine.h" #include "fdb5/tools/FDBTool.h" -using namespace eckit; using namespace eckit::option; -namespace fdb5 { -namespace tools { +namespace fdb5::tools { //---------------------------------------------------------------------------------------------------------------------- -class FdbHide : public FDBTool { - -public: // methods - - FdbHide(int argc, char **argv) : - FDBTool(argc, argv), - doit_(false) { +class FdbHide: public FDBTool { +public: // methods + FdbHide(int argc, char** argv): FDBTool(argc, argv) { options_.push_back(new SimpleOption("doit", "Do the actual change")); } -private: // methods - - virtual void init(const option::CmdArgs& args); - virtual void execute(const option::CmdArgs& args); - virtual void usage(const std::string &tool) const; +private: // methods + void init(const CmdArgs& args) override; + void execute(const CmdArgs& args) override; + void usage(const std::string& tool) const override; -private: // members - - bool doit_; +private: // members + bool doit_ {false}; }; -void FdbHide::usage(const std::string &tool) const { +void FdbHide::usage(const std::string& tool) const { + + eckit::Log::info() << "\nUsage: " << tool << " [options] [DB request]\n\n\n"; - Log::info() << std::endl - << "Usage: " << tool << " [options] [DB request]" << std::endl - << std::endl - << std::endl; FDBTool::usage(tool); } -void FdbHide::init(const option::CmdArgs& args) { +void FdbHide::init(const CmdArgs& args) { FDBTool::init(args); - doit_ = args.getBool("doit", false); + doit_ = args.getBool("doit", doit_); } -void FdbHide::execute(const option::CmdArgs& args) { +void FdbHide::execute(const CmdArgs& args) { Config conf = config(args); @@ -78,41 +69,37 @@ void FdbHide::execute(const option::CmdArgs& args) { const auto& dbrequest = dbrequests.front(); ASSERT(!dbrequest.all()); - const Schema& schema = conf.schema(); + const auto& keys = conf.schema().expandDatabase(dbrequest.request()); - TypedKey dbkey{schema.registry()}; - ASSERT(schema.expandFirstLevel(dbrequest.request(), dbkey)); + if (keys.empty()) { throw eckit::UserError("Invalid request", Here()); } - const auto dbCanonicalKey = dbkey.canonical(); + /// @todo do we want to assert that expandDatabase returns only one key ? - std::unique_ptr db = CatalogueReaderFactory::instance().build(dbCanonicalKey, conf); - if (!db->exists()) { - std::stringstream ss; - ss << "Database not found: " << dbkey << std::endl; - throw UserError(ss.str(), Here()); - } + for (const auto& key : keys) { - if (db->type() != TocEngine::typeName()) { - std::stringstream ss; - ss << "Only TOC DBs currently supported" << std::endl; - throw UserError(ss.str(), Here()); - } + auto db = CatalogueReaderFactory::instance().build(key, conf); + if (!db->exists()) { + std::stringstream ss; + ss << "Database not found: " << key << std::endl; + throw eckit::UserError(ss.str(), Here()); + } + + if (db->type() != TocEngine::typeName()) { throw eckit::UserError("Only TOC DBs currently supported", Here()); } - eckit::Log::info() << "Hide contents of DB: " << *db << std::endl; - if (doit_) { - std::unique_ptr dbWriter = CatalogueWriterFactory::instance().build(dbCanonicalKey, conf); - TocCatalogueWriter* tocDB = dynamic_cast(dbWriter.get()); - ASSERT(tocDB); - tocDB->hideContents(); - } else { - eckit::Log::info() << "Run with --doit to make changes" << std::endl; + eckit::Log::info() << "Hide contents of DB: " << *db << std::endl; + if (doit_) { + auto dbWriter = CatalogueWriterFactory::instance().build(key, conf); + auto* tocDB = dynamic_cast(dbWriter.get()); + tocDB->hideContents(); + } else { + eckit::Log::info() << "Run with --doit to make changes" << std::endl; + } } } //---------------------------------------------------------------------------------------------------------------------- -} // namespace tools -} // namespace fbb5 +} // namespace fdb5::tools int main(int argc, char **argv) { fdb5::tools::FdbHide app(argc, argv); diff --git a/src/fdb5/tools/fdb-inspect.cc b/src/fdb5/tools/fdb-inspect.cc new file mode 100644 index 000000000..d9b07bb98 --- /dev/null +++ b/src/fdb5/tools/fdb-inspect.cc @@ -0,0 +1,140 @@ +/* + * (C) Copyright 1996- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation nor + * does it submit to any jurisdiction. + */ + +/// @file fdb-inspect.cc +/// @author Metin Cakircali +/// @date Aug 2024 + +#include "eckit/exception/Exceptions.h" +#include "eckit/log/Log.h" +#include "eckit/option/CmdArgs.h" +#include "eckit/option/SimpleOption.h" +#include "fdb5/LibFdb5.h" +#include "fdb5/api/FDB.h" +#include "fdb5/api/helpers/FDBToolRequest.h" +#include "fdb5/api/helpers/ListElement.h" +#include "fdb5/api/helpers/ListIterator.h" +#include "fdb5/tools/FDBTool.h" +#include "fdb5/tools/FDBVisitTool.h" +#include "metkit/mars/MarsRequest.h" + +#include +#include +#include +#include +#include +#include + +namespace fdb5::tools { + +//---------------------------------------------------------------------------------------------------------------------- +// HELPER FUNCTIONS + +namespace { + +std::atomic elemCount {0}; + +using metkit::mars::MarsRequest; + +void inspect(const Config& config, const MarsRequest& request, bool output) { + auto iter = FDB(config).inspect(request); + + ListElement elem; + while (iter.next(elem)) { + ++elemCount; + if (output) { + elem.print(eckit::Log::info(), true, false, true, ", "); + eckit::Log::info() << std::endl; + } + } +} + +} // namespace + +//---------------------------------------------------------------------------------------------------------------------- + +class FDBInspectTool: public FDBVisitTool { +public: // methods + FDBInspectTool(int argc, char** argv): FDBVisitTool(argc, argv, "") { + using eckit::option::SimpleOption; + options_.push_back(new SimpleOption("output", "Print the output of the inspection")); + options_.push_back(new SimpleOption("parallel", "Number of parallel tasks to run")); + } + +private: // methods + void init(const eckit::option::CmdArgs& args) override; + + void execute(const eckit::option::CmdArgs& args) override; + + bool output_ {false}; + + std::size_t parallel_ {1}; +}; + +//---------------------------------------------------------------------------------------------------------------------- + +void FDBInspectTool::init(const eckit::option::CmdArgs& args) { + FDBVisitTool::init(args); + + output_ = args.getBool("output", output_); + + parallel_ = args.getUnsigned("parallel", parallel_); + + if (parallel_ < 1) { throw eckit::UserError("Number of parallel tasks must be greater than 0"); } +} + +//---------------------------------------------------------------------------------------------------------------------- + +void printHeader(const FDBToolRequest& request) { + eckit::Log::info() << "Inspecting request: " << request << std::endl; +} + +void FDBInspectTool::execute(const eckit::option::CmdArgs& args) { + const auto fdbConfig = config(args); + const auto toolRequests = requests(); + + eckit::Log::info() << "Number of requests: " << toolRequests.size() << std::endl; + + if (parallel_ == 1) { + for (const auto& req : toolRequests) { inspect(fdbConfig, req.request(), output_); } + } else { + using req_iter_t = std::vector::const_iterator; + + const auto inspectFn = [&](req_iter_t begin, req_iter_t end) { + for (auto reqIter = begin; reqIter != end; ++reqIter) { inspect(fdbConfig, reqIter->request(), output_); } + }; + + if (toolRequests.size() < parallel_) { parallel_ = toolRequests.size(); } + + const auto requestPerThread = toolRequests.size() / parallel_; + + std::vector threads(parallel_); + + auto reqIter = toolRequests.begin(); + for (auto iter = threads.begin(); iter != threads.end() - 1; ++iter) { + *iter = std::thread(inspectFn, reqIter, reqIter + requestPerThread); + reqIter += requestPerThread; + } + threads.back() = std::thread(inspectFn, reqIter, toolRequests.end()); + + for (auto&& thread : threads) { thread.join(); } + } + + eckit::Log::info() << "Number of elements: " << elemCount << '\n'; +} + +//---------------------------------------------------------------------------------------------------------------------- + +} // namespace fdb5::tools + +int main(int argc, char** argv) { + fdb5::tools::FDBInspectTool app(argc, argv); + return app.start(); +} diff --git a/src/fdb5/tools/fdb-list.cc b/src/fdb5/tools/fdb-list.cc index 907faa756..5a9372f09 100644 --- a/src/fdb5/tools/fdb-list.cc +++ b/src/fdb5/tools/fdb-list.cc @@ -8,17 +8,27 @@ * does it submit to any jurisdiction. */ +#include +#include +#include #include +#include +#include +#include "eckit/exception/Exceptions.h" +#include "eckit/log/JSON.h" +#include "eckit/log/Log.h" #include "eckit/option/CmdArgs.h" #include "eckit/option/SimpleOption.h" -#include "eckit/option/CmdArgs.h" -#include "eckit/log/JSON.h" +#include "fdb5/LibFdb5.h" #include "metkit/hypercube/HyperCube.h" +#include "metkit/mars/MarsRequest.h" #include "fdb5/api/FDB.h" #include "fdb5/api/helpers/FDBToolRequest.h" +#include "fdb5/api/helpers/ListElement.h" +#include "fdb5/api/helpers/ListIterator.h" #include "fdb5/database/Index.h" #include "fdb5/rules/Schema.h" #include "fdb5/tools/FDBVisitTool.h" @@ -26,88 +36,82 @@ using namespace eckit; using namespace eckit::option; -namespace fdb5 { -namespace tools { +namespace fdb5::tools { //---------------------------------------------------------------------------------------------------------------------- -class FDBList : public FDBVisitTool { - - public: // methods - - FDBList(int argc, char **argv) : - FDBVisitTool(argc, argv, "class,expver"), - location_(false), - timestamp_(false), - length_(false), - full_(false), - porcelain_(false), - json_(false) { - +class FDBList: public FDBVisitTool { +public: // methods + FDBList(int argc, char** argv): FDBVisitTool(argc, argv, "class,expver") { options_.push_back(new SimpleOption("location", "Also print the location of each field")); options_.push_back(new SimpleOption("timestamp", "Also print the timestamp when the field was indexed")); options_.push_back(new SimpleOption("length", "Also print the field size")); options_.push_back(new SimpleOption("full", "Include all entries (including masked duplicates)")); - options_.push_back(new SimpleOption("porcelain", "Streamlined and stable output for input into other tools")); + options_.push_back( + new SimpleOption("porcelain", + "Streamlined and stable output. Useful as input for other tools or scripts." + "Incompatible with options: location, timestamp, and length")); options_.push_back(new SimpleOption("json", "Output available fields in JSON form")); options_.push_back(new SimpleOption("compact", "Aggregate available fields in MARS requests")); + options_.push_back(new SimpleOption("depth", "Output entries up to 'depth' levels deep [1-3]")); } - private: // methods - - virtual void execute(const CmdArgs& args); - virtual void init(const CmdArgs &args); - - bool location_; - bool timestamp_; - bool length_; - bool full_; - bool porcelain_; - bool json_; - bool compact_; +private: // methods + void execute(const CmdArgs& args) override; + void init(const CmdArgs& args) override; + + bool location_ {false}; + bool timestamp_ {false}; + bool length_ {false}; + bool full_ {false}; + bool porcelain_ {false}; + bool json_ {false}; + bool compact_ {false}; + int depth_ {3}; }; +//---------------------------------------------------------------------------------------------------------------------- -std::string keySignature(const fdb5::Key& key) { +static std::string keySignature(const fdb5::Key& key) { std::string signature; std::string separator; - for (auto k : key.keys()) { - signature += separator+k; - separator=":"; + for (auto&& k : key.keys()) { + signature += separator + k; + separator = ":"; } return signature; } - void FDBList::init(const CmdArgs& args) { FDBVisitTool::init(args); - location_ = args.getBool("location", false); - timestamp_ = args.getBool("timestamp", false); - length_ = args.getBool("length", false); - full_ = args.getBool("full", false); - porcelain_ = args.getBool("porcelain", false); - json_ = args.getBool("json", false); - compact_ = args.getBool("compact", false); + location_ = args.getBool("location", location_); + timestamp_ = args.getBool("timestamp", timestamp_); + length_ = args.getBool("length", length_); + full_ = args.getBool("full", full_); + porcelain_ = args.getBool("porcelain", porcelain_); + json_ = args.getBool("json", json_); + compact_ = args.getBool("compact", compact_); + depth_ = args.getInt("depth", depth_); + + ASSERT(depth_ > 0 && depth_ < 4); if (json_) { + eckit::Log::debug() << "Setting porcelain=true" << '\n'; porcelain_ = true; - if (location_ || timestamp_ || length_) { - throw UserError("--json and --location/--timestamp/--length not compatible", Here()); - } + } + + if (porcelain_) { + if (location_) { throw UserError("--porcelain and --location are not compatible", Here()); } + if (timestamp_) { throw UserError("--porcelain and --timestamp are not compatible", Here()); } + if (length_) { throw UserError("--porcelain and --length are not compatible", Here()); } + if (compact_) { throw UserError("--porcelain and --compact are not compatible", Here()); } } if (compact_) { - if (location_) { - throw UserError("--compact and --location are not compatible", Here()); - } - if (full_) { - throw UserError("--compact and --full are not compatible", Here()); - } - if (porcelain_) { - throw UserError("--compact and --porcelain are not compatible", Here()); - } + if (location_) { throw UserError("--compact and --location are not compatible", Here()); } + if (full_) { throw UserError("--compact and --full are not compatible", Here()); } } /// @todo option ignore-errors @@ -119,7 +123,7 @@ void FDBList::execute(const CmdArgs& args) { std::unique_ptr json; if (json_) { - json.reset(new JSON(Log::info())); + json = std::make_unique(Log::info()); json->startList(); } @@ -132,14 +136,14 @@ void FDBList::execute(const CmdArgs& args) { } // If --full is supplied, then include all entries including duplicates. - auto listObject = fdb.list(request, !full_ && !compact_); + auto listObject = fdb.list(request, !full_ && !compact_, depth_); std::map>>> requests; ListElement elem; while (listObject.next(elem)) { if (compact_) { - std::vector keys = elem.key(); + const auto& keys = elem.keys(); ASSERT(keys.size() == 3); std::string treeAxes = keys[0]; @@ -162,19 +166,20 @@ void FDBList::execute(const CmdArgs& args) { it->second.emplace(signature, std::make_pair(keys[2].request(), std::unordered_set{keys[2]})); } } - } else { - if (json_) { - (*json) << elem; - } else { - if (porcelain_) { - elem.print(Log::info(), location_, false, false); - } else { - elem.print(Log::info(), location_, length_, timestamp_, ", "); - } - Log::info() << std::endl; - } + continue; } - } + + // JSON output + if (json) { + *json << elem; + continue; + } + + elem.print(Log::info(), location_, length_, timestamp_, ", "); + Log::info() << std::endl; + + } // while + if (compact_) { for (const auto& tree: requests) { for (const auto& leaf: tree.second) { @@ -197,20 +202,17 @@ void FDBList::execute(const CmdArgs& args) { } } // n.b. finding no data is not an error for fdb-list - } - if (json_) { - json->endList(); - } + } // requests + + if (json) { json->endList(); } } //---------------------------------------------------------------------------------------------------------------------- -} // namespace tools -} // namespace fdb5 +} // namespace fdb5::tools int main(int argc, char **argv) { fdb5::tools::FDBList app(argc, argv); return app.start(); } - diff --git a/src/fdb5/tools/fdb-overlay.cc b/src/fdb5/tools/fdb-overlay.cc index 900364827..d62913e0b 100644 --- a/src/fdb5/tools/fdb-overlay.cc +++ b/src/fdb5/tools/fdb-overlay.cc @@ -8,72 +8,66 @@ * does it submit to any jurisdiction. */ +#include "eckit/exception/Exceptions.h" #include "eckit/option/CmdArgs.h" #include "eckit/option/SimpleOption.h" #include "eckit/option/VectorOption.h" +#include "fdb5/LibFdb5.h" #include "fdb5/api/helpers/FDBToolRequest.h" #include "fdb5/config/Config.h" #include "fdb5/database/Key.h" -#include "fdb5/LibFdb5.h" #include "fdb5/rules/Schema.h" #include "fdb5/toc/TocEngine.h" #include "fdb5/tools/FDBTool.h" -using namespace eckit; +#include + using namespace eckit::option; -namespace fdb5 { -namespace tools { +namespace fdb5::tools { //---------------------------------------------------------------------------------------------------------------------- -class FdbOverlay : public FDBTool { - -public: // methods +class FdbOverlay: public FDBTool { - FdbOverlay(int argc, char **argv) : - FDBTool(argc, argv), - variableKeys_{"class", "expver"}, - remove_(false), - force_(false) { - options_.push_back(new VectorOption("variable-keys", - "The keys that may vary between mounted DBs", - 0, ",")); +public: // methods + FdbOverlay(int argc, char** argv): FDBTool(argc, argv), variableKeys_ {"class", "expver"} { + options_.push_back( + new VectorOption("variable-keys", "The keys that may vary between mounted DBs", 0, ",")); options_.push_back(new SimpleOption("remove", "Remove a previously FDB overlay")); options_.push_back(new SimpleOption("force", "Apply overlay even if target already exists")); } -private: // methods - - virtual void init(const option::CmdArgs& args); - virtual void execute(const option::CmdArgs& args); - virtual void usage(const std::string &tool) const; - -private: // members +private: // methods + virtual void init(const CmdArgs& args); + virtual void execute(const CmdArgs& args); + virtual void usage(const std::string& tool) const; +private: // members std::vector variableKeys_; - bool remove_; - bool force_; + + bool remove_ {false}; + bool force_ {false}; }; -void FdbOverlay::usage(const std::string &tool) const { +void FdbOverlay::usage(const std::string& tool) const { + + eckit::Log::info() << "\nUsage: " << tool << " [options] [source DB request] [target DB request]\n\n\n"; - Log::info() << std::endl - << "Usage: " << tool << " [options] [source DB request] [target DB request]" << std::endl - << std::endl - << std::endl; FDBTool::usage(tool); } -void FdbOverlay::init(const option::CmdArgs& args) { +void FdbOverlay::init(const CmdArgs& args) { FDBTool::init(args); args.get("variable-keys", variableKeys_); - remove_ = args.getBool("remove", false); - force_ = args.getBool("force", false); + remove_ = args.getBool("remove", remove_); + force_ = args.getBool("force", force_); } -void FdbOverlay::execute(const option::CmdArgs& args) { +void FdbOverlay::execute(const CmdArgs& args) { + + const Config conf = config(args); if (args.count() != 2) { usage("fdb-overlay"); @@ -81,66 +75,69 @@ void FdbOverlay::execute(const option::CmdArgs& args) { } auto parsedSource = FDBToolRequest::requestsFromString("domain=g," + args(0), {}, false, "read"); - auto parsedTarget = FDBToolRequest::requestsFromString("domain=g," + args(1), {}, false, "read"); ASSERT(parsedSource.size() == 1); + + auto parsedTarget = FDBToolRequest::requestsFromString("domain=g," + args(1), {}, false, "read"); ASSERT(parsedTarget.size() == 1); const auto& sourceRequest = parsedSource.front(); - const auto& targetRequest = parsedTarget.front(); ASSERT(!sourceRequest.all()); + + const auto& targetRequest = parsedTarget.front(); ASSERT(!targetRequest.all()); - Config conf = config(args); - const Schema& schema = conf.schema(); + const auto sources = conf.schema().expandDatabase(sourceRequest.request()); + ASSERT(!sources.empty()); + + const auto targets = conf.schema().expandDatabase(targetRequest.request()); + ASSERT(!targets.empty()); - TypedKey source{conf.schema().registry()}; - TypedKey target{conf.schema().registry()}; - ASSERT(schema.expandFirstLevel(sourceRequest.request(), source)); - ASSERT(schema.expandFirstLevel(targetRequest.request(), target)); + const auto& source = sources.front(); + const auto& target = targets.front(); if (remove_) { - Log::info() << "Removing " << source << " from " << target << std::endl; + eckit::Log::info() << "Removing " << source << " from " << target << std::endl; } else { - Log::info() << "Applying " << source << " onto " << target << std::endl; + eckit::Log::info() << "Applying " << source << " onto " << target << std::endl; } if (source.keys() != target.keys()) { std::stringstream ss; ss << "Keys insufficiently matching for mount: " << source << " : " << target << std::endl; - throw UserError(ss.str(), Here()); + throw eckit::UserError(ss.str(), Here()); } std::set vkeys(variableKeys_.begin(), variableKeys_.end()); for (const auto& kv : target) { - auto it = source.find(kv.first); - ASSERT(it != source.end()); + const auto [it, found] = source.find(kv.first); + ASSERT(found); if (kv.second != it->second && vkeys.find(kv.first) == vkeys.end()) { std::stringstream ss; ss << "Key " << kv.first << " not allowed to differ between DBs: " << source << " : " << target; - throw UserError(ss.str(), Here()); + throw eckit::UserError(ss.str(), Here()); } } - std::unique_ptr dbSource = CatalogueReaderFactory::instance().build(source.canonical(), conf); + std::unique_ptr dbSource = CatalogueReaderFactory::instance().build(source, conf); if (!dbSource->exists()) { std::stringstream ss; ss << "Source database not found: " << source << std::endl; - throw UserError(ss.str(), Here()); + throw eckit::UserError(ss.str(), Here()); } if (dbSource->type() != TocEngine::typeName()) { std::stringstream ss; ss << "Only TOC DBs currently supported" << std::endl; - throw UserError(ss.str(), Here()); + throw eckit::UserError(ss.str(), Here()); } - std::unique_ptr dbTarget = CatalogueReaderFactory::instance().build(target.canonical(), conf); + std::unique_ptr dbTarget = CatalogueReaderFactory::instance().build(target, conf); if (remove_) { if (!dbTarget->exists()) { std::stringstream ss; ss << "Target database must already exist: " << target << std::endl; - throw UserError(ss.str(), Here()); + throw eckit::UserError(ss.str(), Here()); } } else { if (dbTarget->exists() && !force_) { @@ -148,13 +145,13 @@ void FdbOverlay::execute(const option::CmdArgs& args) { ss << "Target database already exists: " << target << std::endl; eckit::Log::error() << ss.str() << std::endl; eckit::Log::error() << "To mount to existing target, rerun with --force" << std::endl; - throw UserError(ss.str(), Here()); + throw eckit::UserError(ss.str(), Here()); } } ASSERT(dbTarget->uri() != dbSource->uri()); - std::unique_ptr newCatalogue = CatalogueWriterFactory::instance().build(target.canonical(), conf); + std::unique_ptr newCatalogue = CatalogueWriterFactory::instance().build(target, conf); if (newCatalogue->type() == TocEngine::typeName() && dbSource->type() == TocEngine::typeName()) { newCatalogue->overlayDB(*dbSource, vkeys, remove_); } @@ -162,8 +159,7 @@ void FdbOverlay::execute(const option::CmdArgs& args) { //---------------------------------------------------------------------------------------------------------------------- -} // namespace tools -} // namespace fbb5 +} // namespace fdb5::tools int main(int argc, char **argv) { fdb5::tools::FdbOverlay app(argc, argv); diff --git a/src/fdb5/tools/fdb-patch.cc b/src/fdb5/tools/fdb-patch.cc index 16e91b3c3..2ff0fbba0 100644 --- a/src/fdb5/tools/fdb-patch.cc +++ b/src/fdb5/tools/fdb-patch.cc @@ -143,7 +143,7 @@ void FDBPatch::execute(const CmdArgs& args) { // (n.b. listed key is broken down as-per the schema) Key key; - for (const Key& k : elem.keyParts_) { + for (const Key& k : elem.keys()) { for (const auto& kv : k) { key.set(kv.first, kv.second); } diff --git a/src/fdb5/tools/fdb-root.cc b/src/fdb5/tools/fdb-root.cc index 970858a0b..7ab1c6461 100644 --- a/src/fdb5/tools/fdb-root.cc +++ b/src/fdb5/tools/fdb-root.cc @@ -8,53 +8,48 @@ * does it submit to any jurisdiction. */ +#include "eckit/exception/Exceptions.h" #include "eckit/option/CmdArgs.h" #include "eckit/option/SimpleOption.h" +#include "fdb5/LibFdb5.h" #include "fdb5/api/helpers/FDBToolRequest.h" #include "fdb5/config/Config.h" -#include "fdb5/database/Key.h" -#include "fdb5/LibFdb5.h" #include "fdb5/rules/Schema.h" #include "fdb5/tools/FDBTool.h" -namespace fdb5 { -namespace tools { +#include + +namespace fdb5::tools { //---------------------------------------------------------------------------------------------------------------------- class FdbRoot : public FDBTool { - -public: // methods - - FdbRoot(int argc, char **argv) : - FDBTool(argc, argv) { - options_.push_back(new eckit::option::SimpleOption("create", "If a DB does not exist for the provided key, create it")); +public: // methods + FdbRoot(int argc, char** argv) : FDBTool(argc, argv) { + options_.push_back( + new eckit::option::SimpleOption("create", "If a DB does not exist for the provided key, create it")); } -private: // methods - - virtual void execute(const eckit::option::CmdArgs& args); - virtual void usage(const std::string &tool) const; +private: // methods + void execute(const eckit::option::CmdArgs& args) override; + void usage(const std::string& tool) const override; }; -void FdbRoot::usage(const std::string &tool) const { - - eckit::Log::info() << std::endl - << "Usage: " << tool << " [options] [request1] [request2] ..." << std::endl - << std::endl - << std::endl - << "Examples:" << std::endl - << "=========" << std::endl << std::endl - << tool << " class=od,expver=0001,stream=oper,date=20160428,time=1200" - << std::endl - << std::endl; +void FdbRoot::usage(const std::string& tool) const { + + eckit::Log::info() << "\nUsage: " << tool << " [options] [request1] [request2] ...\n\n\n" + << "Examples:\n=========\n\n" + << tool << " class=od,expver=0001,stream=oper,date=20160428,time=1200\n\n"; + FDBTool::usage(tool); } void FdbRoot::execute(const eckit::option::CmdArgs& args) { - bool create_db = args.getBool("create", false); + const auto conf = config(args); + + const bool create = args.getBool("create", false); for (size_t i = 0; i < args.count(); ++i) { @@ -63,24 +58,23 @@ void FdbRoot::execute(const eckit::option::CmdArgs& args) { for (const auto& request : parsed) { - Config conf = config(args); - const Schema& schema = conf.schema(); - TypedKey result{conf.schema().registry()}; - ASSERT( schema.expandFirstLevel(request.request(), result) ); - const auto key = result.canonical(); + const auto& keys = conf.schema().expandDatabase(request.request()); - eckit::Log::info() << result << std::endl; + if (keys.empty()) { throw eckit::UserError("Invalid request", Here()); } - // 'Touch' the database (which will create it if it doesn't exist) + /// @todo this is running over keys, which needs more thoughts - std::unique_ptr cat = CatalogueReaderFactory::instance().build(key, conf); + for (const auto& key : keys) { - if (!cat->exists() && create_db) { - cat = CatalogueWriterFactory::instance().build(key, conf); - } + eckit::Log::info() << key << std::endl; + + // 'Touch' the database (which will create it if it doesn't exist) + + std::unique_ptr cat = CatalogueReaderFactory::instance().build(key, conf); + + if (!cat->exists() && create) { cat = CatalogueWriterFactory::instance().build(key, conf); } - if (cat->exists()) { - eckit::Log::info() << (*cat) << std::endl; + if (cat->exists()) { eckit::Log::info() << (*cat) << std::endl; } } } } @@ -88,10 +82,9 @@ void FdbRoot::execute(const eckit::option::CmdArgs& args) { //---------------------------------------------------------------------------------------------------------------------- -} // namespace tools -} // namespace fbb5 +} // namespace fdb5::tools -int main(int argc, char **argv) { +int main(int argc, char** argv) { fdb5::tools::FdbRoot app(argc, argv); return app.start(); } diff --git a/src/fdb5/types/TypeParam.cc b/src/fdb5/types/TypeParam.cc index ea87bae71..5a02b75d5 100644 --- a/src/fdb5/types/TypeParam.cc +++ b/src/fdb5/types/TypeParam.cc @@ -38,7 +38,7 @@ void TypeParam::getValues(const metkit::mars::MarsRequest &request, const CatalogueReader* cat) const { ASSERT(cat); - eckit::StringSet ax; + eckit::DenseSet ax; cat->axis(keyword, ax); diff --git a/src/fdb5/types/TypeStep.cc b/src/fdb5/types/TypeStep.cc index 1b6a0c017..12bf40716 100644 --- a/src/fdb5/types/TypeStep.cc +++ b/src/fdb5/types/TypeStep.cc @@ -8,6 +8,7 @@ * does it submit to any jurisdiction. */ +#include "eckit/container/DenseSet.h" #include "eckit/utils/Translator.h" #include "metkit/mars/MarsRequest.h" @@ -17,6 +18,7 @@ #include "fdb5/types/TypesFactory.h" #include "fdb5/types/TypeStep.h" #include "fdb5/database/Catalogue.h" +#include using metkit::mars::StepRange; using metkit::mars::StepRangeNormalise; @@ -71,7 +73,7 @@ void TypeStep::getValues(const metkit::mars::MarsRequest& request, // Get the axis - eckit::StringSet ax; + eckit::DenseSet ax; cat->axis("step", ax); std::vector axis; diff --git a/src/fdb5/types/TypesRegistry.cc b/src/fdb5/types/TypesRegistry.cc index 30670bbba..c4eccf8f1 100644 --- a/src/fdb5/types/TypesRegistry.cc +++ b/src/fdb5/types/TypesRegistry.cc @@ -1,4 +1,4 @@ - /* +/* * (C) Copyright 1996- ECMWF. * * This software is licensed under the terms of the Apache Licence Version 2.0 @@ -8,11 +8,14 @@ * does it submit to any jurisdiction. */ +#include #include #include #include "eckit/exception/Exceptions.h" +#include "eckit/log/Log.h" +#include "fdb5/LibFdb5.h" #include "fdb5/types/Type.h" #include "fdb5/types/TypesFactory.h" #include "fdb5/types/TypesRegistry.h" @@ -20,74 +23,71 @@ #include "metkit/mars/MarsRequest.h" #include "metkit/mars/Parameter.h" - namespace fdb5 { +namespace fdb5 { //---------------------------------------------------------------------------------------------------------------------- -eckit::ClassSpec TypesRegistry::classSpec_ = { &eckit::Streamable::classSpec(), "TypesRegistry", }; +eckit::ClassSpec TypesRegistry::classSpec_ = {&eckit::Streamable::classSpec(), "TypesRegistry"}; eckit::Reanimator TypesRegistry::reanimator_; -TypesRegistry::TypesRegistry() = default; - -TypesRegistry::TypesRegistry(eckit::Stream& s) { +//---------------------------------------------------------------------------------------------------------------------- - size_t numTypes; - std::string name; - std::string type; +TypesRegistry::TypesRegistry(eckit::Stream& stream) { - s >> numTypes; - for (size_t i=0; i> name; - s >> type; - types_[name] = type; - } + decode(stream); } -void TypesRegistry::encode(eckit::Stream& s) const { +void TypesRegistry::decode(eckit::Stream& stream) { - s << types_.size(); - for (auto t: types_) { - s << t.first; - s << t.second; + size_t typeSize = 0; + stream >> typeSize; + for (size_t i = 0; i < typeSize; ++i) { + std::string keyword, type; + + stream >> keyword; + stream >> type; + + types_[keyword] = type; } } - -TypesRegistry::~TypesRegistry() { - for (auto& item : cache_) { delete item.second; } +void TypesRegistry::encode(eckit::Stream& out) const { + out << types_.size(); + for (const auto& [keyword, type] : types_) { + out << keyword; + out << type; + } } void TypesRegistry::updateParent(const TypesRegistry& parent) { - parent_ = std::cref(parent); + parent_ = &parent; } -void TypesRegistry::addType(const std::string &keyword, const std::string &type) { +void TypesRegistry::addType(const std::string& keyword, const std::string& type) { ASSERT(types_.find(keyword) == types_.end()); types_[keyword] = type; } -const Type &TypesRegistry::lookupType(const std::string &keyword) const { - - std::map::const_iterator j = cache_.find(keyword); - - if (j != cache_.end()) { - return *(*j).second; - } else { - std::string type = "Default"; - std::map::const_iterator i = types_.find(keyword); - if (i != types_.end()) { - type = (*i).second; - } else { - if (parent_) { - return parent_.value().get().lookupType(keyword); - } - } - - Type* newKH = TypesFactory::build(type, keyword); - cache_[keyword] = newKH; - return *newKH; +const Type& TypesRegistry::lookupType(const std::string& keyword) const { + + if (auto iter = cache_.find(keyword); iter != cache_.end()) { return *iter->second; } + + std::string type = "Default"; + + if (auto iter = types_.find(keyword); iter != types_.end()) { + type = iter->second; + } else if (parent_) { + return parent_->lookupType(keyword); + } + + auto* newType = TypesFactory::build(type, keyword); + + if (const auto [iter, success] = cache_.try_emplace(keyword, newType); !success) { + LOG_DEBUG_LIB(LibFdb5) << "Failed to insert new type into cache" << std::endl; } + + return *newType; } metkit::mars::MarsRequest TypesRegistry::canonicalise(const metkit::mars::MarsRequest& request) const { @@ -95,40 +95,31 @@ metkit::mars::MarsRequest TypesRegistry::canonicalise(const metkit::mars::MarsRe for (const auto& param : request.parameters()) { const std::vector& srcVals = param.values(); - std::vector vals; + std::vector vals; vals.reserve(srcVals.size()); - for (const auto& v : srcVals) { - vals.push_back(lookupType(param.name()).toKey(v)); - } + for (const auto& v : srcVals) { vals.push_back(lookupType(param.name()).toKey(v)); } result.values(param.name(), vals); } return result; } -std::ostream &operator<<(std::ostream &s, const TypesRegistry &x) { - x.print(s); - return s; -} - -void TypesRegistry::print( std::ostream &out ) const { +void TypesRegistry::print(std::ostream& out) const { out << this << "(" << types_ << ")"; } -void TypesRegistry::dump( std::ostream &out ) const { - for (std::map::const_iterator i = types_.begin(); i != types_.end(); ++i) { - out << i->first << ":" << i->second << ";" << std::endl; - } +void TypesRegistry::dump(std::ostream& out) const { + for (const auto& [keyword, type] : types_) { out << keyword << ":" << type << ";" << std::endl; } } - -void TypesRegistry::dump( std::ostream &out, const std::string &keyword ) const { - std::map::const_iterator i = types_.find(keyword); - +void TypesRegistry::dump(std::ostream& out, const std::string& keyword) const { out << keyword; - if (i != types_.end()) { - out << ":" << i->second; - } + if (auto iter = types_.find(keyword); iter != types_.end()) { out << ":" << iter->second; } +} + +std::ostream& operator<<(std::ostream& out, const TypesRegistry& registry) { + registry.print(out); + return out; } //---------------------------------------------------------------------------------------------------------------------- diff --git a/src/fdb5/types/TypesRegistry.h b/src/fdb5/types/TypesRegistry.h index 057b9f7e9..9080f7b5a 100644 --- a/src/fdb5/types/TypesRegistry.h +++ b/src/fdb5/types/TypesRegistry.h @@ -16,67 +16,68 @@ #ifndef fdb5_TypesRegistry_H #define fdb5_TypesRegistry_H -#include #include #include -#include -#include +#include #include "eckit/serialisation/Streamable.h" +#include "fdb5/types/Type.h" + namespace metkit::mars { class MarsRequest; } namespace fdb5 { -class Type; - //---------------------------------------------------------------------------------------------------------------------- -class TypesRegistry : public eckit::Streamable { - -public: // methods +class TypesRegistry : public eckit::Streamable { - TypesRegistry(); - explicit TypesRegistry(eckit::Stream& s); +public: // methods + TypesRegistry() = default; - ~TypesRegistry() override; + explicit TypesRegistry(eckit::Stream& stream); - const Type &lookupType(const std::string &keyword) const; + const Type& lookupType(const std::string& keyword) const; - void addType(const std::string &, const std::string &); + void addType(const std::string&, const std::string&); void updateParent(const TypesRegistry& parent); - void dump( std::ostream &out ) const; - void dump( std::ostream &out, const std::string &keyword ) const; + void dump(std::ostream& out) const; + void dump(std::ostream& out, const std::string& keyword) const; metkit::mars::MarsRequest canonicalise(const metkit::mars::MarsRequest& request) const; - const eckit::ReanimatorBase& reanimator() const override { return reanimator_; } - static const eckit::ClassSpec& classSpec() { return classSpec_; } - void encode(eckit::Stream& s) const override; + // streamable -private: // members + const eckit::ReanimatorBase& reanimator() const override { return reanimator_; } - static eckit::ClassSpec classSpec_; - static eckit::Reanimator reanimator_; + static const eckit::ClassSpec& classSpec() { return classSpec_; } + void encode(eckit::Stream& out) const override; + void decode(eckit::Stream& stream); - typedef std::map TypeMap; +private: // methods - mutable TypeMap cache_; + void print(std::ostream& out) const; + + friend std::ostream& operator<<(std::ostream& s, const TypesRegistry& x); +private: // members std::map types_; - std::optional> parent_; - friend std::ostream &operator<<(std::ostream &s, const TypesRegistry &x); + const TypesRegistry* parent_ {nullptr}; + using TypeMap = std::map>; + mutable TypeMap cache_; - void print( std::ostream &out ) const; + // streamable + static eckit::ClassSpec classSpec_; + static eckit::Reanimator reanimator_; }; //---------------------------------------------------------------------------------------------------------------------- -} // namespace fdb5 +} // namespace fdb5 #endif diff --git a/tests/fdb/api/ApiSpy.h b/tests/fdb/api/ApiSpy.h index 40a160b62..ef9ea1489 100644 --- a/tests/fdb/api/ApiSpy.h +++ b/tests/fdb/api/ApiSpy.h @@ -91,8 +91,9 @@ class ApiSpy : public fdb5::FDBBase { return fdb5::ListIterator(0); } - fdb5::ListIterator list(const fdb5::FDBToolRequest& request) override { + fdb5::ListIterator list(const fdb5::FDBToolRequest& /* request */, const int level) override { counts_.list += 1; + ASSERT(level == 3); return fdb5::ListIterator(0); } diff --git a/tests/fdb/api/test_fdb_c.cc b/tests/fdb/api/test_fdb_c.cc index 7d8d34c5f..7a9876570 100644 --- a/tests/fdb/api/test_fdb_c.cc +++ b/tests/fdb/api/test_fdb_c.cc @@ -9,8 +9,9 @@ */ #include +#include +#include -#include "eckit/config/Resource.h" #include "eckit/filesystem/PathName.h" #include "eckit/io/Buffer.h" #include "eckit/io/DataHandle.h" @@ -44,9 +45,10 @@ void key_compare(const std::vector& keys, fdb_listiterator_t *it, boo EXPECT(err == FDB_SUCCESS); size_t level = 0; - for (auto key: keys) { - for (auto k1: key) { + for (const auto& key : keys) { + for (const auto& k1 : key) { int err = fdb_splitkey_next_metadata(sk, &k, &v, checkLevel ? &l : nullptr); + std::cerr << "k=" << k << " v=" << v << " l=" << l << std::endl; EXPECT(err == FDB_SUCCESS); EXPECT(k1.first == k); EXPECT(k1.second == v); @@ -58,11 +60,12 @@ void key_compare(const std::vector& keys, fdb_listiterator_t *it, boo } err = fdb_splitkey_next_metadata(sk, &k, &v, &l); EXPECT(err == FDB_ITERATION_COMPLETE); - + err = fdb_delete_splitkey(sk); } CASE( "fdb_c - archive & list" ) { + const int depth = 3; size_t length; DataHandle *dh; @@ -110,10 +113,10 @@ CASE( "fdb_c - archive & list" ) { const char **item= new const char*; fdb_listiterator_t* it; - fdb_list(fdb, request, &it, true); + fdb_list(fdb, request, &it, true, depth); int err = fdb_listiterator_next(it); ASSERT(err == FDB_SUCCESS); - + const char *uri; size_t off, attr_len; @@ -135,7 +138,7 @@ CASE( "fdb_c - archive & list" ) { fdb_request_add1(request, "param", "139"); - fdb_list(fdb, request, &it, true); + fdb_list(fdb, request, &it, true, depth); err = fdb_listiterator_next(it); ASSERT(err == FDB_ITERATION_COMPLETE); fdb_delete_listiterator(it); @@ -154,17 +157,17 @@ CASE( "fdb_c - archive & list" ) { EXPECT(FDB_SUCCESS == fdb_flush(fdb)); fdb_request_add1(request, "levelist", "400"); - fdb_list(fdb, request, &it, true); + fdb_list(fdb, request, &it, true, depth); err = fdb_listiterator_next(it); ASSERT(err == FDB_ITERATION_COMPLETE); fdb_delete_listiterator(it); fdb_request_add1(request, "param", "138"); - fdb_list(fdb, request, &it, true); + fdb_list(fdb, request, &it, true, depth); err = fdb_listiterator_next(it); ASSERT(err == FDB_SUCCESS); - + fdb_listiterator_attrs(it, &uri, &off, &attr_len); EXPECT(attr_len == 3280398); @@ -184,7 +187,7 @@ CASE( "fdb_c - archive & list" ) { const char* values[] = {"400", "300"}; fdb_request_add(request, "levelist", values, 2); - fdb_list(fdb, request, &it, true); + fdb_list(fdb, request, &it, true, depth); err = fdb_listiterator_next(it); ASSERT(err == FDB_SUCCESS); @@ -220,6 +223,7 @@ CASE( "fdb_c - archive & list" ) { #if fdb5_HAVE_GRIB CASE( "fdb_c - multiple archive & list" ) { + const int depth = 3; size_t length1, length2, length3; DataHandle *dh; @@ -320,7 +324,7 @@ CASE( "fdb_c - multiple archive & list" ) { const char **item= new const char*; bool exist; fdb_listiterator_t* it; - fdb_list(fdb, request, &it, true); + fdb_list(fdb, request, &it, true, depth); int err = fdb_listiterator_next(it); ASSERT(err == FDB_SUCCESS); @@ -331,7 +335,7 @@ CASE( "fdb_c - multiple archive & list" ) { fdb_delete_listiterator(it); fdb_request_add1(request, "step", "1"); - fdb_list(fdb, request, &it, true); + fdb_list(fdb, request, &it, true, depth); err = fdb_listiterator_next(it); ASSERT(err == FDB_ITERATION_COMPLETE); fdb_delete_listiterator(it); @@ -339,21 +343,94 @@ CASE( "fdb_c - multiple archive & list" ) { fdb_request_add1(request, "step", "0"); const char* values[] = {"400", "300"}; fdb_request_add(request, "levelist", values, 2); - fdb_list(fdb, request, &it, true); + fdb_list(fdb, request, &it, true, depth); err = fdb_listiterator_next(it); ASSERT(err == FDB_SUCCESS); - + key_compare(k1, it); err = fdb_listiterator_next(it); ASSERT(err == FDB_SUCCESS); - + key_compare(k2, it); err = fdb_listiterator_next(it); ASSERT(err == FDB_ITERATION_COMPLETE); fdb_delete_listiterator(it); } + +CASE("fdb_c - list depth=1,2,3") { + fdb_handle_t* fdb = nullptr; + fdb_new_handle(&fdb); + + std::vector key300d1 { + {{"class", "rd"}, {"expver", "xxxx"}, {"stream", "oper"}, {"date", "20191110"}, {"time", "0000"}, {"domain", "g"}}, + }; + + std::vector key300d2 { + {{"class", "rd"}, {"expver", "xxxx"}, {"stream", "oper"}, {"date", "20191110"}, {"time", "0000"}, {"domain", "g"}}, + {{"type", "an"}, {"levtype", "pl"}}, + }; + + std::vector key300d3 { + {{"class", "rd"}, {"expver", "xxxx"}, {"stream", "oper"}, {"date", "20191110"}, {"time", "0000"}, {"domain", "g"}}, + {{"type", "an"}, {"levtype", "pl"}}, + {{"step", "0"}, {"levelist", "300"}, {"param", "138"}}, + }; + + fdb_request_t* request = nullptr; + fdb_new_request(&request); + fdb_request_add1(request, "domain", "g"); + fdb_request_add1(request, "stream", "oper"); + fdb_request_add1(request, "levtype", "pl"); + fdb_request_add1(request, "levelist", "300"); + fdb_request_add1(request, "date", "20191110"); + fdb_request_add1(request, "time", "0000"); + fdb_request_add1(request, "step", "0"); + fdb_request_add1(request, "param", "138"); + fdb_request_add1(request, "class", "rd"); + fdb_request_add1(request, "type", "an"); + fdb_request_add1(request, "expver", "xxxx"); + + { // depth=1 + fdb_listiterator_t* iter = nullptr; + fdb_list(fdb, request, &iter, true, 1); + int err = fdb_listiterator_next(iter); + ASSERT(err == FDB_SUCCESS); + + key_compare(key300d1, iter); + + err = fdb_listiterator_next(iter); + ASSERT(err == FDB_ITERATION_COMPLETE); + fdb_delete_listiterator(iter); + } + + { // depth=2 + fdb_listiterator_t* iter = nullptr; + fdb_list(fdb, request, &iter, true, 2); + int err = fdb_listiterator_next(iter); + ASSERT(err == FDB_SUCCESS); + + key_compare(key300d2, iter); + + err = fdb_listiterator_next(iter); + ASSERT(err == FDB_ITERATION_COMPLETE); + fdb_delete_listiterator(iter); + } + + { // depth=3 + fdb_listiterator_t* iter = nullptr; + fdb_list(fdb, request, &iter, true, 3); + int err = fdb_listiterator_next(iter); + ASSERT(err == FDB_SUCCESS); + + key_compare(key300d3, iter); + + err = fdb_listiterator_next(iter); + ASSERT(err == FDB_ITERATION_COMPLETE); + fdb_delete_listiterator(iter); + } +} #endif CASE( "fdb_c - retrieve bad request" ) { @@ -480,7 +557,7 @@ CASE( "fdb_c - expand" ) { size_t numValues; char** values; - + fdb_request_get(request, "date", &values, &numValues); EXPECT_EQUAL(numValues, 2); EXPECT_EQUAL(0, strncmp(values[0], "20191110", 8)); diff --git a/tests/fdb/daos/test_daos_catalogue.cc b/tests/fdb/daos/test_daos_catalogue.cc index 0ee22ca88..84e1ac99f 100644 --- a/tests/fdb/daos/test_daos_catalogue.cc +++ b/tests/fdb/daos/test_daos_catalogue.cc @@ -539,7 +539,7 @@ CASE("DaosCatalogue tests") { count = 0; while (listObject.next(info)) { - info.print(std::cout, true, true); + info.print(std::cout, true, true, false," "); std::cout << std::endl; ++count; } @@ -756,7 +756,7 @@ CASE("DaosCatalogue tests") { count = 0; while (listObject.next(info)) { - info.print(std::cout, true, true); + info.print(std::cout, true, true, false," "); std::cout << std::endl; ++count; } @@ -776,7 +776,7 @@ CASE("DaosCatalogue tests") { count = 0; while (listObject.next(info)) { - info.print(std::cout, true, true); + info.print(std::cout, true, true, false," "); std::cout << std::endl; ++count; } @@ -808,7 +808,7 @@ CASE("DaosCatalogue tests") { count = 0; while (listObject.next(info)) { - info.print(std::cout, true, true); + info.print(std::cout, true, true, false," "); std::cout << std::endl; ++count; } diff --git a/tests/fdb/daos/test_daos_store.cc b/tests/fdb/daos/test_daos_store.cc index 5ab8651a0..f33118927 100644 --- a/tests/fdb/daos/test_daos_store.cc +++ b/tests/fdb/daos/test_daos_store.cc @@ -384,7 +384,7 @@ CASE("DaosStore tests") { count = 0; while (listObject.next(info)) { - info.print(std::cout, true, true); + info.print(std::cout, true, true, false," "); std::cout << std::endl; ++count; } diff --git a/tests/fdb/test_fdb5_service.cc b/tests/fdb/test_fdb5_service.cc index 6956d79a5..1d469cb1f 100644 --- a/tests/fdb/test_fdb5_service.cc +++ b/tests/fdb/test_fdb5_service.cc @@ -18,9 +18,7 @@ #include #include "eckit/io/DataHandle.h" -#include "eckit/io/FileHandle.h" #include "eckit/io/MemoryHandle.h" -#include "eckit/io/MultiHandle.h" #include "eckit/runtime/Main.h" #include "eckit/types/Types.h" #include "eckit/utils/Translator.h" @@ -28,8 +26,8 @@ #include "eckit/serialisation/FileStream.h" #include "eckit/io/AutoCloser.h" -#include "metkit/mars/MarsRequest.h" #include "metkit/mars/MarsExpension.h" +#include "metkit/mars/MarsRequest.h" #include "metkit/mars/TypeAny.h" #include "fdb5/database/Key.h" @@ -308,7 +306,7 @@ CASE ( "test_fdb_service" ) { fdb5::ListElement el; EXPECT(iter.next(el)); - eckit::PathName path = el.location().uri().path().dirName(); + eckit::PathName path = el.uri().path().dirName(); DIR* dirp = ::opendir(path.asString().c_str()); struct dirent* dp; @@ -491,7 +489,7 @@ CASE ( "test_fdb_service_subtoc" ) { fdb5::ListElement el; EXPECT(iter.next(el)); - eckit::PathName path = el.location().uri().path().dirName(); + eckit::PathName path = el.uri().path().dirName(); DIR* dirp = ::opendir(path.asString().c_str()); struct dirent* dp; diff --git a/tests/fdb/tools/CMakeLists.txt b/tests/fdb/tools/CMakeLists.txt index 42343c489..c20c61fb6 100644 --- a/tests/fdb/tools/CMakeLists.txt +++ b/tests/fdb/tools/CMakeLists.txt @@ -2,7 +2,7 @@ list( APPEND fdb_tools_tests fdb_info ) foreach( _t ${fdb_tools_tests} ) - + configure_file( ${_t}.sh.in ${_t}.sh @ONLY ) ecbuild_add_test( @@ -28,3 +28,8 @@ foreach( _t ${fdb_tools_grib_tests} ) endforeach() add_subdirectory( auxiliary ) + +add_subdirectory( list ) + +add_subdirectory( inspect ) + diff --git a/tests/fdb/tools/inspect/CMakeLists.txt b/tests/fdb/tools/inspect/CMakeLists.txt new file mode 100644 index 000000000..f7af6fe85 --- /dev/null +++ b/tests/fdb/tools/inspect/CMakeLists.txt @@ -0,0 +1,8 @@ +set(test_name fdb_inspect) + +configure_file( ${test_name}.sh.in ${test_name}.sh @ONLY ) + +ecbuild_add_test( + TYPE SCRIPT + CONDITION HAVE_FDB_BUILD_TOOLS + COMMAND ${test_name}.sh) diff --git a/tests/fdb/tools/inspect/fdb_inspect.sh.in b/tests/fdb/tools/inspect/fdb_inspect.sh.in new file mode 100755 index 000000000..c7da996ad --- /dev/null +++ b/tests/fdb/tools/inspect/fdb_inspect.sh.in @@ -0,0 +1,64 @@ +#!/usr/bin/env bash + +set -eu + +yell() { echo "$(basename "$0"): $*" >&2; } +die() { yell "$*"; exit 1; } +try() { "$@" || die "Errored HERE => '$*'"; } + +export PATH=@CMAKE_BINARY_DIR@/bin:$PATH +export FDB5_CONFIG_FILE="local.yaml" +export FDB_HOME=@PROJECT_BINARY_DIR@ + +tname=inspect_test +srcdir=@CMAKE_CURRENT_SOURCE_DIR@ +bindir=@CMAKE_CURRENT_BINARY_DIR@ + +######################################################################################################################## + +try cd $bindir + +rm -rf $tname || true +mkdir -p $tname/localroot + +try cd $tname + +for f in local.yaml x.grib; do + cp "$srcdir/$f" ./ +done + +######################################################################################################################## +# Create a grib file with 4 steps and populate FDB + +try grib_set -s step=0 x.grib 0.grib +try grib_set -s step=6 x.grib 6.grib +try grib_set -s step=9 x.grib 9.grib + +for f in *.grib; do + try grib_set -s type=an "$f" "cf.$f" +done + +for f in *.grib; do + try cat "$f" >> "$tname.grib" +done + +try fdb-write "$tname.grib" + +######################################################################################################################## +# test all + +requests="class=rd,expver=xxxx,stream=oper,date=20201102,time=0000,domain=g,type=fc,levtype=sfc,step=0,param=166 \ +class=rd,expver=xxxx,stream=oper,date=20201102,time=0000,domain=g,type=fc,levtype=sfc,step=12,param=166 \ +class=rd,expver=xxxx,stream=oper,date=20201102,time=0000,domain=g,type=fc,levtype=sfc,step=6,param=166 \ +class=rd,expver=xxxx,stream=oper,date=20201102,time=0000,domain=g,type=fc,levtype=sfc,step=9,param=166 \ +class=rd,expver=xxxx,stream=oper,date=20201102,time=0000,domain=g,type=an,levtype=sfc,step=0,param=166 \ +class=rd,expver=xxxx,stream=oper,date=20201102,time=0000,domain=g,type=an,levtype=sfc,step=12,param=166 \ +class=rd,expver=xxxx,stream=oper,date=20201102,time=0000,domain=g,type=an,levtype=sfc,step=6,param=166 \ +class=rd,expver=xxxx,stream=oper,date=20201102,time=0000,domain=g,type=an,levtype=sfc,step=9,param=166" + +out=$(fdb-inspect $requests) + +exp="Number of requests: 8 +Number of elements: 8" + +try test "$exp" = "$out" diff --git a/tests/fdb/tools/inspect/local.yaml b/tests/fdb/tools/inspect/local.yaml new file mode 100644 index 000000000..8e43d2e8f --- /dev/null +++ b/tests/fdb/tools/inspect/local.yaml @@ -0,0 +1,6 @@ +--- +type: local +engine: toc +spaces: + - roots: + - path: ./localroot diff --git a/tests/fdb/tools/inspect/x.grib b/tests/fdb/tools/inspect/x.grib new file mode 120000 index 000000000..635e33b3e --- /dev/null +++ b/tests/fdb/tools/inspect/x.grib @@ -0,0 +1 @@ +../../../regressions/FDB-307/x.grib \ No newline at end of file diff --git a/tests/fdb/tools/list/CMakeLists.txt b/tests/fdb/tools/list/CMakeLists.txt new file mode 100644 index 000000000..07d3fa583 --- /dev/null +++ b/tests/fdb/tools/list/CMakeLists.txt @@ -0,0 +1,8 @@ +set(test_name fdb_list) + +configure_file( ${test_name}.sh.in ${test_name}.sh @ONLY ) + +ecbuild_add_test( + TYPE SCRIPT + CONDITION HAVE_FDB_BUILD_TOOLS + COMMAND ${test_name}.sh) diff --git a/tests/fdb/tools/list/fdb_list.sh.in b/tests/fdb/tools/list/fdb_list.sh.in new file mode 100755 index 000000000..020e5348a --- /dev/null +++ b/tests/fdb/tools/list/fdb_list.sh.in @@ -0,0 +1,91 @@ +#!/usr/bin/env bash + +# set -eu + +yell() { echo "$(basename "$0"): $*" >&2; } +die() { yell "$*"; exit 1; } +try() { "$@" || die "Errored HERE => '$*'"; } + +function request() { + echo "RETRIEVE,CLASS=OD,TYPE=CF,STREAM=ENFO,EXPVER=0001,LEVTYPE=PL,DATE=20201106,TIME=1200,STEP=00,DOMAIN=G,$1" > req; +} + +export PATH=@CMAKE_BINARY_DIR@/bin:$PATH +export FDB5_CONFIG_FILE="local.yaml" +export FDB_HOME=@PROJECT_BINARY_DIR@ + +tname=list_test +srcdir=@CMAKE_CURRENT_SOURCE_DIR@ +bindir=@CMAKE_CURRENT_BINARY_DIR@ + +######################################################################################################################## + +try cd $bindir + +rm -rf $tname || true +mkdir -p $tname/localroot + +try cd $tname + +for f in local.yaml x.grib; do + cp "$srcdir/$f" ./ +done + +######################################################################################################################## +# Create a grib file with 4 steps and populate FDB + +try grib_set -s step=0 x.grib 0.grib +try grib_set -s step=6 x.grib 6.grib +try grib_set -s step=9 x.grib 9.grib + +for f in *.grib; do + try grib_set -s type=an "$f" "cf.$f" +done + +for f in *.grib; do + try cat "$f" >> "$tname.grib" +done + +try fdb-write "$tname.grib" + +######################################################################################################################## +# test all +out=$(fdb-list --all --minimum-keys= --porcelain) +exp="{class=rd,expver=xxxx,stream=oper,date=20201102,time=0000,domain=g}{type=fc,levtype=sfc}{step=0,param=166} +{class=rd,expver=xxxx,stream=oper,date=20201102,time=0000,domain=g}{type=fc,levtype=sfc}{step=12,param=166} +{class=rd,expver=xxxx,stream=oper,date=20201102,time=0000,domain=g}{type=fc,levtype=sfc}{step=6,param=166} +{class=rd,expver=xxxx,stream=oper,date=20201102,time=0000,domain=g}{type=fc,levtype=sfc}{step=9,param=166} +{class=rd,expver=xxxx,stream=oper,date=20201102,time=0000,domain=g}{type=an,levtype=sfc}{step=0,param=166} +{class=rd,expver=xxxx,stream=oper,date=20201102,time=0000,domain=g}{type=an,levtype=sfc}{step=12,param=166} +{class=rd,expver=xxxx,stream=oper,date=20201102,time=0000,domain=g}{type=an,levtype=sfc}{step=6,param=166} +{class=rd,expver=xxxx,stream=oper,date=20201102,time=0000,domain=g}{type=an,levtype=sfc}{step=9,param=166}" +try test "$exp" = "$out" + +# test date depth=1 +out=$(fdb-list date=20201102 --minimum-keys="" --porcelain --depth=1) +exp="{class=rd,expver=xxxx,stream=oper,date=20201102,time=0000,domain=g}" +try test "$exp" = "$out" + +# test date depth=2 +out=$(fdb-list date=20201102 --minimum-keys="" --porcelain --depth=2) +exp="{class=rd,expver=xxxx,stream=oper,date=20201102,time=0000,domain=g}{type=fc,levtype=sfc} +{class=rd,expver=xxxx,stream=oper,date=20201102,time=0000,domain=g}{type=an,levtype=sfc}" +try test "$exp" = "$out" + +# test cf depth=1 +out=$(fdb-list type=an --minimum-keys="" --porcelain --depth=1) +exp="{class=rd,expver=xxxx,stream=oper,date=20201102,time=0000,domain=g}" +try test "$exp" = "$out" + +# test cf depth=2 +out=$(fdb-list type=an --minimum-keys="" --porcelain --depth=2) +exp="{class=rd,expver=xxxx,stream=oper,date=20201102,time=0000,domain=g}{type=an,levtype=sfc}" +try test "$exp" = "$out" + +# test cf depth=3 +out=$(fdb-list type=an --minimum-keys="" --porcelain --depth=3) +exp="{class=rd,expver=xxxx,stream=oper,date=20201102,time=0000,domain=g}{type=an,levtype=sfc}{step=0,param=166} +{class=rd,expver=xxxx,stream=oper,date=20201102,time=0000,domain=g}{type=an,levtype=sfc}{step=12,param=166} +{class=rd,expver=xxxx,stream=oper,date=20201102,time=0000,domain=g}{type=an,levtype=sfc}{step=6,param=166} +{class=rd,expver=xxxx,stream=oper,date=20201102,time=0000,domain=g}{type=an,levtype=sfc}{step=9,param=166}" +try test "$exp" = "$out" diff --git a/tests/fdb/tools/list/local.yaml b/tests/fdb/tools/list/local.yaml new file mode 100644 index 000000000..8e43d2e8f --- /dev/null +++ b/tests/fdb/tools/list/local.yaml @@ -0,0 +1,6 @@ +--- +type: local +engine: toc +spaces: + - roots: + - path: ./localroot diff --git a/tests/fdb/tools/list/x.grib b/tests/fdb/tools/list/x.grib new file mode 120000 index 000000000..635e33b3e --- /dev/null +++ b/tests/fdb/tools/list/x.grib @@ -0,0 +1 @@ +../../../regressions/FDB-307/x.grib \ No newline at end of file diff --git a/tests/fdb/type/test_toKey.cc b/tests/fdb/type/test_toKey.cc index 22e255f61..bf2e3b9aa 100644 --- a/tests/fdb/type/test_toKey.cc +++ b/tests/fdb/type/test_toKey.cc @@ -13,154 +13,164 @@ #include "eckit/testing/Test.h" #include "fdb5/config/Config.h" -#include "fdb5/database/Archiver.h" #include "fdb5/database/ArchiveVisitor.h" - +#include "fdb5/database/Archiver.h" +#include "fdb5/database/Key.h" #include "fdb5/rules/Rule.h" using namespace eckit::testing; using namespace eckit; - -namespace fdb { -namespace test { +namespace fdb::test { fdb5::Config config; + char data[4]; -CASE( "ClimateDaily - no expansion" ) { +CASE("ClimateDaily - no expansion") { - fdb5::Key key{}; - EXPECT(key.valuesToString() == ""); - EXPECT_THROWS(key.canonicalValue("date")); + fdb5::Key key; + EXPECT_EQUAL(key.valuesToString(), ""); + EXPECT_THROWS(key.value("date")); key.set("date", "20210427"); - EXPECT_NO_THROW(key.canonicalValue("date")); - EXPECT(key.canonicalValue("date") == "20210427"); - EXPECT(key.valuesToString() == "20210427"); + EXPECT_NO_THROW(key.value("date")); + EXPECT_EQUAL(key.value("date"), "20210427"); + EXPECT_EQUAL(key.valuesToString(), "20210427"); key.set("stream", "dacl"); - EXPECT(key.canonicalValue("date") == "20210427"); - EXPECT(key.valuesToString() == "20210427:dacl"); - + EXPECT_EQUAL(key.value("date"), "20210427"); + EXPECT_EQUAL(key.valuesToString(), "20210427:dacl"); } -CASE( "Step & ClimateDaily - expansion" ) { +CASE("Step & ClimateDaily - expansion") { + fdb5::Key key; - fdb5::TypedKey key(config.schema().registry()); - EXPECT(key.valuesToString() == ""); - EXPECT_THROWS(key.canonicalValue("date")); + { + fdb5::TypedKey tKey(config.schema().registry()); + EXPECT_EQUAL(tKey.canonical().valuesToString(), ""); + EXPECT_THROWS(tKey["date"]); - key.set("date", "20210427"); - EXPECT_NO_THROW(key.canonicalValue("date")); - EXPECT(key.canonicalValue("date") == "20210427"); - EXPECT(key.valuesToString() == "20210427"); + tKey.set("date", "20210427"); + EXPECT_NO_THROW(key = tKey.canonical()); + EXPECT_EQUAL(key["date"], "20210427"); + EXPECT_EQUAL(key.valuesToString(), "20210427"); - key.set("stream", "dacl"); - EXPECT(key.canonicalValue("date") == "20210427"); - EXPECT(key.valuesToString() == "20210427:dacl"); + tKey.set("stream", "dacl"); + EXPECT_NO_THROW(key = tKey.canonical()); + EXPECT_EQUAL(key["date"], "20210427"); + EXPECT_EQUAL(key.valuesToString(), "20210427:dacl"); - key.set("time", "12:aa"); - EXPECT_THROWS(key.canonicalValue("time")); + tKey.set("time", "12:aa"); + EXPECT_THROWS(tKey.canonical()); - key.set("time", "12am"); - EXPECT_THROWS(key.canonicalValue("time")); + tKey.set("time", "12am"); + EXPECT_THROWS(tKey.canonical()); - key.set("time", "123"); - EXPECT(key.canonicalValue("time") == "0123"); + tKey.set("time", "123"); + EXPECT_EQUAL(tKey.canonical()["time"], "0123"); - key.set("time", "1:23"); - EXPECT(key.canonicalValue("time") == "0123"); + tKey.set("time", "1:23"); + EXPECT_EQUAL(tKey.canonical()["time"], "0123"); - key.set("time", "01::23::45"); - EXPECT_THROWS(key.canonicalValue("time")); + tKey.set("time", "01::23::45"); + EXPECT_THROWS(tKey.canonical()); - key.set("time", ":01:23:45:"); - EXPECT_THROWS(key.canonicalValue("time")); + tKey.set("time", ":01:23:45:"); + EXPECT_THROWS(tKey.canonical()); - key.set("time", "12:99"); - EXPECT_THROWS(key.canonicalValue("time")); + tKey.set("time", "12:99"); + EXPECT_THROWS(tKey.canonical()); - key.set("time", "7700"); - EXPECT_THROWS(key.canonicalValue("time")); + tKey.set("time", "7700"); + EXPECT_THROWS(tKey.canonical()); - key.set("time", "01:23:45:67"); - EXPECT_THROWS(key.canonicalValue("time")); + tKey.set("time", "01:23:45:67"); + EXPECT_THROWS(tKey.canonical()); - key.set("time", "12"); - EXPECT(key.canonicalValue("time") == "1200"); + tKey.set("time", "12"); + EXPECT_EQUAL(tKey.canonical()["time"], "1200"); - key.set("time", "6"); - EXPECT(key.canonicalValue("time") == "0600"); + tKey.set("time", "6"); + EXPECT_EQUAL(tKey.canonical()["time"], "0600"); - key.set("time", "06:21"); - EXPECT(key.canonicalValue("time") == "0621"); + tKey.set("time", "06:21"); + EXPECT_EQUAL(tKey.canonical()["time"], "0621"); - key.set("time", "00:18:00"); - EXPECT(key.canonicalValue("time") == "0018"); + tKey.set("time", "00:18:00"); + EXPECT_EQUAL(tKey.canonical()["time"], "0018"); - key.set("time", "00"); - EXPECT(key.canonicalValue("time") == "0000"); + tKey.set("time", "00"); + EXPECT_EQUAL(tKey.canonical()["time"], "0000"); - key.set("time", "0"); - EXPECT(key.canonicalValue("time") == "0000"); + tKey.set("time", "0"); + EXPECT_EQUAL(tKey.canonical()["time"], "0000"); - key.set("time", "00:00"); - EXPECT(key.canonicalValue("time") == "0000"); + tKey.set("time", "00:00"); + EXPECT_EQUAL(tKey.canonical()["time"], "0000"); - key.set("time", "00:00:00"); - EXPECT(key.canonicalValue("time") == "0000"); + tKey.set("time", "00:00:00"); + EXPECT_NO_THROW(key = tKey.canonical()); + EXPECT_EQUAL(key["time"], "0000"); + EXPECT_EQUAL(key.valuesToString(), "20210427:dacl:0000"); - EXPECT(key.valuesToString() == "20210427:dacl:0000"); + tKey.set("class", "ei"); + tKey.set("expver", "7799"); + tKey.set("domain", "g"); + tKey.set("type", "pb"); + tKey.set("levtype", "pl"); + tKey.set("step", "02-12"); + tKey.set("quantile", "99:100"); + tKey.set("levelist", "50"); + tKey.set("param", "129.128"); - key.set("class", "ei"); - key.set("expver", "7799"); - key.set("domain", "g"); - key.set("type", "pb"); - key.set("levtype", "pl"); - key.set("step", "02-12"); - key.set("quantile", "99:100"); - key.set("levelist", "50"); - key.set("param", "129.128"); + EXPECT_NO_THROW(key = tKey.canonical()); + EXPECT_EQUAL(key.valuesToString(), "20210427:dacl:0000:ei:7799:g:pb:pl:2-12:99:100:50:129.128"); + } fdb5::Config conf = config.expandConfig(); - fdb5::Archiver archiver(conf); - fdb5::ArchiveVisitor visitor(archiver, key.canonical(), data, 4); - conf.schema().expand(key.canonical(), visitor); - key.registry(visitor.rule()->registry()); + fdb5::Archiver archiver(conf); + fdb5::ArchiveVisitor visitor(archiver, key, data, 4); + config.schema().expand(key, visitor); - EXPECT(key.canonicalValue("date") == "0427"); - EXPECT(key.canonicalValue("time") == "0000"); + fdb5::TypedKey tKey(visitor.rule()->registry()); + tKey.pushFrom(key); - EXPECT(key.valuesToString() == "0427:dacl:0000:ei:7799:g:pb:pl:2-12:99:100:50:129.128"); + EXPECT_NO_THROW(key = tKey.canonical()); + EXPECT_EQUAL(key["date"], "0427"); + EXPECT_EQUAL(key["time"], "0000"); + EXPECT_EQUAL(key["step"], "2-12"); + EXPECT_EQUAL(key.valuesToString(), "0427:dacl:0000:ei:7799:g:pb:pl:2-12:99:100:50:129.128"); - key.set("step", "00"); - EXPECT(key.canonicalValue("step") == "0"); + tKey.set("step", "00"); + EXPECT_EQUAL(tKey.canonical()["step"], "0"); - key.set("step", "1"); - EXPECT(key.canonicalValue("step") == "1"); + tKey.set("step", "1"); + EXPECT_EQUAL(tKey.canonical()["step"], "1"); - key.set("step", "0-1"); - EXPECT(key.canonicalValue("step") == "0-1"); + tKey.set("step", "0-1"); + EXPECT_EQUAL(tKey.canonical()["step"], "0-1"); - key.set("step", "30m"); - EXPECT(key.canonicalValue("step") == "30m"); + tKey.set("step", "30m"); + EXPECT_EQUAL(tKey.canonical()["step"], "30m"); - key.set("step", "60m"); - EXPECT(key.canonicalValue("step") == "1"); + tKey.set("step", "60m"); + EXPECT_EQUAL(tKey.canonical()["step"], "1"); - key.set("step", "30m-60m"); - EXPECT(key.canonicalValue("step") == "30m-1"); + tKey.set("step", "30m-60m"); + EXPECT_EQUAL(tKey.canonical()["step"], "30m-1"); - key.set("step", "30m-1"); - EXPECT(key.canonicalValue("step") == "30m-1"); + tKey.set("step", "30m-1"); + EXPECT_EQUAL(tKey.canonical()["step"], "30m-1"); - key.set("step", "60m-120m"); - EXPECT(key.canonicalValue("step") == "1-2"); + tKey.set("step", "60m-120m"); + EXPECT_EQUAL(tKey.canonical()["step"], "1-2"); } +CASE("Levelist") { + + fdb5::Key key; -CASE( "Levelist" ) { eckit::DenseSet values; values.insert("100"); values.insert("200"); @@ -170,126 +180,164 @@ CASE( "Levelist" ) { values.insert("0.333333"); values.sort(); - fdb5::TypedKey key(config.schema().registry()); - EXPECT(key.valuesToString() == ""); - EXPECT_THROWS(key.canonicalValue("levelist")); - - key.set("levelist", "925"); - EXPECT_NO_THROW(key.canonicalValue("levelist")); - EXPECT(key.canonicalValue("levelist") == "925"); - EXPECT(key.match("levelist", values)); - - key.set("levelist", "200.0"); - EXPECT(key.canonicalValue("levelist") == "200"); - EXPECT(key.match("levelist", values)); - - key.set("levelist", "200.0000000"); - EXPECT(key.canonicalValue("levelist") == "200"); - EXPECT(key.match("levelist", values)); - - key.set("levelist", "200.1"); - EXPECT(key.canonicalValue("levelist") == "200.1"); - EXPECT(!key.match("levelist", values)); - - key.set("levelist", "300"); - EXPECT(key.canonicalValue("levelist") == "300"); - EXPECT(!key.match("levelist", values)); - - key.set("levelist", "0.7"); - EXPECT(key.canonicalValue("levelist") == "0.7"); - EXPECT(key.match("levelist", values)); - - key.set("levelist", "0.7000"); - EXPECT(key.canonicalValue("levelist") == "0.7"); - EXPECT(key.match("levelist", values)); - - key.set("levelist", "0.5"); - EXPECT(key.canonicalValue("levelist") == "0.5"); - EXPECT(!key.match("levelist", values)); - - key.set("levelist", "0.333"); - EXPECT(key.canonicalValue("levelist") == "0.333"); - EXPECT(!key.match("levelist", values)); - - key.set("levelist", "0.333333"); - EXPECT(key.canonicalValue("levelist") == "0.333333"); - EXPECT(key.match("levelist", values)); + fdb5::TypedKey tKey(config.schema().registry()); + + EXPECT_NO_THROW(key = tKey.canonical()); + EXPECT_EQUAL(key.valuesToString(), ""); + EXPECT_THROWS(key["levelist"]); + + tKey.set("levelist", "925"); + EXPECT_NO_THROW(key = tKey.canonical()); + EXPECT_NO_THROW(key["levelist"]); + EXPECT_EQUAL(key["levelist"], "925"); + EXPECT(key.matchValues("levelist", values)); + + tKey.set("levelist", "200.0"); + EXPECT_NO_THROW(key = tKey.canonical()); + EXPECT_EQUAL(key["levelist"], "200"); + EXPECT(key.matchValues("levelist", values)); + + tKey.set("levelist", "200.0000000"); + EXPECT_NO_THROW(key = tKey.canonical()); + EXPECT_EQUAL(key["levelist"], "200"); + EXPECT(key.matchValues("levelist", values)); + + tKey.set("levelist", "200.1"); + EXPECT_NO_THROW(key = tKey.canonical()); + EXPECT_EQUAL(key["levelist"], "200.1"); + EXPECT(!key.matchValues("levelist", values)); + + tKey.set("levelist", "300"); + EXPECT_NO_THROW(key = tKey.canonical()); + EXPECT_EQUAL(key["levelist"], "300"); + EXPECT(!key.matchValues("levelist", values)); + + tKey.set("levelist", "0.7"); + EXPECT_NO_THROW(key = tKey.canonical()); + EXPECT_EQUAL(key["levelist"], "0.7"); + EXPECT(key.matchValues("levelist", values)); + + tKey.set("levelist", "0.7000"); + EXPECT_NO_THROW(key = tKey.canonical()); + EXPECT_EQUAL(key["levelist"], "0.7"); + EXPECT(key.matchValues("levelist", values)); + + tKey.set("levelist", "0.5"); + EXPECT_NO_THROW(key = tKey.canonical()); + EXPECT_EQUAL(key["levelist"], "0.5"); + EXPECT(!key.matchValues("levelist", values)); + + tKey.set("levelist", "0.333"); + EXPECT_NO_THROW(key = tKey.canonical()); + EXPECT_EQUAL(key["levelist"], "0.333"); + EXPECT(!key.matchValues("levelist", values)); + + tKey.set("levelist", "0.333333"); + EXPECT_NO_THROW(key = tKey.canonical()); + EXPECT_EQUAL(key["levelist"], "0.333333"); + EXPECT(key.matchValues("levelist", values)); // this works (probably shouldn't), simply becasue to_string uses the same precision as printf %f (default 6) /// @note don't use to_string when canonicalising Keys - key.set("levelist", std::to_string(double(1./3.))); - EXPECT(key.canonicalValue("levelist") == "0.333333"); - EXPECT(key.match("levelist", values)); + tKey.set("levelist", std::to_string(double(1. / 3.))); + EXPECT_NO_THROW(key = tKey.canonical()); + EXPECT_EQUAL(key["levelist"], "0.333333"); + EXPECT(key.matchValues("levelist", values)); } -CASE( "Expver, Time & ClimateDaily - string ctor - expansion" ) { - - fdb5::TypedKey key = fdb5::TypedKey::parseString( - "class=ei,expver=1,stream=dacl,domain=g,type=pb,levtype=pl,date=20210427,time=6,step=0,quantile=99:100,levelist=50,param=129.128", - config.schema().registry()); - - EXPECT(key.canonicalValue("date") == "20210427"); - EXPECT(key.canonicalValue("time") == "0600"); - EXPECT(key.valuesToString() == "ei:0001:dacl:g:pb:pl:20210427:0600:0:99:100:50:129.128"); - - fdb5::Archiver archiver; - fdb5::ArchiveVisitor visitor(archiver, key.canonical(), data, 4); - config.schema().expand(key.canonical(), visitor); - key.registry(visitor.rule()->registry()); - - EXPECT(key.canonicalValue("date") == "0427"); - EXPECT(key.valuesToString() == "ei:0001:dacl:g:pb:pl:0427:0600:0:99:100:50:129.128"); +CASE("Expver, Time & ClimateDaily - string ctor - expansion") { + + fdb5::Key key; + + { + auto parsed = fdb5::Key::parse( + "class=ei,expver=1,stream=dacl,domain=g,type=pb,levtype=pl,date=" "20210427,time=6,step=0,quantile=99:100," "levelist=50,param=129.128"); + fdb5::TypedKey tKey(config.schema().registry()); + tKey.pushFrom(parsed); + key = tKey.tidy(); + } + + EXPECT_EQUAL(key["date"], "20210427"); + EXPECT_EQUAL(key["time"], "0600"); + EXPECT_EQUAL(key.valuesToString(), "ei:0001:dacl:g:pb:pl:20210427:0600:0:99:100:50:129.128"); + + { + fdb5::Archiver archiver; + fdb5::ArchiveVisitor visitor(archiver, key, data, 4); + config.schema().expand(key, visitor); + fdb5::TypedKey tKey(visitor.rule()->registry()); + tKey.pushFrom(key); + EXPECT_NO_THROW(key = tKey.canonical()); + } + + EXPECT_EQUAL(key["date"], "0427"); + EXPECT_EQUAL(key.valuesToString(), "ei:0001:dacl:g:pb:pl:0427:0600:0:99:100:50:129.128"); } -CASE( "ClimateMonthly - string ctor - expansion" ) { +CASE("ClimateMonthly - string ctor - expansion") { - fdb5::TypedKey key = fdb5::TypedKey::parseString( - "class=op,expver=1,stream=mnth,domain=g,type=cl,levtype=pl,date=20210427,time=0000,levelist=50,param=129.128", - config.schema().registry()); + fdb5::Key key; - EXPECT(key.canonicalValue("date") == "20210427"); - EXPECT(key.valuesToString() == "op:0001:mnth:g:cl:pl:20210427:0000:50:129.128"); + { + auto parsed = fdb5::Key::parse("class=op,expver=1,stream=mnth,domain=g,type=cl,levtype=pl,date=20210427,time=" "0000,levelist=50,param=129.128"); + fdb5::TypedKey tKey(config.schema().registry()); + tKey.pushFrom(parsed); + key = tKey.tidy(); + } - fdb5::Archiver archiver; - fdb5::ArchiveVisitor visitor(archiver, key.canonical(), data, 4); - config.schema().expand(key.canonical(), visitor); - key.registry(visitor.rule()->registry()); + EXPECT_EQUAL(key["date"], "20210427"); + EXPECT_EQUAL(key.valuesToString(), "op:0001:mnth:g:cl:pl:20210427:0000:50:129.128"); - EXPECT(key.canonicalValue("date") == "4"); - EXPECT(key.valuesToString() == "op:0001:mnth:g:cl:pl:4:0000:50:129.128"); + { + fdb5::Archiver archiver; + fdb5::ArchiveVisitor visitor(archiver, key, data, 4); + config.schema().expand(key, visitor); + fdb5::TypedKey tKey(visitor.rule()->registry()); + tKey.pushFrom(key); + EXPECT_NO_THROW(key = tKey.canonical()); + } + EXPECT_EQUAL(key["date"], "4"); + EXPECT_EQUAL(key.valuesToString(), "op:0001:mnth:g:cl:pl:4:0000:50:129.128"); } -// do we need to keep this behaviour? should we rely on metkit for date expansion and remove it from TypedKey? -CASE( "Date - string ctor - expansion" ) { +// do we need to keep this behaviour? should we rely on metkit for date expansion and remove it from Key? +CASE("Date - string ctor - expansion") { + + fdb5::Key key; - fdb5::TypedKey key = fdb5::TypedKey::parseString( - "class=od,expver=1,stream=oper,type=ofb,date=-2,time=0000,obsgroup=MHS,reportype=3001", - config.schema().registry()); + { + auto parsed = + fdb5::Key::parse("class=od,expver=1,stream=oper,type=ofb,date=-2,time=0000,obsgroup=MHS,reportype=3001"); + fdb5::TypedKey tKey(config.schema().registry()); + tKey.pushFrom(parsed); + key = tKey.tidy(); + } eckit::Date now(-2); - eckit::Translator t; - EXPECT(key.canonicalValue("date") == t(now.yyyymmdd())); - EXPECT(key.valuesToString() == "od:0001:oper:ofb:"+t(now.yyyymmdd())+":0000:mhs:3001"); + eckit::Translator t; - fdb5::Archiver archiver; - fdb5::ArchiveVisitor visitor(archiver, key.canonical(), data, 4); - config.schema().expand(key.canonical(), visitor); - key.registry(visitor.rule()->registry()); + EXPECT_EQUAL(key["date"], t(now.yyyymmdd())); + EXPECT_EQUAL(key.valuesToString(), "od:0001:oper:ofb:" + t(now.yyyymmdd()) + ":0000:mhs:3001"); - EXPECT(key.canonicalValue("date") == t(now.yyyymmdd())); - EXPECT(key.valuesToString() == "od:0001:oper:ofb:"+t(now.yyyymmdd())+":0000:mhs:3001"); + { + fdb5::Archiver archiver; + fdb5::ArchiveVisitor visitor(archiver, key, data, 4); + config.schema().expand(key, visitor); + fdb5::TypedKey tKey(visitor.rule()->registry()); + tKey.pushFrom(key); + EXPECT_NO_THROW(key = tKey.canonical()); + } + EXPECT_EQUAL(key["date"], t(now.yyyymmdd())); + EXPECT_EQUAL(key.valuesToString(), "od:0001:oper:ofb:" + t(now.yyyymmdd()) + ":0000:mhs:3001"); } - //---------------------------------------------------------------------------------------------------------------------- -} // namespace test -} // namespace fdb +} // namespace fdb::test -int main(int argc, char **argv) -{ - return run_tests ( argc, argv ); +int main(int argc, char** argv) { + return run_tests(argc, argv); } diff --git a/tests/regressions/FDB-238/FDB-238.sh.in b/tests/regressions/FDB-238/FDB-238.sh.in index 26367d97a..eefbab240 100755 --- a/tests/regressions/FDB-238/FDB-238.sh.in +++ b/tests/regressions/FDB-238/FDB-238.sh.in @@ -70,49 +70,8 @@ cat > content.6 < content.6dup < content.12 < content.12dup < 6dup.grib -cmp checkV2.dup.grib 6dup.grib +cmp checkV2.dup.grib 6.grib unset FDB_DEDUPLICATE_FIELDS diff --git a/tests/regressions/FDB-303/FDB-303.sh.in b/tests/regressions/FDB-303/FDB-303.sh.in index ac02b65d2..80eb726d8 100755 --- a/tests/regressions/FDB-303/FDB-303.sh.in +++ b/tests/regressions/FDB-303/FDB-303.sh.in @@ -76,11 +76,11 @@ cmp out list $fdblist class=od,expver=0001,stream=wamo,domain=g,type=cl,levtype=sfc,date=1,time=12 --porcelain | tee out cmp out list12 -$fdblist class=od,expver=0001,stream=wamo,domain=g,type=cl,levtype=sfc,date=jan,time=12 --porcelain | tee out -cmp out list12 +# $fdblist class=od,expver=0001,stream=wamo,domain=g,type=cl,levtype=sfc,date=jan,time=12 --porcelain | tee out +# cmp out list12 -$fdblist class=od,expver=0001,stream=wamo,domain=g,type=cl,levtype=sfc,date=20240102 --porcelain | tee out -cmp out list +# $fdblist class=od,expver=0001,stream=wamo,domain=g,type=cl,levtype=sfc,date=20240102 --porcelain | tee out +# cmp out list cat > read <