From f90fa2b8b77da476084cc831030ae5975d382ac5 Mon Sep 17 00:00:00 2001 From: Enrico Guiraud Date: Tue, 26 Sep 2023 16:28:12 -0600 Subject: [PATCH 1/2] Make Binning and Multibinning logic more uniform With this patch, also in Binning we store the default value at the end of the array of values, not at the beginning. This makes it possible to factor out common logic between Binning and MultiBinning (in the next commit). --- src/correction.cc | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/correction.cc b/src/correction.cc index f5a379da..db22a9f3 100644 --- a/src/correction.cc +++ b/src/correction.cc @@ -368,9 +368,9 @@ Binning::Binning(const JSONObject& json, const Correction& context) } // set bin contents - contents_.push_back(std::move(default_value)); for (size_t i=0; i < content.Size(); ++i) contents_.push_back(resolve_content(content[i], context)); + contents_.push_back(std::move(default_value)); } const Content& Binning::child(const std::vector& values) const { @@ -381,7 +381,7 @@ const Content& Binning::child(const std::vector& values) const { if (value < bins->low || value >= bins->high) { switch (flow_) { case _FlowBehavior::value: - return contents_[0u]; // the default value + return contents_.back(); // the default value case _FlowBehavior::clamp: binIdx = value < bins->low ? 0 : bins->n - 1; // assuming we always have at least 1 bin break; @@ -392,16 +392,16 @@ const Content& Binning::child(const std::vector& values) const { } } - return contents_[binIdx + 1u]; // skipping the default value at index 0 + return contents_[binIdx]; } // otherwise we have non-uniform binning const auto bins = std::get<_NonUniformBins>(bins_); auto it = std::upper_bound(std::begin(bins), std::end(bins), value); - if ( it == std::begin(bins) ) { + if ( it == std::begin(bins) ) { // underflow if ( flow_ == _FlowBehavior::value ) { - // default value already at std::begin + return contents_.back(); // the default value } else if ( flow_ == _FlowBehavior::error ) { throw std::runtime_error("Index below bounds in Binning for input argument " + std::to_string(variableIdx_) + " value: " + std::to_string(value)); @@ -410,9 +410,9 @@ const Content& Binning::child(const std::vector& values) const { it++; } } - else if ( it == std::end(bins) ) { + else if ( it == std::end(bins) ) { // overflow if ( flow_ == _FlowBehavior::value ) { - it = std::begin(bins); + return contents_.back(); } else if ( flow_ == _FlowBehavior::error ) { throw std::runtime_error("Index above bounds in Binning for input argument " + std::to_string(variableIdx_) + " value: " + std::to_string(value)); @@ -422,7 +422,9 @@ const Content& Binning::child(const std::vector& values) const { } } - return contents_[std::distance(std::begin(bins), it)]; + // -1 because upper_bound returns the edge _after_ the bin we are interested in + const std::size_t binIdx = std::distance(std::begin(bins), it) - 1; + return contents_[binIdx]; } MultiBinning::MultiBinning(const JSONObject& json, const Correction& context) @@ -492,7 +494,6 @@ MultiBinning::MultiBinning(const JSONObject& json, const Correction& context) } // TODO factor out logic in common with Binning::child. -// One notable difference is that MultiBinning stores the default value at the end of content_ instead of the beginning. const Content& MultiBinning::child(const std::vector& values) const { size_t idx {0}; size_t localidx {0}; From 1d5c7332401b8a180627c8c347559310af802908 Mon Sep 17 00:00:00 2001 From: Enrico Guiraud Date: Tue, 26 Sep 2023 18:03:48 -0600 Subject: [PATCH 2/2] Factor out common logic in [Multi]Binning::child Removes a TODO, removes duplicated code. --- src/correction.cc | 160 +++++++++++++++++++--------------------------- 1 file changed, 64 insertions(+), 96 deletions(-) diff --git a/src/correction.cc b/src/correction.cc index db22a9f3..8af1afd9 100644 --- a/src/correction.cc +++ b/src/correction.cc @@ -145,7 +145,64 @@ namespace { const std::vector& values; }; -} + + std::size_t find_bin_idx(double value, + const std::variant<_UniformBins, _NonUniformBins> &bins_, + const _FlowBehavior &flow, + std::size_t variableIdx, + const char *name) + { + if ( auto *bins = std::get_if<_UniformBins>(&bins_) ) { // uniform binning + if (value < bins->low || value >= bins->high) { + switch (flow) { + case _FlowBehavior::value: + return bins->n; // the default value is stored at the end of the content array, after the last bin + case _FlowBehavior::clamp: + return value < bins->low ? 0 : bins->n - 1; // assuming we always have at least 1 bin + case _FlowBehavior::error: + const std::string belowOrAbove = value < bins->low ? "below" : "above"; + auto msg = "Index " + belowOrAbove + " bounds in " + name + " for input argument " + std::to_string(variableIdx) + " value: " + std::to_string(value); + throw std::runtime_error(std::move(msg)); + } + } + + std::size_t binIdx = bins->n * ((value - bins->low) / (bins->high - bins->low)); + return binIdx; + } + + // otherwise we have non-uniform binning + using namespace std::string_literals; + const auto bins = std::get<_NonUniformBins>(bins_); + + auto it = std::upper_bound(std::begin(bins), std::end(bins), value); + if ( it == std::begin(bins) ) { // underflow + if ( flow == _FlowBehavior::value ) { + return bins.size() - 1; // the default value is stored at the end of the content array, after the last bin + } + else if ( flow == _FlowBehavior::error ) { + throw std::runtime_error("Index below bounds in "s + name + " for input argument " + std::to_string(variableIdx) + " value: " + std::to_string(value)); + } + else { // clamp + it++; + } + } + else if ( it == std::end(bins) ) { // overflow + if ( flow == _FlowBehavior::value ) { + return bins.size() - 1; + } + else if ( flow == _FlowBehavior::error ) { + throw std::runtime_error("Index above bounds in "s + name + " for input argument " + std::to_string(variableIdx) + " value: " + std::to_string(value)); + } + else { // clamp + it--; + } + } + + // -1 because upper_bound returns the edge _after_ the bin we are interested in + const std::size_t binIdx = std::distance(std::begin(bins), it) - 1; + return binIdx; + } +} // end of anonymous namespace Variable::Variable(const JSONObject& json) : name_(json.getRequired("name")), @@ -375,55 +432,7 @@ Binning::Binning(const JSONObject& json, const Correction& context) const Content& Binning::child(const std::vector& values) const { double value = std::get(values[variableIdx_]); - - if ( auto *bins = std::get_if<_UniformBins>(&bins_) ) { // uniform binning - std::size_t binIdx = bins->n * ((value - bins->low) / (bins->high - bins->low)); - if (value < bins->low || value >= bins->high) { - switch (flow_) { - case _FlowBehavior::value: - return contents_.back(); // the default value - case _FlowBehavior::clamp: - binIdx = value < bins->low ? 0 : bins->n - 1; // assuming we always have at least 1 bin - break; - case _FlowBehavior::error: - const std::string belowOrAbove = value < bins->low ? "below" : "above"; - const auto msg = "Index " + belowOrAbove + " bounds in Binning for input argument " + std::to_string(variableIdx_) + " value: " + std::to_string(value); - throw std::runtime_error(std::move(msg)); - } - } - - return contents_[binIdx]; - } - - // otherwise we have non-uniform binning - const auto bins = std::get<_NonUniformBins>(bins_); - - auto it = std::upper_bound(std::begin(bins), std::end(bins), value); - if ( it == std::begin(bins) ) { // underflow - if ( flow_ == _FlowBehavior::value ) { - return contents_.back(); // the default value - } - else if ( flow_ == _FlowBehavior::error ) { - throw std::runtime_error("Index below bounds in Binning for input argument " + std::to_string(variableIdx_) + " value: " + std::to_string(value)); - } - else { // clamp - it++; - } - } - else if ( it == std::end(bins) ) { // overflow - if ( flow_ == _FlowBehavior::value ) { - return contents_.back(); - } - else if ( flow_ == _FlowBehavior::error ) { - throw std::runtime_error("Index above bounds in Binning for input argument " + std::to_string(variableIdx_) + " value: " + std::to_string(value)); - } - else { // clamp - it--; - } - } - - // -1 because upper_bound returns the edge _after_ the bin we are interested in - const std::size_t binIdx = std::distance(std::begin(bins), it) - 1; + std::size_t binIdx = find_bin_idx(value, bins_, flow_, variableIdx_, "Binning"); return contents_[binIdx]; } @@ -493,59 +502,18 @@ MultiBinning::MultiBinning(const JSONObject& json, const Correction& context) } } -// TODO factor out logic in common with Binning::child. const Content& MultiBinning::child(const std::vector& values) const { size_t idx {0}; size_t localidx {0}; + size_t dim {0}; for (const auto& [variableIdx, stride, edgesVariant] : axes_) { double value = std::get(values[variableIdx]); - - if ( auto *bins = std::get_if<_UniformBins>(&edgesVariant) ) { // uniform bins - std::size_t binIdx = bins->n * ((value - bins->low) / (bins->high - bins->low)); - if (value < bins->low || value >= bins->high) { - switch (flow_) { - case _FlowBehavior::value: - return content_.back(); // the default value - case _FlowBehavior::clamp: - binIdx = value < bins->low ? 0 : bins->n - 1; // assuming we always have at least 1 bin - break; - case _FlowBehavior::error: - const std::string belowOrAbove = value < bins->low ? "below" : "above"; - const auto msg = "Index " + belowOrAbove + " bounds in MultiBinning for input argument " + std::to_string(variableIdx) + " value: " + std::to_string(value); - throw std::runtime_error(std::move(msg)); - } - } - localidx = binIdx; - } else { // non-uniform bins - const auto edges = std::get<_NonUniformBins>(edgesVariant); - auto it = std::upper_bound(std::begin(edges), std::end(edges), value); - if ( it == std::begin(edges) ) { - if ( flow_ == _FlowBehavior::value ) { - return *content_.rbegin(); - } - else if ( flow_ == _FlowBehavior::error ) { - throw std::runtime_error("Index below bounds in MultiBinning for input argument " + std::to_string(variableIdx) + " val: " + std::to_string(value)); - } - else { // clamp - it++; - } - } - else if ( it == std::end(edges) ) { - if ( flow_ == _FlowBehavior::value ) { - return content_.back(); - } - else if ( flow_ == _FlowBehavior::error ) { - throw std::runtime_error("Index above bounds in MultiBinning input argument" + std::to_string(variableIdx) + " val: " + std::to_string(value)); - } - else { // clamp - it--; - } - } - localidx = std::distance(std::begin(edges), it) - 1; - } - + localidx = find_bin_idx(value, edgesVariant, flow_, variableIdx, "MultiBinning"); + if ( localidx == nbins(dim) ) // find_bin_idx is indicating we need to return the default value + return content_.back(); idx += localidx * stride; + ++dim; } return content_.at(idx);