Skip to content

Commit

Permalink
Add overflow behavior handling in binning nodes (#37)
Browse files Browse the repository at this point in the history
* Add overflow behavior handling in binning nodes

Closes #5

* Add binning tests

This also fixes a bug: std::lower_bound selects the bin with edges[i] < x <= edges[i+1], whereas
std::upper_bound selects the bin with edges[i] <= x < edges[i+1], which is the convention we specified.

* Update rendered schemas

Re-rendering the schemas should be automated (e.g. as a pre-commit hook) so it is not forgotten.
  • Loading branch information
nsmith- authored Feb 16, 2021
1 parent ed40f31 commit 26df0d5
Show file tree
Hide file tree
Showing 7 changed files with 281 additions and 17 deletions.
5 changes: 5 additions & 0 deletions data/conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ def build_discrbinning(sf):
build_formula(sf[(sf["discrMin"] >= lo) & (sf["discrMax"] <= hi)])
for lo, hi in zip(edges[:-1], edges[1:])
],
"flow": "clamp",
}
)

Expand All @@ -66,6 +67,7 @@ def build_ptbinning(sf):
build_discrbinning(sf[(sf["ptMin"] >= lo) & (sf["ptMax"] <= hi)])
for lo, hi in zip(edges[:-1], edges[1:])
],
"flow": "clamp",
}
)

Expand All @@ -81,6 +83,7 @@ def build_etabinning(sf):
build_ptbinning(sf[(sf["etaMin"] >= lo) & (sf["etaMax"] <= hi)])
for lo, hi in zip(edges[:-1], edges[1:])
],
"flow": "error",
}
)

Expand Down Expand Up @@ -183,6 +186,7 @@ def build_pts(sf):
"input": "pt",
"edges": edges,
"content": content,
"flow": "clamp",
}
)

Expand All @@ -207,6 +211,7 @@ def build_etas(sf):
"input": "eta",
"edges": edges,
"content": content,
"flow": "error",
}
)

Expand Down
85 changes: 83 additions & 2 deletions data/schemav2.json
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,10 @@
"const": "category",
"type": "string"
},
"input": {
"title": "Input",
"type": "string"
},
"content": {
"title": "Content",
"type": "array",
Expand All @@ -159,6 +163,7 @@
},
"required": [
"nodetype",
"input",
"content"
],
"additionalProperties": false
Expand All @@ -173,6 +178,13 @@
"const": "multibinning",
"type": "string"
},
"inputs": {
"title": "Inputs",
"type": "array",
"items": {
"type": "string"
}
},
"edges": {
"title": "Edges",
"type": "array",
Expand Down Expand Up @@ -205,12 +217,47 @@
}
]
}
},
"flow": {
"title": "Flow",
"anyOf": [
{
"$ref": "#/definitions/Binning"
},
{
"$ref": "#/definitions/MultiBinning"
},
{
"$ref": "#/definitions/Category"
},
{
"$ref": "#/definitions/Formula"
},
{
"type": "number"
},
{
"title": "Flow Literal['Clamp', 'Error']",
"anyOf": [
{
"const": "clamp",
"type": "string"
},
{
"const": "error",
"type": "string"
}
]
}
]
}
},
"required": [
"nodetype",
"inputs",
"edges",
"content"
"content",
"flow"
],
"additionalProperties": false
},
Expand Down Expand Up @@ -256,13 +303,47 @@
}
]
}
},
"flow": {
"title": "Flow",
"anyOf": [
{
"$ref": "#/definitions/Binning"
},
{
"$ref": "#/definitions/MultiBinning"
},
{
"$ref": "#/definitions/Category"
},
{
"$ref": "#/definitions/Formula"
},
{
"type": "number"
},
{
"title": "Flow Literal['Clamp', 'Error']",
"anyOf": [
{
"const": "clamp",
"type": "string"
},
{
"const": "error",
"type": "string"
}
]
}
]
}
},
"required": [
"nodetype",
"input",
"edges",
"content"
"content",
"flow"
],
"additionalProperties": false
},
Expand Down
7 changes: 7 additions & 0 deletions include/correction.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,9 @@ class Formula {
double eval_ast(const Ast& ast, const std::vector<double>& variables) const;
};

// common internal for Binning and MultiBinning
// Selects what a binning node does when an input value falls outside its edges:
//   value - return the node's stored default content (default_value_)
//   clamp - use the nearest in-range bin (first bin on underflow, last on overflow)
//   error - throw std::runtime_error
// NOTE(review): identifiers beginning with an underscore followed by an uppercase
// letter are reserved for the implementation in C++; consider renaming this
// (together with every use) in a follow-up.
enum class _FlowBehavior {value, clamp, error};

class Binning {
public:
Binning(const rapidjson::Value& json, const std::vector<Variable>& inputs);
Expand All @@ -93,6 +96,8 @@ class Binning {
private:
std::vector<std::tuple<double, Content>> bins_;
size_t variableIdx_;
_FlowBehavior flow_;
std::unique_ptr<const Content> default_value_;
};

class MultiBinning {
Expand All @@ -105,6 +110,8 @@ class MultiBinning {
// variableIdx, stride, edges
std::vector<std::tuple<size_t, size_t, std::vector<double>>> axes_;
std::vector<Content> content_;
_FlowBehavior flow_;
std::unique_ptr<const Content> default_value_;
};

class Category {
Expand Down
67 changes: 59 additions & 8 deletions src/correction.cc
Original file line number Diff line number Diff line change
Expand Up @@ -325,23 +325,48 @@ Binning::Binning(const rapidjson::Value& json, const std::vector<Variable>& inpu
throw std::runtime_error("Inconsistency in Binning: number of content nodes does not match binning");
}
bins_.reserve(edges.size());
// first bin is a dummy content node (represets lower_bound returning underflow)
// TODO: good spot to put overflow default behavior
// first bin is a dummy content node (represents upper_bound returning underflow)
bins_.push_back({*edges.begin(), 0.});
for (size_t i=0; i < content.Size(); ++i) {
bins_.push_back({edges[i + 1], resolve_content(content[i], inputs)});
}
variableIdx_ = find_variable_index(json["input"], inputs);
if ( json["flow"] == "clamp" ) {
flow_ = _FlowBehavior::clamp;
}
else if ( json["flow"] == "error" ) {
flow_ = _FlowBehavior::error;
}
else { // Content node
flow_ = _FlowBehavior::value;
default_value_ = std::make_unique<const Content>(resolve_content(json["flow"], inputs));
}
}

// Return the content node of the bin that contains this node's input value.
//
// Bins are half-open: edges[i] <= x < edges[i+1] selects content[i]. bins_
// stores (upper edge, content) pairs preceded by a dummy entry keyed on the
// lowest edge, so std::upper_bound returning begin() signals underflow and
// returning end() signals overflow.
//
// Out-of-range values are handled according to flow_:
//   value - return the stored default content node
//   error - throw std::runtime_error
//   clamp - use the first real bin (underflow) or the last bin (overflow)
//
// Throws std::runtime_error when the value is out of range and flow_ is error.
const Content& Binning::child(const std::vector<Variable::Type>& values) const {
  const double value = std::get<double>(values[variableIdx_]);
  // First bin whose upper edge is strictly greater than value.
  auto it = std::upper_bound(
    std::begin(bins_), std::end(bins_), value,
    [](const double& a, const auto& b) { return a < std::get<0>(b); }
  );
  if ( it == std::begin(bins_) ) {
    // value lies below the lowest edge
    if ( flow_ == _FlowBehavior::value ) {
      return *default_value_;
    }
    else if ( flow_ == _FlowBehavior::error ) {
      throw std::runtime_error("Index below bounds in Binning for input " + std::to_string(variableIdx_) + " value: " + std::to_string(value));
    }
    else { // clamp: step past the dummy entry onto the first real bin
      ++it;
    }
  }
  else if ( it == std::end(bins_) ) {
    // value lies at or above the highest edge
    if ( flow_ == _FlowBehavior::value ) {
      return *default_value_;
    }
    else if ( flow_ == _FlowBehavior::error ) {
      throw std::runtime_error("Index above bounds in Binning for input " + std::to_string(variableIdx_) + " value: " + std::to_string(value));
    }
    else { // clamp: step back onto the last bin
      --it;
    }
  }
  return std::get<1>(*it);
}
Expand Down Expand Up @@ -373,18 +398,44 @@ MultiBinning::MultiBinning(const rapidjson::Value& json, const std::vector<Varia
if ( content_.size() != stride ) {
throw std::runtime_error("Inconsistency in MultiBinning: number of content nodes does not match binning");
}
if ( json["flow"] == "clamp" ) {
flow_ = _FlowBehavior::clamp;
}
else if ( json["flow"] == "error" ) {
flow_ = _FlowBehavior::error;
}
else { // Content node
flow_ = _FlowBehavior::value;
default_value_ = std::make_unique<const Content>(resolve_content(json["flow"], inputs));
}
}

const Content& MultiBinning::child(const std::vector<Variable::Type>& values) const {
size_t idx {0};
for (const auto& [variableIdx, stride, edges] : axes_) {
double value = std::get<double>(values[variableIdx]);
auto it = std::lower_bound(std::begin(edges), std::end(edges), value);
auto it = std::upper_bound(std::begin(edges), std::end(edges), value);
if ( it == std::begin(edges) ) {
throw std::runtime_error("Index below bounds in MultiBinning for input " + std::to_string(variableIdx) + " val: " + std::to_string(value));
if ( flow_ == _FlowBehavior::value ) {
return *default_value_;
}
else if ( flow_ == _FlowBehavior::error ) {
throw std::runtime_error("Index below bounds in MultiBinning for input " + std::to_string(variableIdx) + " val: " + std::to_string(value));
}
else { // clamp
it++;
}
}
else if ( it == std::end(edges) ) {
throw std::runtime_error("Index above bounds in MultiBinning input " + std::to_string(variableIdx) + " val: " + std::to_string(value));
if ( flow_ == _FlowBehavior::value ) {
return *default_value_;
}
else if ( flow_ == _FlowBehavior::error ) {
throw std::runtime_error("Index above bounds in MultiBinning input " + std::to_string(variableIdx) + " val: " + std::to_string(value));
}
else { // clamp
it--;
}
}
size_t localidx = std::distance(std::begin(edges), it) - 1;
idx += localidx * stride;
Expand Down
2 changes: 2 additions & 0 deletions src/correctionlib/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ def build_data(
else build_data(value, axes[i:], variables[i:])
for value in flatten_to(values, i - 1)
],
"flow": "error", # TODO: can also produce overflow guard bins and clamp
}
)
return Binning.parse_obj(
Expand All @@ -103,6 +104,7 @@ def build_data(
else build_data(value, axes[1:], variables[1:])
for value in values
],
"flow": "error", # TODO: can also produce overflow guard bins and clamp
}
)

Expand Down
4 changes: 4 additions & 0 deletions src/correctionlib/schemav2.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ class Binning(Model):
edges: List[float]
"Edges of the binning, where edges[i] <= x < edges[i+1] => f(x, ...) = content[i](...)"
content: List[Content]
flow: Union[Content, Literal["clamp", "error"]]
"Overflow behavior for out-of-bounds values"


class MultiBinning(Model):
Expand All @@ -56,6 +58,8 @@ class MultiBinning(Model):
to the element at i0 in dimension 0, i1 in dimension 1, etc. and d0 = len(edges[0]), etc.
"""
content: List[Content]
flow: Union[Content, Literal["clamp", "error"]]
"Overflow behavior for out-of-bounds values"


class CategoryItem(Model):
Expand Down
Loading

0 comments on commit 26df0d5

Please sign in to comment.