Skip to content

Commit

Permalink
Fix a bucket mapping issue with the origin inspector latency metrics
Browse files Browse the repository at this point in the history
The exporter maps bucketed counters from the real-time stats API to
a Prometheus histogram. Since it doesn't have the actual values that
were used to build the bucketed counters, it has to pick a representative
value for each bucket to call Observe() with.

The current code uses values that map to the wrong buckets.
Here's a subset of the buckets from the origin latency histogram
definition:

0.001, 0.005, 0.010

Because Prometheus bucket upper bounds are inclusive, calling Observe() with
the value 0.005 places the observation in the 1-5ms bucket, not the 5-10ms
bucket. As a result, the current code shifts each bucket's count into the next
smaller bucket.

This fixes the issue by using the upper bound of each bucket interval as the
parameter to Observe() (and a value just above 60s for the open-ended final
bucket). This will result in increased origin latency values when users
upgrade to a release with this commit, but the increased numbers are more
accurate.
  • Loading branch information
crivera-fastly committed Jul 25, 2024
1 parent 11aba6d commit de3ecfb
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 61 deletions.
79 changes: 38 additions & 41 deletions pkg/origin/process.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,62 +110,59 @@ func process(serviceID, serviceName, datacenter, origin string, stats Stats, m *
// Latency stats are clearly from xxx_bucket{le="v"} metrics,
// but I don't see a good way to re-populate a histogram from
// those numbers. (If I'm missing something, file an issue!)
//
// Our clue is the final bucket, which says it's observations
// "of 60s and above". Based on that we use the lower bound of
// each stat as the observed value, except for the first bucket
// which we yolo as 500us because 0 doesn't really make sense??
// We use the upper bound of each bucket because the interval
// is (start, end]
for v, n := range map[float64]uint64{
60.00: stats.Latency60000plus,
10.00: stats.Latency10000to60000,
5.000: stats.Latency5000to10000,
1.000: stats.Latency1000to5000,
0.500: stats.Latency500to1000,
0.250: stats.Latency250to500,
0.100: stats.Latency100to250,
0.050: stats.Latency50to100,
0.010: stats.Latency10to50,
0.005: stats.Latency5to10,
0.001: stats.Latency1to5,
0.0005: stats.Latency0to1, // yolo
61.00: stats.Latency60000plus,
60.00: stats.Latency10000to60000,
10.00: stats.Latency5000to10000,
5.000: stats.Latency1000to5000,
1.000: stats.Latency500to1000,
0.500: stats.Latency250to500,
0.250: stats.Latency100to250,
0.100: stats.Latency50to100,
0.050: stats.Latency10to50,
0.010: stats.Latency5to10,
0.005: stats.Latency1to5,
0.001: stats.Latency0to1,
} {
for i := uint64(0); i < n; i++ {
m.LatencySeconds.WithLabelValues(serviceID, serviceName, datacenter, origin, srcDelivery).Observe(v)
}
}

for v, n := range map[float64]uint64{
60.00: stats.WafLatency60000plus,
10.00: stats.WafLatency10000to60000,
5.000: stats.WafLatency5000to10000,
1.000: stats.WafLatency1000to5000,
0.500: stats.WafLatency500to1000,
0.250: stats.WafLatency250to500,
0.100: stats.WafLatency100to250,
0.050: stats.WafLatency50to100,
0.010: stats.WafLatency10to50,
0.005: stats.WafLatency5to10,
0.001: stats.WafLatency1to5,
0.0005: stats.WafLatency0to1, // yolo
61.00: stats.WafLatency60000plus,
60.00: stats.WafLatency10000to60000,
10.00: stats.WafLatency5000to10000,
5.000: stats.WafLatency1000to5000,
1.000: stats.WafLatency500to1000,
0.500: stats.WafLatency250to500,
0.250: stats.WafLatency100to250,
0.100: stats.WafLatency50to100,
0.050: stats.WafLatency10to50,
0.010: stats.WafLatency5to10,
0.005: stats.WafLatency1to5,
0.001: stats.WafLatency0to1,
} {
for i := uint64(0); i < n; i++ {
m.LatencySeconds.WithLabelValues(serviceID, serviceName, datacenter, origin, srcWaf).Observe(v)
}
}

for v, n := range map[float64]uint64{
60.00: stats.ComputeLatency60000plus,
10.00: stats.ComputeLatency10000to60000,
5.000: stats.ComputeLatency5000to10000,
1.000: stats.ComputeLatency1000to5000,
0.500: stats.ComputeLatency500to1000,
0.250: stats.ComputeLatency250to500,
0.100: stats.ComputeLatency100to250,
0.050: stats.ComputeLatency50to100,
0.010: stats.ComputeLatency10to50,
0.005: stats.ComputeLatency5to10,
0.001: stats.ComputeLatency1to5,
0.0005: stats.ComputeLatency0to1, // yolo
61.00: stats.ComputeLatency60000plus,
60.00: stats.ComputeLatency10000to60000,
10.00: stats.ComputeLatency5000to10000,
5.000: stats.ComputeLatency1000to5000,
1.000: stats.ComputeLatency500to1000,
0.500: stats.ComputeLatency250to500,
0.250: stats.ComputeLatency100to250,
0.100: stats.ComputeLatency50to100,
0.050: stats.ComputeLatency10to50,
0.010: stats.ComputeLatency5to10,
0.005: stats.ComputeLatency1to5,
0.001: stats.ComputeLatency0to1,
} {
for i := uint64(0); i < n; i++ {
m.LatencySeconds.WithLabelValues(serviceID, serviceName, datacenter, origin, srcCompute).Observe(v)
Expand Down
40 changes: 20 additions & 20 deletions pkg/rt/common_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4881,7 +4881,7 @@ const originsResponseFixture = `{

var expectedOriginsMetricsOutputMap = map[string]float64{
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="compute",le="+Inf"}`: 10,
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="compute",le="0.001"}`: 10,
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="compute",le="0.001"}`: 5,
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="compute",le="0.005"}`: 10,
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="compute",le="0.01"}`: 10,
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="compute",le="0.05"}`: 10,
Expand All @@ -4893,10 +4893,10 @@ var expectedOriginsMetricsOutputMap = map[string]float64{
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="compute",le="5"}`: 10,
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="compute",le="60"}`: 10,
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="+Inf"}`: 10,
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="0.001"}`: 2,
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="0.005"}`: 3,
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="0.01"}`: 4,
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="0.05"}`: 10,
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="0.001"}`: 1,
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="0.005"}`: 2,
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="0.01"}`: 3,
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="0.05"}`: 4,
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="0.1"}`: 10,
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="0.25"}`: 10,
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="0.5"}`: 10,
Expand All @@ -4906,8 +4906,8 @@ var expectedOriginsMetricsOutputMap = map[string]float64{
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="60"}`: 10,
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf",le="+Inf"}`: 10,
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf",le="0.001"}`: 0,
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf",le="0.005"}`: 5,
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf",le="0.01"}`: 10,
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf",le="0.005"}`: 0,
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf",le="0.01"}`: 5,
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf",le="0.05"}`: 10,
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf",le="0.1"}`: 10,
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf",le="0.25"}`: 10,
Expand All @@ -4919,9 +4919,9 @@ var expectedOriginsMetricsOutputMap = map[string]float64{
`testspace_origin_latency_seconds_count{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="compute"}`: 10,
`testspace_origin_latency_seconds_count{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery"}`: 10,
`testspace_origin_latency_seconds_count{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf"}`: 10,
`testspace_origin_latency_seconds_sum{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="compute"}`: 0.008,
`testspace_origin_latency_seconds_sum{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery"}`: 0.317,
`testspace_origin_latency_seconds_sum{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf"}`: 0.075,
`testspace_origin_latency_seconds_sum{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="compute"}`: 0.03,
`testspace_origin_latency_seconds_sum{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery"}`: 0.666,
`testspace_origin_latency_seconds_sum{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf"}`: 0.3,
`testspace_origin_resp_body_bytes_total{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="compute"}`: 1152,
`testspace_origin_resp_body_bytes_total{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery"}`: 1024,
`testspace_origin_resp_body_bytes_total{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf"}`: 1088,
Expand Down Expand Up @@ -5000,7 +5000,7 @@ var expectedOriginsMetricsOutputMap = map[string]float64{

var expectedOriginsMetricsAggOutputMap = map[string]float64{
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="compute",le="+Inf"}`: 10,
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="compute",le="0.001"}`: 10,
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="compute",le="0.001"}`: 5,
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="compute",le="0.005"}`: 10,
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="compute",le="0.01"}`: 10,
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="compute",le="0.05"}`: 10,
Expand All @@ -5012,10 +5012,10 @@ var expectedOriginsMetricsAggOutputMap = map[string]float64{
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="compute",le="5"}`: 10,
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="compute",le="60"}`: 10,
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="+Inf"}`: 10,
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="0.001"}`: 2,
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="0.005"}`: 3,
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="0.01"}`: 4,
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="0.05"}`: 10,
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="0.001"}`: 1,
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="0.005"}`: 2,
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="0.01"}`: 3,
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="0.05"}`: 4,
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="0.1"}`: 10,
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="0.25"}`: 10,
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="0.5"}`: 10,
Expand All @@ -5025,8 +5025,8 @@ var expectedOriginsMetricsAggOutputMap = map[string]float64{
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="60"}`: 10,
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf",le="+Inf"}`: 10,
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf",le="0.001"}`: 0,
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf",le="0.005"}`: 5,
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf",le="0.01"}`: 10,
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf",le="0.005"}`: 0,
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf",le="0.01"}`: 5,
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf",le="0.05"}`: 10,
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf",le="0.1"}`: 10,
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf",le="0.25"}`: 10,
Expand All @@ -5038,9 +5038,9 @@ var expectedOriginsMetricsAggOutputMap = map[string]float64{
`testspace_origin_latency_seconds_count{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="compute"}`: 10,
`testspace_origin_latency_seconds_count{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery"}`: 10,
`testspace_origin_latency_seconds_count{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf"}`: 10,
`testspace_origin_latency_seconds_sum{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="compute"}`: 0.008,
`testspace_origin_latency_seconds_sum{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery"}`: 0.317,
`testspace_origin_latency_seconds_sum{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf"}`: 0.075,
`testspace_origin_latency_seconds_sum{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="compute"}`: 0.03,
`testspace_origin_latency_seconds_sum{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery"}`: 0.666,
`testspace_origin_latency_seconds_sum{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf"}`: 0.3,
`testspace_origin_resp_body_bytes_total{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="compute"}`: 1152,
`testspace_origin_resp_body_bytes_total{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery"}`: 1024,
`testspace_origin_resp_body_bytes_total{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf"}`: 1088,
Expand Down

0 comments on commit de3ecfb

Please sign in to comment.