Skip to content

Commit

Permalink
Revert "Ensure there's always a . or e in floats. (#162)"
Browse files Browse the repository at this point in the history
This reverts commit 67670fe.

The format change wreaks havoc with histograms if you have a mixed set
of monitored targets. `histogram_quantile` spits out weird values,
which are difficult to diagnose.

A developer team here at SoundCloud pulled in the current version of
prometheus/common via a different indirect dependency than
prometheus/client_golang. (client_golang alone would not pull it in
yet if you are using Go modules.) Canary instances then had the newer
prometheus/common, while the normal production instances had not. The
calculated quantiles for the complete service jumped up dramatically,
so the team rolled back and started to look for a performance
regression. (Just looking at the canary alone still worked, but since
nobody suspected this kind of monitoring failure case, the
investigation went totally the wrong way.)

Given the wide distribution of prometheus/client_golang and the way Go
modules work, we will see many of those innocuous upgrades that
suddenly change the `le` values on new deployments. Since this change
is buried behind dependencies, users will run into this problem
without suspecting it, even if we announce it very loud and clearly in
prometheus/common or even prometheus/client_golang.

For now, we have to revert the change and then think about a way to
mitigate it. I'm thinking of sanitizing `le` values in Prometheus
2.x. But let's think of it without haste.

Signed-off-by: beorn7 <[email protected]>
  • Loading branch information
beorn7 committed Jan 23, 2019
1 parent 2998b13 commit 0c04638
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 67 deletions.
12 changes: 3 additions & 9 deletions expfmt/text_create.go
Original file line number Diff line number Diff line change
Expand Up @@ -436,11 +436,11 @@ func writeEscapedString(w enhancedWriter, v string, includeDoubleQuote bool) (in
func writeFloat(w enhancedWriter, f float64) (int, error) {
switch {
case f == 1:
return w.WriteString("1.0")
return 1, w.WriteByte('1')
case f == 0:
return w.WriteString("0.0")
return 1, w.WriteByte('0')
case f == -1:
return w.WriteString("-1.0")
return w.WriteString("-1")
case math.IsNaN(f):
return w.WriteString("NaN")
case math.IsInf(f, +1):
Expand All @@ -450,12 +450,6 @@ func writeFloat(w enhancedWriter, f float64) (int, error) {
default:
bp := numBufPool.Get().(*[]byte)
*bp = strconv.AppendFloat((*bp)[:0], f, 'g', -1, 64)
// Add a .0 if used fixed point and there is no decimal
// point already. This is for future proofing with OpenMetrics,
// where floats always contain either an exponent or decimal.
if !bytes.ContainsAny(*bp, "e.") {
*bp = append(*bp, '.', '0')
}
written, err := w.Write(*bp)
numBufPool.Put(bp)
return written, err
Expand Down
76 changes: 18 additions & 58 deletions expfmt/text_create_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -212,14 +212,14 @@ untyped_name{name_1="value 1"} -1.23e-45
# TYPE summary_name summary
summary_name{quantile="0.5"} -1.23
summary_name{quantile="0.9"} 0.2342354
summary_name{quantile="0.99"} 0.0
summary_name{quantile="0.99"} 0
summary_name_sum -3.4567
summary_name_count 42.0
summary_name{name_1="value 1",name_2="value 2",quantile="0.5"} 1.0
summary_name{name_1="value 1",name_2="value 2",quantile="0.9"} 2.0
summary_name{name_1="value 1",name_2="value 2",quantile="0.99"} 3.0
summary_name_count 42
summary_name{name_1="value 1",name_2="value 2",quantile="0.5"} 1
summary_name{name_1="value 1",name_2="value 2",quantile="0.9"} 2
summary_name{name_1="value 1",name_2="value 2",quantile="0.99"} 3
summary_name_sum{name_1="value 1",name_2="value 2"} 2010.1971
summary_name_count{name_1="value 1",name_2="value 2"} 4711.0
summary_name_count{name_1="value 1",name_2="value 2"} 4711
`,
},
// 4: Histogram
Expand Down Expand Up @@ -261,13 +261,13 @@ summary_name_count{name_1="value 1",name_2="value 2"} 4711.0
},
out: `# HELP request_duration_microseconds The response latency.
# TYPE request_duration_microseconds histogram
request_duration_microseconds_bucket{le="100.0"} 123.0
request_duration_microseconds_bucket{le="120.0"} 412.0
request_duration_microseconds_bucket{le="144.0"} 592.0
request_duration_microseconds_bucket{le="172.8"} 1524.0
request_duration_microseconds_bucket{le="+Inf"} 2693.0
request_duration_microseconds_bucket{le="100"} 123
request_duration_microseconds_bucket{le="120"} 412
request_duration_microseconds_bucket{le="144"} 592
request_duration_microseconds_bucket{le="172.8"} 1524
request_duration_microseconds_bucket{le="+Inf"} 2693
request_duration_microseconds_sum 1.7560473e+06
request_duration_microseconds_count 2693.0
request_duration_microseconds_count 2693
`,
},
// 5: Histogram with missing +Inf bucket.
Expand All @@ -282,26 +282,6 @@ request_duration_microseconds_count 2693.0
SampleCount: proto.Uint64(2693),
SampleSum: proto.Float64(1756047.3),
Bucket: []*dto.Bucket{
&dto.Bucket{
UpperBound: proto.Float64(0),
CumulativeCount: proto.Uint64(123),
},
&dto.Bucket{
UpperBound: proto.Float64(1e-5),
CumulativeCount: proto.Uint64(123),
},
&dto.Bucket{
UpperBound: proto.Float64(1e-4),
CumulativeCount: proto.Uint64(123),
},
&dto.Bucket{
UpperBound: proto.Float64(1e-1),
CumulativeCount: proto.Uint64(123),
},
&dto.Bucket{
UpperBound: proto.Float64(1),
CumulativeCount: proto.Uint64(123),
},
&dto.Bucket{
UpperBound: proto.Float64(100),
CumulativeCount: proto.Uint64(123),
Expand All @@ -318,40 +298,20 @@ request_duration_microseconds_count 2693.0
UpperBound: proto.Float64(172.8),
CumulativeCount: proto.Uint64(1524),
},
&dto.Bucket{
UpperBound: proto.Float64(1e5),
CumulativeCount: proto.Uint64(1543),
},
&dto.Bucket{
UpperBound: proto.Float64(1e6),
CumulativeCount: proto.Uint64(1544),
},
&dto.Bucket{
UpperBound: proto.Float64(1e23),
CumulativeCount: proto.Uint64(1545),
},
},
},
},
},
},
out: `# HELP request_duration_microseconds The response latency.
# TYPE request_duration_microseconds histogram
request_duration_microseconds_bucket{le="0.0"} 123.0
request_duration_microseconds_bucket{le="1e-05"} 123.0
request_duration_microseconds_bucket{le="0.0001"} 123.0
request_duration_microseconds_bucket{le="0.1"} 123.0
request_duration_microseconds_bucket{le="1.0"} 123.0
request_duration_microseconds_bucket{le="100.0"} 123.0
request_duration_microseconds_bucket{le="120.0"} 412.0
request_duration_microseconds_bucket{le="144.0"} 592.0
request_duration_microseconds_bucket{le="172.8"} 1524.0
request_duration_microseconds_bucket{le="100000.0"} 1543.0
request_duration_microseconds_bucket{le="1e+06"} 1544.0
request_duration_microseconds_bucket{le="1e+23"} 1545.0
request_duration_microseconds_bucket{le="+Inf"} 2693.0
request_duration_microseconds_bucket{le="100"} 123
request_duration_microseconds_bucket{le="120"} 412
request_duration_microseconds_bucket{le="144"} 592
request_duration_microseconds_bucket{le="172.8"} 1524
request_duration_microseconds_bucket{le="+Inf"} 2693
request_duration_microseconds_sum 1.7560473e+06
request_duration_microseconds_count 2693.0
request_duration_microseconds_count 2693
`,
},
// 6: No metric type, should result in default type Counter.
Expand Down

0 comments on commit 0c04638

Please sign in to comment.