From 4c2ee9df31b8ce9ca6a924099b745cb155d5caee Mon Sep 17 00:00:00 2001
From: Dieter Plaetinck <dieter@vimeo.com>
Date: Mon, 8 Oct 2012 15:14:17 -0400
Subject: [PATCH 01/14] support histograms

in accordance with existing codebase, implement this in graphite
backend
---
 README.md            |  7 +++++++
 backends/graphite.js | 16 +++++++++++++++-
 exampleConfig.js     | 12 +++++++++---
 3 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index df8a2462..78f34195 100644
--- a/README.md
+++ b/README.md
@@ -51,6 +51,13 @@ generate the following list of stats for each threshold:
 Where `$KEY` is the key you stats key you specify when sending to statsd, and
 `$PCT` is the percentile threshold.
 
+If `config.histogram` is set to a non-zero array, statsd will also
+maintain frequencies for each bin as specified by the (non-inclusive)
+upper limits in the array. (`'inf'` can be used to denote infinity,
+which is highly recommended, as high outliers will not be accounted for if
+your last upper limit is too low).
+a lower limit of 0 is assumed.
+
 Sampling
 --------
 
diff --git a/backends/graphite.js b/backends/graphite.js
index 1b704c01..3ffe909f 100644
--- a/backends/graphite.js
+++ b/backends/graphite.js
@@ -15,6 +15,7 @@
 var net = require('net'),
    util = require('util');
 
+var config;
 var debug;
 var flushInterval;
 var graphiteHost;
@@ -125,6 +126,18 @@ var flush_stats = function graphite_flush(ts, metrics) {
       message += 'stats.timers.' + key + '.count ' + count + ' ' + ts + "\n";
       message += 'stats.timers.' + key + '.sum ' + sum  + ' ' + ts + "\n";
       message += 'stats.timers.' + key + '.mean ' + mean + ' ' + ts + "\n";
+
+      // note: values bigger than the upper limit of the last bin are ignored, by design
+      num_bins = (config.histogram || []).length
+      var i = 0;
+      for (var bin_i = 0; bin_i < num_bins; bin_i++) {
+        var freq = 0;
+        for (; i < count && (config.histogram[bin_i] == 'inf' || values[i] < config.histogram[bin_i]); i++) {
+          freq += 1;
+        }
+        message += 'stats.timers.' + key + '.bin_' + config.histogram[bin_i] + ' ' + freq  + ' ' + ts + "\n";
+      }
+
       statString += message;
 
       numStats += 1;
@@ -152,7 +165,8 @@ var backend_status = function graphite_status(writeCb) {
   }
 };
 
-exports.init = function graphite_init(startup_time, config, events) {
+exports.init = function graphite_init(startup_time, conf, events) {
+  config = conf
   debug = config.debug;
   graphiteHost = config.graphiteHost;
   graphitePort = config.graphitePort;
diff --git a/exampleConfig.js b/exampleConfig.js
index b9dcbe9e..acafc77c 100644
--- a/exampleConfig.js
+++ b/exampleConfig.js
@@ -27,9 +27,6 @@ Optional Variables:
   debugInterval:    interval to print debug information [ms, default: 10000]
   dumpMessages:     log all incoming messages
   flushInterval:    interval (in ms) to flush to Graphite
-  percentThreshold: for time information, calculate the Nth percentile(s)
-                    (can be a single value or list of floating-point values)
-                    [%, default: 90]
   keyFlush:         log the most frequently sent keys [object, default: undefined]
     interval:       how often to log frequent keys [ms, default: 0]
     percent:        percentage of frequent keys to log [%, default: 100]
@@ -49,6 +46,15 @@ Optional Variables:
                     packets should be "repeated" (duplicated to).
                     e.g. [ { host: '10.10.10.10', port: 8125 },
                            { host: 'observer', port: 88125 } ]
+  timer:
+    percentThreshold: calculate the Nth percentile(s)
+                    (can be a single value or list of floating-point values)
+                    [%, default: 90]
+    histogram:      an array of ordered non-inclusive upper limits of bins for
+                    histogram (in ms).  'inf' means infinity. (default: [])
+                    if non-empty, histograms are enabled and frequencies
+                    for each bin are written.
+                    e.g. [ 25, 50, 100, 150, 200, 'inf' ]
 */
 {
   graphitePort: 2003

From 6f51d04c028bcf1e644eed016537194598d5b958 Mon Sep 17 00:00:00 2001
From: Dieter Plaetinck <dieter@vimeo.com>
Date: Mon, 8 Oct 2012 15:32:17 -0400
Subject: [PATCH 02/14] clarify bins can be arbitrarily wide

---
 README.md | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 78f34195..8581c38f 100644
--- a/README.md
+++ b/README.md
@@ -55,8 +55,11 @@ If `config.histogram` is set to a non-zero array, statsd will also
 maintain frequencies for each bin as specified by the (non-inclusive)
 upper limits in the array. (`'inf'` can be used to denote infinity,
 which is highly recommended, as high outliers will not be accounted for if
-your last upper limit is too low).
-a lower limit of 0 is assumed.
+your last upper limit is too low).  A lower limit of 0 is assumed.
+Note that this is actually more powerful than real histograms, as you can
+make your bins arbitrarily wide if you want to.   Though if you want to
+view real histograms, you should make your bins equally wide
+(equally sized class intervals).
 
 Sampling
 --------

From 50dd2ae6844aa89d9d2782ff2fa0169a632004bf Mon Sep 17 00:00:00 2001
From: Dieter Plaetinck <dieter@vimeo.com>
Date: Mon, 5 Nov 2012 14:21:56 -0500
Subject: [PATCH 03/14] support different histogram settings for different
 metrics

---
 README.md            | 32 +++++++++++++++++++++++---------
 backends/graphite.js | 15 +++++++++++----
 exampleConfig.js     | 19 ++++++++++++++-----
 3 files changed, 48 insertions(+), 18 deletions(-)

diff --git a/README.md b/README.md
index 8581c38f..74e97d22 100644
--- a/README.md
+++ b/README.md
@@ -51,15 +51,29 @@ generate the following list of stats for each threshold:
 Where `$KEY` is the key you stats key you specify when sending to statsd, and
 `$PCT` is the percentile threshold.
 
-If `config.histogram` is set to a non-zero array, statsd will also
-maintain frequencies for each bin as specified by the (non-inclusive)
-upper limits in the array. (`'inf'` can be used to denote infinity,
-which is highly recommended, as high outliers will not be accounted for if
-your last upper limit is too low).  A lower limit of 0 is assumed.
-Note that this is actually more powerful than real histograms, as you can
-make your bins arbitrarily wide if you want to.   Though if you want to
-view real histograms, you should make your bins equally wide
-(equally sized class intervals).
+Use the `config.histogram` setting to instruct statsd to maintain histograms
+over time.  Specify which metrics to match and a corresponding list of
+ordered non-inclusive upper limits of bins (class intervals).
+(use `inf` to denote infinity; a lower limit of 0 is assumed)
+Each `flushInterval`, statsd will store how many values (absolute frequency)
+fall within each bin (class interval), for all matching metrics.
+First match wins.  examples:
+
+* no histograms for any timer (default): `[]`
+* histogram to only track render durations,
+  with unequal class intervals and catchall for outliers:
+
+        [ { metric: 'render', bins: [8, 25, 50, 100, 'inf'] } ]
+
+* histogram for all timers except 'foo' related,
+  with equal class interval and catchall for outliers:
+
+        [ { metric: 'foo', bins: [] },
+          { metric: '', bins: [ 50, 100, 150, 200, 'inf'] } ]
+
+Note that this is actually more powerful than what's strictly considered
+histograms, as you can make each bin arbitrarily wide if you want to
+(upto infinity), i.e. class intervals of different sizes.
 
 Sampling
 --------
diff --git a/backends/graphite.js b/backends/graphite.js
index 3ffe909f..de6bd5b7 100644
--- a/backends/graphite.js
+++ b/backends/graphite.js
@@ -128,14 +128,21 @@ var flush_stats = function graphite_flush(ts, metrics) {
       message += 'stats.timers.' + key + '.mean ' + mean + ' ' + ts + "\n";
 
       // note: values bigger than the upper limit of the last bin are ignored, by design
-      num_bins = (config.histogram || []).length
+      conf = config.histogram || [];
+      bins = [];
+      for (var i = 0; i < conf.length; i++) {
+          if (key.indexOf(conf[i].metric) > -1) {
+              bins = conf[i].bins;
+              break;
+          }
+      }
       var i = 0;
-      for (var bin_i = 0; bin_i < num_bins; bin_i++) {
+      for (var bin_i = 0; bin_i < bins.length; bin_i++) {
         var freq = 0;
-        for (; i < count && (config.histogram[bin_i] == 'inf' || values[i] < config.histogram[bin_i]); i++) {
+        for (; i < count && (bins[bin_i] == 'inf' || values[i] < bins[bin_i]); i++) {
           freq += 1;
         }
-        message += 'stats.timers.' + key + '.bin_' + config.histogram[bin_i] + ' ' + freq  + ' ' + ts + "\n";
+        message += 'stats.timers.' + key + '.bin_' + bins[bin_i] + ' ' + freq  + ' ' + ts + "\n";
       }
 
       statString += message;
diff --git a/exampleConfig.js b/exampleConfig.js
index acafc77c..6b55dfb0 100644
--- a/exampleConfig.js
+++ b/exampleConfig.js
@@ -50,11 +50,20 @@ Optional Variables:
     percentThreshold: calculate the Nth percentile(s)
                     (can be a single value or list of floating-point values)
                     [%, default: 90]
-    histogram:      an array of ordered non-inclusive upper limits of bins for
-                    histogram (in ms).  'inf' means infinity. (default: [])
-                    if non-empty, histograms are enabled and frequencies
-                    for each bin are written.
-                    e.g. [ 25, 50, 100, 150, 200, 'inf' ]
+    histogram:      an array of mappings of strings (to match metrics) and
+                    corresponding ordered non-inclusive upper limits of bins.
+                    For all matching metrics, histograms are maintained over
+                    time by writing the frequencies for all bins.
+                    'inf' means infinity. A lower limit of 0 is assumed.
+                    default: [], meaning no histograms for any timer.
+                    First match wins.  examples:
+                    * histogram to only track render durations, with unequal
+                      class intervals and catchall for outliers:
+                      [ { metric: 'render', bins: [8, 25, 50, 100, 'inf'] } ]
+                    * histogram for all timers except 'foo' related,
+                      equal class interval and catchall for outliers:
+                     [ { metric: 'foo', bins: [] },
+                       { metric: '', bins: [ 50, 100, 150, 200, 'inf'] } ]
 */
 {
   graphitePort: 2003

From 92c46c357ae5979cbd18498a6307b6263c8f3872 Mon Sep 17 00:00:00 2001
From: Dieter Plaetinck <dieter@vimeo.com>
Date: Mon, 5 Nov 2012 16:03:16 -0500
Subject: [PATCH 04/14] bugfix: allow bin upper limits to contain decimals

also modify an example to demonstrate
---
 README.md            | 14 +++++++++-----
 backends/graphite.js |  3 ++-
 exampleConfig.js     |  2 +-
 3 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index 74e97d22..87259a44 100644
--- a/README.md
+++ b/README.md
@@ -57,13 +57,13 @@ ordered non-inclusive upper limits of bins (class intervals).
 (use `inf` to denote infinity; a lower limit of 0 is assumed)
 Each `flushInterval`, statsd will store how many values (absolute frequency)
 fall within each bin (class interval), for all matching metrics.
-First match wins.  examples:
+Examples:
 
 * no histograms for any timer (default): `[]`
 * histogram to only track render durations,
   with unequal class intervals and catchall for outliers:
 
-        [ { metric: 'render', bins: [8, 25, 50, 100, 'inf'] } ]
+        [ { metric: 'render', bins: [ 0.01, 0.1, 1, 10, 'inf'] } ]
 
 * histogram for all timers except 'foo' related,
   with equal class interval and catchall for outliers:
@@ -71,9 +71,13 @@ First match wins.  examples:
         [ { metric: 'foo', bins: [] },
           { metric: '', bins: [ 50, 100, 150, 200, 'inf'] } ]
 
-Note that this is actually more powerful than what's strictly considered
-histograms, as you can make each bin arbitrarily wide if you want to
-(upto infinity), i.e. class intervals of different sizes.
+Note:
+
+* first match for a metric wins.
+* bin upper limits may contain decimals.
+* this is more flexible than a histogram in the strict sense,
+  because each bin can be made arbitrarily wide,
+  i.e. class intervals may have different sizes.
 
 Sampling
 --------
diff --git a/backends/graphite.js b/backends/graphite.js
index de6bd5b7..435f27b5 100644
--- a/backends/graphite.js
+++ b/backends/graphite.js
@@ -142,7 +142,8 @@ var flush_stats = function graphite_flush(ts, metrics) {
         for (; i < count && (bins[bin_i] == 'inf' || values[i] < bins[bin_i]); i++) {
           freq += 1;
         }
-        message += 'stats.timers.' + key + '.bin_' + bins[bin_i] + ' ' + freq  + ' ' + ts + "\n";
+        bin_name = ('bin_' + bins[bin_i]).replace('.','_');
+        message += 'stats.timers.' + key + '.' + bin_name + ' ' + freq  + ' ' + ts + "\n";
       }
 
       statString += message;
diff --git a/exampleConfig.js b/exampleConfig.js
index 6b55dfb0..1dacc007 100644
--- a/exampleConfig.js
+++ b/exampleConfig.js
@@ -59,7 +59,7 @@ Optional Variables:
                     First match wins.  examples:
                     * histogram to only track render durations, with unequal
                       class intervals and catchall for outliers:
-                      [ { metric: 'render', bins: [8, 25, 50, 100, 'inf'] } ]
+                      [ { metric: 'render', bins: [ 0.01, 0.1, 1, 10, 'inf'] } ]
                     * histogram for all timers except 'foo' related,
                       equal class interval and catchall for outliers:
                      [ { metric: 'foo', bins: [] },

From 6582ea204fb6097986fc34bd4ba8bee8b458b450 Mon Sep 17 00:00:00 2001
From: Dieter Plaetinck <dieter@plaetinck.be>
Date: Mon, 12 Nov 2012 16:07:36 -0500
Subject: [PATCH 05/14] clarify histogram/bin categorisation algorithm

---
 backends/graphite.js | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/backends/graphite.js b/backends/graphite.js
index 435f27b5..75bf6b1b 100644
--- a/backends/graphite.js
+++ b/backends/graphite.js
@@ -136,6 +136,9 @@ var flush_stats = function graphite_flush(ts, metrics) {
               break;
           }
       }
+      // the outer loop iterates bins, the inner loop iterates timer values;
+      // within each run of the inner loop we should only consider the timer value range that's within the scope of the current bin
+      // so we leverage the fact that the values are already sorted to end up with only full 1 iteration of the entire values range
       var i = 0;
       for (var bin_i = 0; bin_i < bins.length; bin_i++) {
         var freq = 0;

From 3fe3d43ae943f34fdce627792b11b215d402837a Mon Sep 17 00:00:00 2001
From: Dieter Plaetinck <dieter@plaetinck.be>
Date: Sun, 9 Dec 2012 15:24:57 -0500
Subject: [PATCH 06/14] add tests for histograms

---
 test/process_metrics_tests.js | 53 +++++++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/test/process_metrics_tests.js b/test/process_metrics_tests.js
index 6252c4b3..8eb2ab1d 100644
--- a/test/process_metrics_tests.js
+++ b/test/process_metrics_tests.js
@@ -115,6 +115,59 @@ module.exports = {
     test.equal(150, timer_data.mean_80);
     test.equal(200, timer_data.upper_80);
     test.equal(300, timer_data.sum_80);
+    test.done();
+  }, // check if the correct settings are being applied. as well as actual counts
+    timers_histogram: function (test) {
+    test.expect(45);
+    this.metrics.timers['a'] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
+    this.metrics.timers['abc'] = [0.1234, 2.89, 4, 6, 8];
+    this.metrics.timers['foo'] = [0, 2, 4, 6, 8];
+    this.metrics.timers['barbazfoobar'] = [0, 2, 4, 6, 8];
+    this.metrics.timers['bar.bazfoobar.abc'] = [0, 2, 4, 6, 8];
+    this.metrics.timers['xyz'] = [0, 2, 4, 6, 8];
+    this.metrics.histogram = [ { metric: 'foo', bins: [] },
+                               { metric: 'abcd', bins: [ 1, 5, 'inf'] },
+                               { metric: 'abc', bins: [ 1, 2.21, 'inf'] },
+                               { metric: 'a', bins: [ 1, 2] } ];
+    pm.process_metrics(this.metrics, 100, this.time_stamp, function(){});
+    timer_data = this.metrics.timer_data;
+    // nothing matches the 'abcd' config, so nothing has bin_5
+    test.equal(undefined, timer_data['a']['bin_5']);
+    test.equal(undefined, timer_data['abc']['bin_5']);
+    test.equal(undefined, timer_data['foo']['bin_5']);
+    test.equal(undefined, timer_data['barbazfoobar']['bin_5']);
+    test.equal(undefined, timer_data['bar.bazfoobar.abc']['bin_5']);
+    test.equal(undefined, timer_data['xyz']['bin_5']);
+
+    // check that 'a' got the right config and numbers
+    test.equal(0, timer_data['a']['bin_1']);
+    test.equal(1, timer_data['a']['bin_2']);
+    test.equal(undefined, timer_data['a']['bin_inf']);
+
+    // only 'abc' should have a bin_inf; also check all its counts,
+    // and make sure it has no other bins
+    // amount of non-bin_ keys: std, upper, lower, count, sum, mean -> 6
+    test.equal(1, timer_data['abc']['bin_1']);
+    test.equal(0, timer_data['abc']['bin_2_21']);
+    test.equal(4, timer_data['abc']['bin_inf']);
+    for (key in timer_data['abc']) {
+        test.ok(key.indexOf('bin_') < 0 || key == 'bin_1' || key == 'bin_2_21' || key == 'bin_inf');
+    }
+
+    // 'foo', 'barbazfoobar' and 'bar.bazfoobar.meh' and 'xyz' should not have any bin
+    for (key in timer_data['foo']) {
+        test.ok(key.indexOf('bin_') < 0);
+    }
+    for (key in timer_data['barbazfoobar']) {
+        test.ok(key.indexOf('bin_') < 0);
+    }
+    for (key in timer_data['bar.bazfoobar.abc']) {
+        test.ok(key.indexOf('bin_') < 0);
+    }
+    for (key in timer_data['xyz']) {
+        test.ok(key.indexOf('bin_') < 0);
+    }
+
     test.done();
   },
     statsd_metrics_exist: function(test) {

From 7a9d9f3644a0c58742b1b465257f40fb7c9d62f3 Mon Sep 17 00:00:00 2001
From: Dieter Plaetinck <dieter@plaetinck.be>
Date: Sun, 9 Dec 2012 16:59:00 -0500
Subject: [PATCH 07/14] put all histogram bins in a "histogram" sub-hierarchy

---
 lib/process_metrics.js        |  5 +++-
 test/process_metrics_tests.js | 48 +++++++++++++----------------------
 2 files changed, 21 insertions(+), 32 deletions(-)

diff --git a/lib/process_metrics.js b/lib/process_metrics.js
index a6b5e624..2f388170 100644
--- a/lib/process_metrics.js
+++ b/lib/process_metrics.js
@@ -82,6 +82,9 @@ var process_metrics = function (metrics, flushInterval, ts, flushCallback) {
                 break;
             }
         }
+        if(bins.length) {
+            current_timer_data['histogram'] = {};
+        }
         // the outer loop iterates bins, the inner loop iterates timer values;
         // within each run of the inner loop we should only consider the timer value range that's within the scope of the current bin
         // so we leverage the fact that the values are already sorted to end up with only full 1 iteration of the entire values range
@@ -92,7 +95,7 @@ var process_metrics = function (metrics, flushInterval, ts, flushCallback) {
             freq += 1;
           }
           bin_name = ('bin_' + bins[bin_i]).replace('.','_');
-          current_timer_data[bin_name] = freq;
+          current_timer_data['histogram'][bin_name] = freq;
         }
 
         timer_data[key] = current_timer_data;
diff --git a/test/process_metrics_tests.js b/test/process_metrics_tests.js
index 8eb2ab1d..0adfb4c9 100644
--- a/test/process_metrics_tests.js
+++ b/test/process_metrics_tests.js
@@ -1,4 +1,5 @@
-var pm = require('../lib/process_metrics')
+var pm = require('../lib/process_metrics'),
+    _  = require('underscore');
 
 module.exports = {
   setUp: function (callback) {
@@ -118,7 +119,7 @@ module.exports = {
     test.done();
   }, // check if the correct settings are being applied. as well as actual counts
     timers_histogram: function (test) {
-    test.expect(45);
+    test.expect(13);
     this.metrics.timers['a'] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
     this.metrics.timers['abc'] = [0.1234, 2.89, 4, 6, 8];
     this.metrics.timers['foo'] = [0, 2, 4, 6, 8];
@@ -132,41 +133,26 @@ module.exports = {
     pm.process_metrics(this.metrics, 100, this.time_stamp, function(){});
     timer_data = this.metrics.timer_data;
     // nothing matches the 'abcd' config, so nothing has bin_5
-    test.equal(undefined, timer_data['a']['bin_5']);
-    test.equal(undefined, timer_data['abc']['bin_5']);
-    test.equal(undefined, timer_data['foo']['bin_5']);
-    test.equal(undefined, timer_data['barbazfoobar']['bin_5']);
-    test.equal(undefined, timer_data['bar.bazfoobar.abc']['bin_5']);
-    test.equal(undefined, timer_data['xyz']['bin_5']);
+    test.equal(undefined, timer_data['a']['histogram']['bin_5']);
+    test.equal(undefined, timer_data['abc']['histogram']['bin_5']);
 
     // check that 'a' got the right config and numbers
-    test.equal(0, timer_data['a']['bin_1']);
-    test.equal(1, timer_data['a']['bin_2']);
-    test.equal(undefined, timer_data['a']['bin_inf']);
+    test.equal(0, timer_data['a']['histogram']['bin_1']);
+    test.equal(1, timer_data['a']['histogram']['bin_2']);
+    test.equal(undefined, timer_data['a']['histogram']['bin_inf']);
 
     // only 'abc' should have a bin_inf; also check all its counts,
     // and make sure it has no other bins
-    // amount of non-bin_ keys: std, upper, lower, count, sum, mean -> 6
-    test.equal(1, timer_data['abc']['bin_1']);
-    test.equal(0, timer_data['abc']['bin_2_21']);
-    test.equal(4, timer_data['abc']['bin_inf']);
-    for (key in timer_data['abc']) {
-        test.ok(key.indexOf('bin_') < 0 || key == 'bin_1' || key == 'bin_2_21' || key == 'bin_inf');
-    }
+    test.equal(1, timer_data['abc']['histogram']['bin_1']);
+    test.equal(0, timer_data['abc']['histogram']['bin_2_21']);
+    test.equal(4, timer_data['abc']['histogram']['bin_inf']);
+    test.equal(3, _.size(timer_data['abc']['histogram']));
 
-    // 'foo', 'barbazfoobar' and 'bar.bazfoobar.meh' and 'xyz' should not have any bin
-    for (key in timer_data['foo']) {
-        test.ok(key.indexOf('bin_') < 0);
-    }
-    for (key in timer_data['barbazfoobar']) {
-        test.ok(key.indexOf('bin_') < 0);
-    }
-    for (key in timer_data['bar.bazfoobar.abc']) {
-        test.ok(key.indexOf('bin_') < 0);
-    }
-    for (key in timer_data['xyz']) {
-        test.ok(key.indexOf('bin_') < 0);
-    }
+    // these all have histograms disabled ('foo' explicitly, rest implicitly)
+    test.equal(undefined, timer_data['foo']['histogram']);
+    test.equal(undefined, timer_data['barbazfoobar']['histogram']);
+    test.equal(undefined, timer_data['bar.bazfoobar.abc']['histogram']);
+    test.equal(undefined, timer_data['xyz']['histogram']);
 
     test.done();
   },

From 338fbd5d47267b4c675fa7c0476e4f33cc1604da Mon Sep 17 00:00:00 2001
From: Dieter Plaetinck <dieter@vimeo.com>
Date: Mon, 14 Jan 2013 10:41:20 -0500
Subject: [PATCH 08/14] remove metric name sanitisation. this is a task for the
 backends

---
 lib/process_metrics.js        | 2 +-
 test/process_metrics_tests.js | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/process_metrics.js b/lib/process_metrics.js
index 2f388170..b95371e8 100644
--- a/lib/process_metrics.js
+++ b/lib/process_metrics.js
@@ -94,7 +94,7 @@ var process_metrics = function (metrics, flushInterval, ts, flushCallback) {
           for (; i < count && (bins[bin_i] == 'inf' || values[i] < bins[bin_i]); i++) {
             freq += 1;
           }
-          bin_name = ('bin_' + bins[bin_i]).replace('.','_');
+          bin_name = 'bin_' + bins[bin_i];
           current_timer_data['histogram'][bin_name] = freq;
         }
 
diff --git a/test/process_metrics_tests.js b/test/process_metrics_tests.js
index 0adfb4c9..675c4a7e 100644
--- a/test/process_metrics_tests.js
+++ b/test/process_metrics_tests.js
@@ -144,7 +144,7 @@ module.exports = {
     // only 'abc' should have a bin_inf; also check all its counts,
     // and make sure it has no other bins
     test.equal(1, timer_data['abc']['histogram']['bin_1']);
-    test.equal(0, timer_data['abc']['histogram']['bin_2_21']);
+    test.equal(0, timer_data['abc']['histogram']['bin_2.21']);
     test.equal(4, timer_data['abc']['histogram']['bin_inf']);
     test.equal(3, _.size(timer_data['abc']['histogram']));
 

From 7447b557241017adb8b5033220ed7dcffc92a69e Mon Sep 17 00:00:00 2001
From: Dieter Plaetinck <dieter@plaetinck.be>
Date: Mon, 11 Feb 2013 18:22:40 -0500
Subject: [PATCH 09/14] bugfix: make sure histogram metric path is correct

also slight optimisation in the metric setting loop for timers
---
 backends/graphite.js   | 13 ++++++++++---
 test/graphite_tests.js |  2 ++
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/backends/graphite.js b/backends/graphite.js
index 1208433b..32207394 100644
--- a/backends/graphite.js
+++ b/backends/graphite.js
@@ -99,10 +99,17 @@ var flush_stats = function graphite_flush(ts, metrics) {
 
   for (key in timer_data) {
     if (Object.keys(timer_data).length > 0) {
+      var namespace = timerNamespace.concat(key);
+      var the_key = namespace.join(".");
       for (timer_data_key in timer_data[key]) {
-        var namespace = timerNamespace.concat(key);
-        var the_key = namespace.join(".");
-        statString += the_key + '.' + timer_data_key + ' ' + timer_data[key][timer_data_key] + ' ' + ts + "\n";
+        if (typeof(timer_data_key) === 'string') {
+          statString += the_key + '.' + timer_data_key + ' ' + timer_data[key][timer_data_key] + ' ' + ts + "\n";
+        } else {
+          for (timer_data_sub_key in timer_data[key][timer_data_key]) {
+            statString += the_key + '.' + timer_data_key + '.' + timer_data_sub_key + ' ' +
+                          timer_data[key][timer_data_key][timer_data_sub_key] + ' ' + ts + "\n";
+          }
+        }
       }
 
       numStats += 1;
diff --git a/test/graphite_tests.js b/test/graphite_tests.js
index c0a9b05b..4cb1dab6 100644
--- a/test/graphite_tests.js
+++ b/test/graphite_tests.js
@@ -76,6 +76,7 @@ module.exports = {
                ,  batch: 200 \n\
                ,  flushInterval: " + this.myflush + " \n\
                ,  percentThreshold: 90\n\
+               ,  timer: {histogram: [ { metric: \"a_test_value\", bins: [10] } ]}\n\
                ,  port: 8125\n\
                ,  dumpMessages: false \n\
                ,  debug: false\n\
@@ -214,6 +215,7 @@ module.exports = {
               var mykey = 'stats.timers.a_test_value.mean_90';
               return _.include(_.keys(post),mykey) && (post[mykey] == testvalue);
             };
+            //TODO: test here that one of the histogram datapoints is also correct
             test.ok(_.any(hashes,testtimervalue_test), 'stats.timers.a_test_value.mean should be ' + testvalue);
 
             test.done();

From f0a9361c2de72ecd11444e11c5c2c34e5fd7f936 Mon Sep 17 00:00:00 2001
From: Daniel Schauenberg <dschauenberg@etsy.com>
Date: Tue, 19 Feb 2013 15:22:14 -0500
Subject: [PATCH 10/14] make histograms work with the graphite backend

---
 backends/graphite.js | 3 ++-
 stats.js             | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/backends/graphite.js b/backends/graphite.js
index 40677dfc..973b66b5 100644
--- a/backends/graphite.js
+++ b/backends/graphite.js
@@ -117,10 +117,11 @@ var flush_stats = function graphite_flush(ts, metrics) {
         var namespace = timerNamespace.concat(key);
         var the_key = namespace.join(".");
 
-        if (typeof(timer_data_key) === 'string') {
+        if (typeof(timer_data[key][timer_data_key]) === 'number') {
           statString += the_key + '.' + timer_data_key + ' ' + timer_data[key][timer_data_key] + ts_suffix;
         } else {
           for (timer_data_sub_key in timer_data[key][timer_data_key]) {
+            l.log(timer_data[key][timer_data_key][timer_data_sub_key].toString());
             statString += the_key + '.' + timer_data_key + '.' + timer_data_sub_key + ' ' +
                           timer_data[key][timer_data_key][timer_data_sub_key] + ts_suffix;
           }
diff --git a/stats.js b/stats.js
index 1f727569..e18a08e9 100644
--- a/stats.js
+++ b/stats.js
@@ -54,7 +54,7 @@ function flushMetrics() {
     counter_rates: counter_rates,
     timer_data: timer_data,
     pctThreshold: pctThreshold,
-    histogram: config.histogram
+    histogram: conf.histogram
   }
 
   // After all listeners, reset the stats
@@ -124,7 +124,7 @@ config.configFile(process.argv[2], function (config, oldConfig) {
   bad_lines_seen = prefixStats + ".bad_lines_seen";
   packets_received = prefixStats + ".packets_received";
 
-  //now set to zero so we can increment them 
+  //now set to zero so we can increment them
   counters[bad_lines_seen] = 0;
   counters[packets_received] = 0;
 

From d246dab8117d3741997e0cf1879dd90d17e6071e Mon Sep 17 00:00:00 2001
From: Dieter Plaetinck <dieter@vimeo.com>
Date: Tue, 19 Feb 2013 18:13:04 -0500
Subject: [PATCH 11/14] move percentThreshold in global namespace for now

---
 exampleConfig.js | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/exampleConfig.js b/exampleConfig.js
index 31b90b66..f6e2f976 100644
--- a/exampleConfig.js
+++ b/exampleConfig.js
@@ -27,6 +27,9 @@ Optional Variables:
   debugInterval:    interval to print debug information [ms, default: 10000]
   dumpMessages:     log all incoming messages
   flushInterval:    interval (in ms) to flush to Graphite
+  percentThreshold: for time information, calculate the Nth percentile(s)
+                    (can be a single value or list of floating-point values)
+                    [%, default: 90]
   keyFlush:         log the most frequently sent keys [object, default: undefined]
     interval:       how often to log frequent keys [ms, default: 0]
     percent:        percentage of frequent keys to log [%, default: 100]
@@ -62,9 +65,6 @@ Optional Variables:
                     ["udp4" or "udp6", default: "udp4"]
 
   timer:
-    percentThreshold: calculate the Nth percentile(s)
-                    (can be a single value or list of floating-point values)
-                    [%, default: 90]
     histogram:      an array of mappings of strings (to match metrics) and
                     corresponding ordered non-inclusive upper limits of bins.
                     For all matching metrics, histograms are maintained over

From 2ca3d215f66b348f8c0c311765f83b58fbff986b Mon Sep 17 00:00:00 2001
From: Dieter Plaetinck <dieter@vimeo.com>
Date: Tue, 19 Feb 2013 18:18:55 -0500
Subject: [PATCH 12/14] implement graphite test for histograms

---
 test/graphite_tests.js | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/test/graphite_tests.js b/test/graphite_tests.js
index fd67cadc..8d066589 100644
--- a/test/graphite_tests.js
+++ b/test/graphite_tests.js
@@ -76,7 +76,7 @@ module.exports = {
                ,  batch: 200 \n\
                ,  flushInterval: " + this.myflush + " \n\
                ,  percentThreshold: 90\n\
-               ,  timer: {histogram: [ { metric: \"a_test_value\", bins: [10] } ]}\n\
+               ,  histogram: [ { metric: \"a_test_value\", bins: [10] } ]\n\
                ,  port: 8125\n\
                ,  dumpMessages: false \n\
                ,  debug: false\n\
@@ -191,7 +191,7 @@ module.exports = {
   },
 
   timers_are_valid: function (test) {
-    test.expect(5);
+    test.expect(6);
 
     var testvalue = 100;
     var me = this;
@@ -215,7 +215,11 @@ module.exports = {
               var mykey = 'stats.timers.a_test_value.mean_90';
               return _.include(_.keys(post),mykey) && (post[mykey] == testvalue);
             };
-            //TODO: test here that one of the histogram datapoints is also correct
+            var testtimerhistogramvalue_test = function(post){
+              var mykey = 'stats.timers.a_test_value.histogram.bin_10';
+              return _.include(_.keys(post),mykey) && (post[mykey] == 0);
+            };
+            test.ok(_.any(hashes,testtimerhistogramvalue_test), 'stats.timers.a_test_value.mean should be ' + 0);
             test.ok(_.any(hashes,testtimervalue_test), 'stats.timers.a_test_value.mean should be ' + testvalue);
 
             var count_test = function(post, metric){

From 08eb71ac60c25f8755a600794be8c04195e4331e Mon Sep 17 00:00:00 2001
From: Dieter Plaetinck <dieter@vimeo.com>
Date: Tue, 19 Feb 2013 18:23:46 -0500
Subject: [PATCH 13/14] remove timer subsection that was never used

---
 exampleConfig.js | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/exampleConfig.js b/exampleConfig.js
index f6e2f976..b042342a 100644
--- a/exampleConfig.js
+++ b/exampleConfig.js
@@ -64,8 +64,7 @@ Optional Variables:
   repeaterProtocol: whether to use udp4 or udp6 for repeaters.
                     ["udp4" or "udp6", default: "udp4"]
 
-  timer:
-    histogram:      an array of mappings of strings (to match metrics) and
+  histogram:        for timers, an array of mappings of strings (to match metrics) and
                     corresponding ordered non-inclusive upper limits of bins.
                     For all matching metrics, histograms are maintained over
                     time by writing the frequencies for all bins.

From 2d256e279bb2d624a0af4af3496d61a70160be54 Mon Sep 17 00:00:00 2001
From: Dieter Plaetinck <dieter@vimeo.com>
Date: Tue, 19 Feb 2013 18:40:10 -0500
Subject: [PATCH 14/14] clarify histogram graphite test: use a bin that
 actually includes the value

---
 test/graphite_tests.js | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/test/graphite_tests.js b/test/graphite_tests.js
index 8d066589..bf1886a8 100644
--- a/test/graphite_tests.js
+++ b/test/graphite_tests.js
@@ -76,7 +76,7 @@ module.exports = {
                ,  batch: 200 \n\
                ,  flushInterval: " + this.myflush + " \n\
                ,  percentThreshold: 90\n\
-               ,  histogram: [ { metric: \"a_test_value\", bins: [10] } ]\n\
+               ,  histogram: [ { metric: \"a_test_value\", bins: [1000] } ]\n\
                ,  port: 8125\n\
                ,  dumpMessages: false \n\
                ,  debug: false\n\
@@ -216,10 +216,10 @@ module.exports = {
               return _.include(_.keys(post),mykey) && (post[mykey] == testvalue);
             };
             var testtimerhistogramvalue_test = function(post){
-              var mykey = 'stats.timers.a_test_value.histogram.bin_10';
-              return _.include(_.keys(post),mykey) && (post[mykey] == 0);
+              var mykey = 'stats.timers.a_test_value.histogram.bin_1000';
+              return _.include(_.keys(post),mykey) && (post[mykey] == 1);
             };
-            test.ok(_.any(hashes,testtimerhistogramvalue_test), 'stats.timers.a_test_value.mean should be ' + 0);
+            test.ok(_.any(hashes,testtimerhistogramvalue_test), 'stats.timers.a_test_value.histogram.bin_1000 should be ' + 1);
             test.ok(_.any(hashes,testtimervalue_test), 'stats.timers.a_test_value.mean should be ' + testvalue);
 
             var count_test = function(post, metric){