Merge pull request #3 from elubow/master

Code commenting and cleanup. Dude, this was well done. *thanks*
BrianGallew · May 9, 2014 · 149790d · 149790d
2 parents 576fd26 + 11d1d69
commit 149790d
Show file tree

Hide file tree

Showing 2 changed files with 88 additions and 55 deletions.
diff --git a/README.md b/README.md
@@ -17,9 +17,6 @@ For clusters that have a large amount of data per node the repair process could
 ### How the script works
 The script works by figuring out the primary range for the node that it's being executed on, and instead of running repair on the entire range, run the repair on only a smaller sub-range. When a repair is initiated on a sub-range Cassandra constructs a merkle tree only for the range specified, which in turn divides the much smaller range into 15 segments. If there is disagreement in any of the hash values then a much smaller portion of data needs to be transferred which lessens load on the system.
 
-### The Future
-The functionality provided in this script should be integrated into [DataStax OpsCenter](http://www.datastax.com/what-we-offer/products-services/datastax-opscenter) version 3.2.2. The automation and scheduling provided by OpsCenter is superior to that of this script, this script should only be considered a stopgap until the OpsCenter release is available.
-
 ### Options
 
 ```
@@ -31,30 +28,28 @@ Options:
                         keyspace to repair
   -s STEPS, --steps=STEPS
                         number of discrete ranges
-  -q, --quiet           don't print status messages to stdout
 ```
 
 ### Sample
 
 ```
-⇒ ./range_repair.py -k demo
-repair over range (-9223372036854775808, 09223372036854775808] with 100 steps for keyspace demo
-step 0100 repairing range (-9223372036854775808, -9038904596117680292] for keyspace demo ...  SUCCESS
-step 0099 repairing range (-9038904596117680292, -8854437155380584776] for keyspace demo ...  SUCCESS
-step 0098 repairing range (-8854437155380584776, -8669969714643489260] for keyspace demo ...  SUCCESS
-step 0097 repairing range (-8669969714643489260, -8485502273906393744] for keyspace demo ...  SUCCESS
-step 0096 repairing range (-8485502273906393744, -8301034833169298228] for keyspace demo ...  SUCCESS
-step 0095 repairing range (-8301034833169298228, -8116567392432202712] for keyspace demo ...  SUCCESS
-step 0094 repairing range (-8116567392432202712, -7932099951695107196] for keyspace demo ...  SUCCESS
-step 0093 repairing range (-7932099951695107196, -7747632510958011680] for keyspace demo ...  SUCCESS
-step 0092 repairing range (-7747632510958011680, -7563165070220916164] for keyspace demo ...  SUCCESS
-step 0091 repairing range (-7563165070220916164, -7378697629483820648] for keyspace demo ...  SUCCESS
+$ LOG_LEVEL="DEBUG" ./range_repair.py -k demo_keyspace
+INFO       2014-05-09 17:31:33,503    get_ring_tokens                 66  : running nodetool ring, this will take a little bit of time
+DEBUG      2014-05-09 17:31:39,057    get_ring_tokens                 72  : ring tokens found, creating ring token list...
+DEBUG      2014-05-09 17:31:40,207    get_host_tokens                 86  : host tokens found, creating host token list...
+DEBUG      2014-05-09 17:31:40,208    repair_keyspace                 170 : repair over range (-2974082934175371230, -2971948823734978979] with 100 steps for keyspace demo_keyspace
+DEBUG      2014-05-09 17:31:40,208    repair_keyspace                 176 : step 0100 repairing range (-2974082934175371230, -2974061593070967308] for keyspace demo_keyspace ...
+DEBUG      2014-05-09 17:32:47,508    repair_keyspace                 182 : step 0100 complete
+DEBUG      2014-05-09 17:32:47,509    repair_keyspace                 176 : step 0099 repairing range (-2974061593070967308, -2974040251966563386] for keyspace demo_keyspace ...
+DEBUG      2014-05-09 17:33:54,904    repair_keyspace                 182 : step 0099 complete
 ...
 ```
 
 ### Dependencies
 -   Python 2.6
 -   Cassandra ```nodetool``` must exist in the ```PATH```
 
-### Limitations
--   Does not work with vnodes
+### History
+- Originally by [Matt Stump](https://github.com/mstump)
+- Converted to work with vnodes by [Brian Gallew](https://github.com/BrianGallew)
+- Additional functionality by [Eric Lubow](http://github.com/elubow)
diff --git a/range_repair.py b/range_repair.py
@@ -1,12 +1,31 @@
 #!/usr/bin/env python
+"""
+This script will allow for smaller repairs of Cassandra ranges.
+
+#################################################
+# success, ring_tokens, error = get_ring_tokens()
+# success, host_token, error = get_host_token()
+# range_termination = get_range_termination(host_token, ring_tokens)
+# steps = 100
+
+# print repr(is_murmur_ring(ring_tokens))
+# print repr(get_ring_tokens())
+# print repr(get_host_token())
+# print repr(get_range_termination(host_token, ring_tokens))
+# print repr(get_sub_range_generator(host_token, range_termination, steps).next())
+#################################################
+"""
+from optparse import OptionParser
+
+import logging
 import operator
 import optparse
 import os
 import re
 import subprocess
 import sys
 
-def lrange(num1, num2 = None, step = 1):
+def lrange(num1, num2=None, step=1):
     op = operator.__le__
 
     if num2 is None:
@@ -23,26 +42,34 @@ def lrange(num1, num2 = None, step = 1):
         num1 += step
 
 def run_command(command, *args):
+    """take the created command and actually run it on the command
+    line capturing the output
+    """
     cmd = " ".join([command] + list(args))
     proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
     stdout, stderr = proc.communicate()
     return proc.returncode == 0, proc.returncode, cmd, stdout, stderr
 
 def is_murmur_ring(ring):
+    """check whether or not the ring is a Murmur3 ring
+    :param ring: ring information
+    """
     for i in ring:
         if i < 0:
             return True
-
     return False
 
 def get_ring_tokens():
+    """gets the token information for the ring
+    """
     tokens = []
-    print "Running nodetool ring, this will take a little bit of time."
+    logging.info("running nodetool ring, this will take a little bit of time")
     success, return_code, _, stdout, stderr = run_command("nodetool", "ring")
 
     if not success:
         return False, [], stderr
 
+    logging.debug("ring tokens found, creating ring token list...")
     for line in stdout.split("\n")[6:]:
         segments = line.split()
         if len(segments) == 8:
@@ -53,9 +80,10 @@ def get_ring_tokens():
 def get_host_tokens():
     success, return_code, _, stdout, stderr = run_command("nodetool", "info", "-T")
     if not success or stdout.find("Token") == -1:
-        print stdout
+        logging.error(stdout)
         return False, [], stderr
     token_list = []
+    logging.debug("host tokens found, creating host token list...")
     for line in stdout.split('\n'):
         if not 'Token' == line[:5]: continue
         parts = line.split()
@@ -66,6 +94,8 @@ def get_host_tokens():
     return True, token_list, None
 
 def get_range_termination(token, ring):
+    """get the token that ends the range starting at $token: the first ring token greater than $token, otherwise ring[0]
+    """
     for i in ring:
         if token < i:
             return i
@@ -74,6 +104,12 @@ def get_range_termination(token, ring):
     return ring[0]
 
 def get_sub_range_generator(start, stop, steps=100):
+    """using the full range for the $start/$stop token set,
+    create a generator of $steps subranges
+    :param start: beginning token in the range
+    :param stop: ending token in the range
+    :param steps: number of sub-ranges to create
+    """
     step_increment = abs(stop - start) / steps
     for i in lrange(start + step_increment, stop + 1, step_increment):
         yield start, i
@@ -82,91 +118,93 @@ def get_sub_range_generator(start, stop, steps=100):
         yield start, stop
 
 def repair_range(keyspace, start, end):
+    """repair the range just for that keyspace using the manual repair
+    piece of nodetool
+    :param keyspace: cassandra keyspace to repair
+    :param start: beginning token in the range to repair
+    :param end: ending token in the range to repair
+    """
     success, return_code, cmd, stdout, stderr = \
         run_command("nodetool", "repair %s -local -snapshot -pr -st %s -et %s" % (keyspace, start, end))
-
     return success, cmd, stdout, stderr
 
+def setup_logging():
+    """Sets up logging at the level named by the LOG_LEVEL environment variable (default: INFO)
+    """
+    log_format = "%(levelname) -10s %(asctime)s    %(funcName) -30s %(lineno) -5d: %(message)s"
+    log_level = os.getenv('LOG_LEVEL', 'INFO')
+    logging.basicConfig(level=logging.getLevelName(log_level), format=log_format)
+
 def format_murmur(i):
+    """format the integer for Murmur3
+    :param i: Murmur3 integer to be formatted
+    """
     return "%020d" % i
 
 def format_md5(i):
+    """format the integer for RandomPartitioner
+    :param i: RandomPartitioner integer to be formatted
+    """
     return "%039d" % i
 
-def repair_keyspace(keyspace, start_steps=100, verbose=True):
+def repair_keyspace(keyspace, start_steps=100):
+    """repair the keyspace on the node using a total of $start_steps ranges
+    :param keyspace: cassandra keyspace to repair
+    :param start_steps: break the range to repair into $start_steps sub-ranges (default: 100)
+    """
     success, ring_tokens, error = get_ring_tokens()
     if not success:
-        print "Error fetching ring tokens"
-        print error
+        logging.error("Error fetching ring tokens: {0}".format(error))
         return False
 
     success, host_token_list, error = get_host_tokens()
     if not success:
-        print "Error fetching host token"
-        print error
+        logging.error("Error fetching host token: {0}".format(error))
         return False
 
     for host_token in host_token_list:
         steps = start_steps
         range_termination = get_range_termination(host_token, ring_tokens)
         formatter = format_murmur if is_murmur_ring(ring_tokens) else format_md5
 
-        if verbose:
-            print "repair over range (%s, %s] with %s steps for keyspace %s" % (formatter(host_token), formatter(range_termination), steps, keyspace)
+        logging.debug("repair over range (%s, %s] with %s steps for keyspace %s" % (formatter(host_token), formatter(range_termination), steps, keyspace))
 
         for start, end in get_sub_range_generator(host_token, range_termination, steps):
             start = formatter(start)
             end = formatter(end)
 
-            if verbose:
-                print "step %04d repairing range (%s, %s] for keyspace %s ... " % (steps, start, end, keyspace),
+            logging.debug("step %04d repairing range (%s, %s] for keyspace %s ... " % (steps, start, end, keyspace))
             success, cmd, stdout, stderr = repair_range(keyspace, start, end)
             if not success:
-                print "FAILED"
-                print cmd
-                print stderr
+                logging.error("FAILED: {0}".format(cmd))
+                logging.error(stderr)
                 return False
-            if verbose:
-                print "SUCCESS"
+            logging.debug("step %04d complete" % (steps))
             steps -= 1
 
     return True
 
 def main():
-    from optparse import OptionParser
-
+    """parse the command line options and repair the requested keyspace
+    """
     parser = OptionParser()
     parser.add_option("-k", "--keyspace", dest="keyspace",
                       help="keyspace to repair", metavar="KEYSPACE")
 
     parser.add_option("-s", "--steps", dest="steps", type="int", default=100,
                       help="number of discrete ranges", metavar="STEPS")
 
-    parser.add_option("-q", "--quiet",
-                      action="store_false", dest="verbose", default=True,
-                      help="don't print status messages to stdout")
-
     (options, args) = parser.parse_args()
 
     if not options.keyspace:
         parser.print_help()
         sys.exit(1)
 
-    if repair_keyspace(options.keyspace, options.steps, options.verbose):
+    setup_logging()
+    if repair_keyspace(options.keyspace, options.steps):
         sys.exit(0)
 
     sys.exit(2)
 
 if __name__ == '__main__':
     main()
-
-# success, ring_tokens, error = get_ring_tokens()
-# success, host_token, error = get_host_token()
-# range_termination = get_range_termination(host_token, ring_tokens)
-# steps = 100
-
-# print repr(is_murmur_ring(ring_tokens))
-# print repr(get_ring_tokens())
-# print repr(get_host_token())
-# print repr(get_range_termination(host_token, ring_tokens))
-# print repr(get_sub_range_generator(host_token, range_termination, steps).next())