From b5f4f177ec2847b409fcb7f4fe5b4e50532f964d Mon Sep 17 00:00:00 2001 From: Vivian Date: Fri, 30 Mar 2018 11:24:39 -0700 Subject: [PATCH] 4.0.0: pynamo==3.2.1 boolean migration (#170) --- CHANGELOG.md | 8 + Dockerfile | 5 +- confidant/models/blind_credential.py | 4 +- confidant/models/credential.py | 4 +- confidant/models/service.py | 4 +- confidant/scripts/manage.py | 3 + confidant/scripts/migrate_bool.py | 302 +++++++++++++++++++++++ docs/source/basics/upgrade.html.markdown | 24 ++ setup.py | 2 +- 9 files changed, 348 insertions(+), 8 deletions(-) create mode 100644 confidant/scripts/migrate_bool.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 9fc3b388..4a840687 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Changelog +## 4.0.0 + +* This is a breaking release. This change upgrades `LegacyBooleanAttribute` + fields to `BooleanAttribute`, which saves data in a new format. Once you + upgrade to this version, you must immediately run the + migrate\_boolean\_attribute maintenance script, which will convert all old + data into the new format and prevent further issues with PynamoDB upgrades. + ## 3.0.0 * This is a breaking release, if you're using blind credentials. 
This change diff --git a/Dockerfile b/Dockerfile index 31fb32c2..743eace7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,9 +1,12 @@ FROM ubuntu:trusty MAINTAINER Ryan Lane +RUN apt-get update && \ + apt-get install -y curl && \ + curl -sL https://deb.nodesource.com/setup_7.x | sudo -E bash - RUN apt-get update && \ # For frontend - apt-get install -y ruby-full npm nodejs nodejs-legacy git git-core && \ + apt-get install -y ruby-full nodejs git git-core && \ # For backend apt-get install -y python python-pip python-dev build-essential libffi-dev \ libxml2-dev libxmlsec1-dev diff --git a/confidant/models/blind_credential.py b/confidant/models/blind_credential.py index edc70c40..cac8e951 100644 --- a/confidant/models/blind_credential.py +++ b/confidant/models/blind_credential.py @@ -4,7 +4,7 @@ from pynamodb.attributes import ( UnicodeAttribute, NumberAttribute, - LegacyBooleanAttribute, + BooleanAttribute, UTCDateTimeAttribute, JSONAttribute ) @@ -43,7 +43,7 @@ class Meta: name = UnicodeAttribute() credential_pairs = JSONAttribute() credential_keys = NonNullUnicodeSetAttribute(default=set([]), null=True) - enabled = LegacyBooleanAttribute(default=True) + enabled = BooleanAttribute(default=True) data_key = JSONAttribute() cipher_version = NumberAttribute() cipher_type = UnicodeAttribute() diff --git a/confidant/models/credential.py b/confidant/models/credential.py index 0996ef6c..bdf38b2d 100644 --- a/confidant/models/credential.py +++ b/confidant/models/credential.py @@ -4,7 +4,7 @@ from pynamodb.attributes import ( UnicodeAttribute, NumberAttribute, - LegacyBooleanAttribute, + BooleanAttribute, UTCDateTimeAttribute, BinaryAttribute, JSONAttribute @@ -40,7 +40,7 @@ class Meta: data_type_date_index = DataTypeDateIndex() name = UnicodeAttribute() credential_pairs = UnicodeAttribute() - enabled = LegacyBooleanAttribute(default=True) + enabled = BooleanAttribute(default=True) data_key = BinaryAttribute() # TODO: add cipher_type cipher_version = NumberAttribute(null=True) 
diff --git a/confidant/models/service.py b/confidant/models/service.py index ba750f92..704f0348 100644 --- a/confidant/models/service.py +++ b/confidant/models/service.py @@ -5,7 +5,7 @@ UnicodeAttribute, NumberAttribute, UTCDateTimeAttribute, - LegacyBooleanAttribute + BooleanAttribute ) from pynamodb.indexes import GlobalSecondaryIndex, AllProjection @@ -39,7 +39,7 @@ class Meta: data_type = UnicodeAttribute() data_type_date_index = DataTypeDateIndex() revision = NumberAttribute() - enabled = LegacyBooleanAttribute(default=True) + enabled = BooleanAttribute(default=True) credentials = NonNullUnicodeSetAttribute(default=set()) blind_credentials = NonNullUnicodeSetAttribute(default=set()) account = UnicodeAttribute(null=True) diff --git a/confidant/scripts/manage.py b/confidant/scripts/manage.py index 3f3e8f10..53ac69e6 100644 --- a/confidant/scripts/manage.py +++ b/confidant/scripts/manage.py @@ -10,6 +10,7 @@ MigrateBlindCredentialSetAttribute, MigrateServiceSetAttribute, ) +from confidant.scripts.migrate_bool import MigrateBooleanAttribute manager = Manager(app.app) @@ -33,6 +34,8 @@ MigrateBlindCredentialSetAttribute) manager.add_command("migrate_service_set_attribute", MigrateServiceSetAttribute) +manager.add_command("migrate_boolean_attribute", + MigrateBooleanAttribute) def main(): diff --git a/confidant/scripts/migrate_bool.py b/confidant/scripts/migrate_bool.py new file mode 100644 index 00000000..7d84e49f --- /dev/null +++ b/confidant/scripts/migrate_bool.py @@ -0,0 +1,302 @@ +from confidant.app import app + +from flask.ext.script import Command, Option +import time + +from botocore.exceptions import ClientError +from pynamodb.exceptions import UpdateError +from pynamodb.expressions.operand import Path +from pynamodb.attributes import ( + UnicodeAttribute, + BooleanAttribute, +) +from pynamodb.models import Model +from confidant.models.session_cls import DDBSession +from confidant.models.connection_cls import DDBConnection + + +class 
GenericCredential(Model): + class Meta: + table_name = app.config.get('DYNAMODB_TABLE') + if app.config.get('DYNAMODB_URL'): + host = app.config.get('DYNAMODB_URL') + region = app.config.get('AWS_DEFAULT_REGION') + connection_cls = DDBConnection + session_cls = DDBSession + id = UnicodeAttribute(hash_key=True) + enabled = BooleanAttribute(default=True) + + +def _build_lba_filter_condition(attribute_names): + """ + Build a filter condition suitable for passing to scan/rate_limited_scan, + which will filter out any items for which none of the given attributes have + native DynamoDB type of 'N'. + """ + int_filter_condition = None + for attr_name in attribute_names: + if int_filter_condition is None: + int_filter_condition = Path(attr_name).is_type('N') + else: + int_filter_condition |= Path(attr_name).is_type('N') + + return int_filter_condition + + +def _build_actions(model_class, item, attribute_names): + """ + Build the update actions for an item plus the condition guarding them. + """ + actions = [] + condition = None + for attr_name in attribute_names: + if not hasattr(item, attr_name): + raise ValueError( + 'attribute {0} does not exist on model'.format(attr_name) + ) + old_value = getattr(item, attr_name) + if old_value is None: + continue + if not isinstance(old_value, bool): + raise ValueError( + 'attribute {0} does not appear to be a boolean ' + 'attribute'.format(attr_name) + ) + + actions.append(getattr(model_class, attr_name).set( + getattr(item, attr_name)) + ) + + if condition is None: + condition = Path(attr_name) == (1 if old_value else 0) + else: + condition &= Path(attr_name) == (1 if old_value else 0) + return actions, condition + + +def _handle_update_exception(e, item): + """ + Handle exceptions of type update. + """ + if not isinstance(e.cause, ClientError): + raise e + code = e.cause.response['Error'].get('Code') + if code == 'ConditionalCheckFailedException': + app.logger.warn( + 'conditional update failed (concurrent writes?) 
for object:' + ' (you will need to re-run migration)' + ) + return True + if code == 'ProvisionedThroughputExceededException': + app.logger.warn('provisioned write capacity exceeded at object:' + ' backing off (you will need to re-run migration)') + return True + raise e + + +def migrate_boolean_attributes(model_class, + attribute_names, + read_capacity_to_consume_per_second=10, + allow_scan_without_rcu=False, + mock_conditional_update_failure=False, + page_size=None, + limit=None, + number_of_secs_to_back_off=1, + max_items_updated_per_second=1.0): + """ + Migrates boolean attributes per GitHub + `issue 404 <https://github.com/pynamodb/PynamoDB/issues/404>`_. + Will scan through all objects and perform a conditional update + against any items that store any of the given attribute names as + integers. Rate limiting is performed by passing an appropriate + value as ``read_capacity_to_consume_per_second`` (which defaults to + something extremely conservative and slow). + Note that updates require provisioned write capacity as + well. Please see `the DynamoDB docs + <http://docs.aws.amazon.com/amazondynamodb/latest/developerguide/HowItWorks.ProvisionedThroughput.html>`_ + for more information. Keep in mind that there is not a simple 1:1 + mapping between provisioned read capacity and write capacity. Make + sure they are balanced. A conservative calculation would assume + that every object visited results in an update. + The function will log at level ``INFO`` the final outcome, and the + return values help identify how many items needed changing and how + many of them succeeded. For example, if you had 10 items in the + table and every one of them had an attribute that needed + migration, and upon migration we had one item which failed the + migration due to a concurrent update by another writer, the return + value would be: ``(10, 1)`` + Suggesting that 9 were updated successfully. + It is suggested that the migration step be re-run until the return + value is ``(0, 0)``. + :param model_class: + The Model class for which you are migrating. 
This should be the + up-to-date Model class using a BooleanAttribute for the relevant + attributes. + :param attribute_names: + List of strings that signify the names of attributes which are + potentially in need of migration. + :param read_capacity_to_consume_per_second: + Passed along to the underlying `rate_limited_scan` and intended as the + mechanism to rate limit progress. Please see notes above around write + capacity. + :param allow_scan_without_rcu: + Passed along to `rate_limited_scan`; intended to allow unit tests to + pass against DynamoDB Local. + :param mock_conditional_update_failure: + Only used for unit testing. When True, the conditional update + expression used internally is updated such that it is guaranteed to + fail. This is meant to trigger the code path in boto, to allow us to + unit test that we are jumping through appropriate hoops handling the + resulting failure and distinguishing it from other failures. + :param page_size: + Passed along to the underlying 'page_size'. Page size of the scan to + DynamoDB. + :param limit: + Passed along to the underlying 'limit'. Used to limit the number of + results returned. + :param number_of_secs_to_back_off: + Number of seconds to sleep when exceeding capacity. + :param max_items_updated_per_second: + An upper limit on the rate of item updates per second. 
+ :return: (number_of_items_in_need_of_update, + number_of_them_that_failed_due_to_conditional_update) + """ + app.logger.info('migrating items; no progress will be reported until ' + 'completed; this may take a while') + num_items_with_actions = 0 + num_update_failures = 0 + items_processed = 0 + time_of_last_update = 0.0 + if max_items_updated_per_second <= 0.0: + raise ValueError( + 'max_items_updated_per_second must be greater than zero' + ) + + for item in model_class.rate_limited_scan( + _build_lba_filter_condition(attribute_names), + read_capacity_to_consume_per_second=( + read_capacity_to_consume_per_second + ), + page_size=page_size, + limit=limit, + allow_rate_limited_scan_without_consumed_capacity=( + allow_scan_without_rcu + )): + items_processed += 1 + if items_processed % 1000 == 0: + app.logger.info( + 'processed items: {} Thousand'.format(items_processed/1000) + ) + + actions, condition = _build_actions(model_class, item, attribute_names) + + if not actions: + continue + + if mock_conditional_update_failure: + condition = condition & (Path('__bogus_mock_attribute') == 5) + + try: + num_items_with_actions += 1 + # Sleep amount of time to satisfy the maximum items updated per sec + # requirement + time.sleep( + max(0, 1.0 / max_items_updated_per_second - ( + time.time() - time_of_last_update + )) + ) + time_of_last_update = time.time() + item.update(actions=actions, condition=condition) + except UpdateError as e: + if _handle_update_exception(e, item): + num_update_failures += 1 + # In case of throttling, back off amount of seconds before + # continuing + time.sleep(number_of_secs_to_back_off) + + app.logger.info( + 'finished migrating; {} items required updates'.format( + num_items_with_actions + ) + ) + app.logger.info( + '{} items failed due to racing writes and/or exceeding capacity and ' + 'require re-running migration'.format(num_update_failures) + ) + return num_items_with_actions, num_update_failures + + +class MigrateBooleanAttribute(Command): 
+ + option_list = ( + Option( + '--RCU', + action="store", + dest="RCU", + type=int, + default=10, + help='Read Capacity Units to be used for scan method.' + ), + Option( + '--page-size', + action="store", + dest="page_size", + type=int, + default=None, + help='Page size used in the scan.' + ), + Option( + '--limit', + action="store", + dest="limit", + type=int, + default=None, + help='Limit the number of results returned in the scan.' + ), + Option( + '--back-off', + action="store", + dest="back_off", + type=int, + default=1, + help='Number of seconds to sleep when exceeding capacity.' + ), + Option( + '--update-rate', + action="store", + dest="update_rate", + type=float, + default=1.0, + help='An upper limit on the rate of items update per second.' + ), + Option( + '--scan-without-rcu', + action="store_true", + dest="scan_without_rcu", + default=False, + help='For development purposes, allow scanning without read ' + 'capacity units' + ) + ) + + def run(self, RCU, page_size, limit, back_off, update_rate, + scan_without_rcu): + attributes = ['enabled']  # attribute converted in 4.0.0 + app.logger.info('RCU: {}, Page Size: {}, Limit: {}, Back off: {}, ' + 'Max update rate: {}, Attributes: {}'.format( + RCU, page_size, limit, back_off, update_rate, + attributes + )) + model = GenericCredential  # id/enabled-only model of the table + res = migrate_boolean_attributes( + model, + attributes, + read_capacity_to_consume_per_second=RCU, + page_size=page_size, + limit=limit, + number_of_secs_to_back_off=back_off, + max_items_updated_per_second=update_rate, + allow_scan_without_rcu=scan_without_rcu + ) + app.logger.info(res)  # (needed update, failed) diff --git a/docs/source/basics/upgrade.html.markdown b/docs/source/basics/upgrade.html.markdown index f372d935..d1282fa6 100644 --- a/docs/source/basics/upgrade.html.markdown +++ b/docs/source/basics/upgrade.html.markdown @@ -44,3 +44,27 @@ new data format aren't written until you've run the maintenance script. This is useful to allow you to downgrade to an older version, if necessary. 
See the [maintenance mode settings docs](https://lyft.github.io/confidant/basics/configuration/#maintenance-mode-settings) for how to enable maintenance mode. + +## Upgrading to 4.0.0 + +Due to breaking changes in PynamoDB, upgrading to 4.0.0 requires a data +migration. Before migrating to 4.0.0, ensure that you are at least on 3.0.0. + +PynamoDB changed its data model over a series of releases, which requires the +upgrade path for Confidant to follow the same model. After performing this +data migration, the data is still compatible with 3.0.0, but not below this. + +### Performing the data migration for 4.0.0 + +Confidant 4.0.0 ships with a maintenance script for the data migration: + +```bash +cd /srv/confidant +source venv/bin/activate + +# Migrate the boolean attributes +python manage.py migrate_boolean_attribute +``` + +This script may fail intermittently. If any failures occur, retry the +script until all objects are fully migrated. diff --git a/setup.py b/setup.py index 2bb551b3..bf137d57 100644 --- a/setup.py +++ b/setup.py @@ -18,7 +18,7 @@ setup( name="confidant", - version="3.0.0", + version="4.0.0", packages=find_packages(exclude=["test*"]), include_package_data=True, zip_safe=False,