From b2fe1954cd76977a8db8a64ca068bf287cfae938 Mon Sep 17 00:00:00 2001 From: sunilk747 Date: Fri, 14 Jun 2019 13:19:39 +0530 Subject: [PATCH 1/4] update bucket name --- f8a_report/report_helper.py | 28 +++++++++++------------- f8a_report/s3_helper.py | 6 ++--- f8a_report/sentry_report_helper.py | 4 ++-- f8a_report/unknown_deps_report_helper.py | 4 +--- openshift/template.yaml | 6 ++--- 5 files changed, 22 insertions(+), 26 deletions(-) diff --git a/f8a_report/report_helper.py b/f8a_report/report_helper.py index be78853..94ccf52 100644 --- a/f8a_report/report_helper.py +++ b/f8a_report/report_helper.py @@ -48,9 +48,9 @@ def __init__(self): self.cursor = self.pg.cursor self.unknown_deps_helper = UnknownDepsReportHelper() self.sentry_helper = SentryReportHelper() - self.npm_model_bucket = os.getenv('NPM_MODEL_BUCKET', 'cvae-insights') - self.maven_model_bucket = os.getenv('MAVEN_MODEL_BUCKET', 'hpf-insights') - self.pypi_model_bucket = os.getenv('PYPI_MODEL_BUCKET', 'hpf-insights') + self.npm_model_bucket = os.getenv('NPM_MODEL_BUCKET', 'cvae-npm-insights') + self.maven_model_bucket = os.getenv('MAVEN_MODEL_BUCKET', 'hpf-maven-insights') + self.pypi_model_bucket = os.getenv('PYPI_MODEL_BUCKET', 'hpf-pypi-insights') self.golang_model_bucket = os.getenv('GOLANG_MODEL_BUCKET', 'golang-insights') self.maven_training_repo = os.getenv( 'MAVEN_TRAINING_REPO', 'https://github.com/fabric8-analytics/f8a-hpf-insights') @@ -146,8 +146,7 @@ def collate_raw_data(self, unique_stacks_with_recurrence_count, frequency): result = {} # Get collated user input data - collated_user_input_obj_key = '{depl_prefix}/user-input-data/collated-{freq}.json'.format( - depl_prefix=self.s3.deployment_prefix, freq=frequency) + collated_user_input_obj_key = 'user-input-data/collated-{freq}.json'.format(freq=frequency) collated_user_input = self.s3.read_json_object(bucket_name=self.s3.report_bucket_name, obj_key=collated_user_input_obj_key) or {} @@ -163,8 +162,7 @@ def collate_raw_data(self, 
unique_stacks_with_recurrence_count, frequency): obj_key=collated_user_input_obj_key) # Get collated big query data - collated_big_query_obj_key = '{depl_prefix}/big-query-data/collated.json'.format( - depl_prefix=self.s3.deployment_prefix) + collated_big_query_obj_key = 'big-query-data/collated.json' collated_big_query_data = self.s3.read_json_object(bucket_name=self.s3.report_bucket_name, obj_key=collated_big_query_obj_key) or {} @@ -232,16 +230,16 @@ def store_training_data(self, result): # Get the bucket name based on ecosystems to store user-input stacks for retraining if eco == 'maven': - bucket_name = self.maven_model_bucket + bucket_name = self.s3.deployment_prefix + '-' + self.maven_model_bucket github_repo = self.maven_training_repo elif eco == 'pypi': - bucket_name = self.pypi_model_bucket + bucket_name = self.s3.deployment_prefix + '-' + self.pypi_model_bucket github_repo = self.pypi_training_repo elif eco == 'go': - bucket_name = self.golang_model_bucket + bucket_name = self.s3.deployment_prefix + '-' + self.golang_model_bucket github_repo = self.golang_training_repo elif eco == 'npm': - bucket_name = self.npm_model_bucket + bucket_name = self.s3.deployment_prefix + '-' + self.npm_model_bucket github_repo = self.npm_training_repo else: continue @@ -290,8 +288,8 @@ def get_ecosystem_summary(self, ecosystem, total_stack_requests, all_deps, all_u def save_result(self, frequency, report_name, template): """Save result in S3 bucket.""" try: - obj_key = '{depl_prefix}/{freq}/{report_name}.json'.format( - depl_prefix=self.s3.deployment_prefix, freq=frequency, report_name=report_name + obj_key = '{freq}/{report_name}.json'.format( + freq=frequency, report_name=report_name ) self.s3.store_json_content(content=template, obj_key=obj_key, bucket_name=self.s3.report_bucket_name) @@ -693,8 +691,8 @@ def normalize_ingestion_data(self, start_date, end_date, ingestion_data, frequen # Saving the final report in the relevant S3 bucket try: - obj_key = 
'{depl_prefix}/{type}/epv/{report_name}.json'.format( - depl_prefix=self.s3.deployment_prefix, type=report_type, report_name=report_name + obj_key = '{type}/epv/{report_name}.json'.format( + type=report_type, report_name=report_name ) self.s3.store_json_content(content=template, obj_key=obj_key, bucket_name=self.s3.report_bucket_name) diff --git a/f8a_report/s3_helper.py b/f8a_report/s3_helper.py index 9530bc5..1b10632 100644 --- a/f8a_report/s3_helper.py +++ b/f8a_report/s3_helper.py @@ -17,9 +17,9 @@ def __init__(self): self.region_name = os.environ.get('AWS_S3_REGION') or 'us-east-1' self.aws_s3_access_key = os.environ.get('AWS_S3_ACCESS_KEY_ID') self.aws_s3_secret_access_key = os.environ.get('AWS_S3_SECRET_ACCESS_KEY') - self.deployment_prefix = os.environ.get('DEPLOYMENT_PREFIX') or 'dev' - self.report_bucket_name = os.environ.get('REPORT_BUCKET_NAME') - + self.deployment_prefix = os.environ.get('DEPLOYMENT_PREFIX').lower() or 'dev' + self.bucket_name = os.environ.get('REPORT_BUCKET_NAME') + self.report_bucket_name = self.deployment_prefix + '-' + self.bucket_name if self.aws_s3_secret_access_key is None or self.aws_s3_access_key is None or\ self.region_name is None or self.deployment_prefix is None: raise ValueError("AWS credentials or S3 configuration was " diff --git a/f8a_report/sentry_report_helper.py b/f8a_report/sentry_report_helper.py index c1c3a5b..2af998e 100644 --- a/f8a_report/sentry_report_helper.py +++ b/f8a_report/sentry_report_helper.py @@ -80,8 +80,8 @@ def normalize_sentry_data(self, start_date, end_date, errorlogs): # Saving the final report in the relevant S3 bucket try: - obj_key = '{depl_prefix}/{type}/{report_name}.json'.format( - depl_prefix=self.s3.deployment_prefix, type=report_type, report_name=report_name + obj_key = '{type}/{report_name}.json'.format( + type=report_type, report_name=report_name ) self.s3.store_json_content(content=result, obj_key=obj_key, bucket_name=self.s3.report_bucket_name) diff --git 
a/f8a_report/unknown_deps_report_helper.py b/f8a_report/unknown_deps_report_helper.py index d246aad..20e912d 100644 --- a/f8a_report/unknown_deps_report_helper.py +++ b/f8a_report/unknown_deps_report_helper.py @@ -19,9 +19,7 @@ def get_past_unknown_deps(self): past_date = (today - timedelta(days=1)).strftime('%Y-%m-%d') # Get the report of the previous date - past_obj_key = '{depl_prefix}/daily/{report_name}.json'.format( - depl_prefix=self.s3.deployment_prefix, report_name=past_date - ) + past_obj_key = 'daily/{report_name}.json'.format(report_name=past_date) result = self.s3.read_json_object(bucket_name=self.s3.report_bucket_name, obj_key=past_obj_key) diff --git a/openshift/template.yaml b/openshift/template.yaml index 5b85f02..cf2ff36 100644 --- a/openshift/template.yaml +++ b/openshift/template.yaml @@ -156,19 +156,19 @@ parameters: displayName: NPM Model Version required: true name: NPM_MODEL_BUCKET - value: "cvae-insights" + value: "cvae-npm-insights" - description: "Maven Model Bucket to put collated data" displayName: Maven Model Version required: true name: MAVEN_MODEL_BUCKET - value: "hpf-insights" + value: "hpf-maven-insights" - description: "Python Model Bucket to put collated data" displayName: Python Model Version required: true name: PYPI_MODEL_BUCKET - value: "hpf-insights" + value: "hpf-pypi-insights" - description: "Golang Model Bucket to put collated data" displayName: Golang Model Version From 19dd97484edb4f9019fefbfe4065ebb0087bdbcd Mon Sep 17 00:00:00 2001 From: sunilk747 Date: Fri, 14 Jun 2019 14:03:58 +0530 Subject: [PATCH 2/4] minor fix --- f8a_report/s3_helper.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/f8a_report/s3_helper.py b/f8a_report/s3_helper.py index 1b10632..e1d541e 100644 --- a/f8a_report/s3_helper.py +++ b/f8a_report/s3_helper.py @@ -17,7 +17,8 @@ def __init__(self): self.region_name = os.environ.get('AWS_S3_REGION') or 'us-east-1' self.aws_s3_access_key = os.environ.get('AWS_S3_ACCESS_KEY_ID') 
self.aws_s3_secret_access_key = os.environ.get('AWS_S3_SECRET_ACCESS_KEY') - self.deployment_prefix = os.environ.get('DEPLOYMENT_PREFIX').lower() or 'dev' + self.deployment_prefix = os.environ.get('DEPLOYMENT_PREFIX') or 'dev' + self.deployment_prefix = self.deployment_prefix.lower() self.bucket_name = os.environ.get('REPORT_BUCKET_NAME') self.report_bucket_name = self.deployment_prefix + '-' + self.bucket_name if self.aws_s3_secret_access_key is None or self.aws_s3_access_key is None or\ From 865d9241ab822177d92c1ea7a5d2c1dd12adbdad Mon Sep 17 00:00:00 2001 From: sunilk747 Date: Wed, 19 Jun 2019 18:23:19 +0530 Subject: [PATCH 3/4] pick bucket name from secrets --- f8a_report/report_helper.py | 19 +++---- f8a_report/s3_helper.py | 62 +++++++++++++++++--- openshift/template.yaml | 95 +++++++++++++++++++++---------- tests/test_stack_report_helper.py | 5 -- 4 files changed, 128 insertions(+), 53 deletions(-) diff --git a/f8a_report/report_helper.py b/f8a_report/report_helper.py index 94ccf52..0da0ebb 100644 --- a/f8a_report/report_helper.py +++ b/f8a_report/report_helper.py @@ -48,10 +48,10 @@ def __init__(self): self.cursor = self.pg.cursor self.unknown_deps_helper = UnknownDepsReportHelper() self.sentry_helper = SentryReportHelper() - self.npm_model_bucket = os.getenv('NPM_MODEL_BUCKET', 'cvae-npm-insights') - self.maven_model_bucket = os.getenv('MAVEN_MODEL_BUCKET', 'hpf-maven-insights') - self.pypi_model_bucket = os.getenv('PYPI_MODEL_BUCKET', 'hpf-pypi-insights') - self.golang_model_bucket = os.getenv('GOLANG_MODEL_BUCKET', 'golang-insights') + self.npm_model_bucket = os.getenv('NPM_MODEL_BUCKET') + self.maven_model_bucket = os.getenv('MAVEN_MODEL_BUCKET') + self.pypi_model_bucket = os.getenv('PYPI_MODEL_BUCKET') + self.golang_model_bucket = os.getenv('GOLANG_MODEL_BUCKET') self.maven_training_repo = os.getenv( 'MAVEN_TRAINING_REPO', 'https://github.com/fabric8-analytics/f8a-hpf-insights') self.npm_training_repo = os.getenv( @@ -225,21 +225,20 @@ def 
store_training_data(self, result): for eco, stack_dict in result.items(): training_data = self.get_training_data_for_ecosystem(eco, stack_dict) - obj_key = '{eco}/{depl_prefix}/{model_version}/data/manifest.json'.format( - eco=eco, depl_prefix=self.s3.deployment_prefix, model_version=model_version) + obj_key = '{model_version}/data/manifest.json'.format(model_version=model_version) # Get the bucket name based on ecosystems to store user-input stacks for retraining if eco == 'maven': - bucket_name = self.s3.deployment_prefix + '-' + self.maven_model_bucket + bucket_name = self.maven_model_bucket github_repo = self.maven_training_repo elif eco == 'pypi': - bucket_name = self.s3.deployment_prefix + '-' + self.pypi_model_bucket + bucket_name = self.pypi_model_bucket github_repo = self.pypi_training_repo elif eco == 'go': - bucket_name = self.s3.deployment_prefix + '-' + self.golang_model_bucket + bucket_name = self.golang_model_bucket github_repo = self.golang_training_repo elif eco == 'npm': - bucket_name = self.s3.deployment_prefix + '-' + self.npm_model_bucket + bucket_name = self.npm_model_bucket github_repo = self.npm_training_repo else: continue diff --git a/f8a_report/s3_helper.py b/f8a_report/s3_helper.py index e1d541e..6c902ab 100644 --- a/f8a_report/s3_helper.py +++ b/f8a_report/s3_helper.py @@ -17,10 +17,27 @@ def __init__(self): self.region_name = os.environ.get('AWS_S3_REGION') or 'us-east-1' self.aws_s3_access_key = os.environ.get('AWS_S3_ACCESS_KEY_ID') self.aws_s3_secret_access_key = os.environ.get('AWS_S3_SECRET_ACCESS_KEY') + self.aws_s3_access_key_report_bucket = os.environ.get('AWS_S3_ACCESS_KEY_ID_REPORT_BUCKET') + self.aws_s3_secret_access_key_report_bucket = \ + os.environ.get('AWS_S3_SECRET_ACCESS_KEY_REPORT_BUCKET') + self.aws_s3_access_key_npm_bucket = \ + os.environ.get('AWS_S3_ACCESS_KEY_ID_NPM_BUCKET') + self.aws_s3_secret_access_key_npm_bucket = \ + os.environ.get('AWS_S3_SECRET_ACCESS_KEY_NPM_BUCKET') + self.aws_s3_access_key_mvn_bucket = 
\ + os.environ.get('AWS_S3_ACCESS_KEY_ID_KEY_MVN_BUCKET') + self.aws_s3_secret_access_key_mvn_bucket = \ + os.environ.get('AWS_S3_SECRET_ACCESS_KEY_MVN_BUCKET') + self.aws_s3_access_key_pypi_bucket = \ + os.environ.get('AWS_S3_ACCESS_KEY_ID_PYPI_BUCKET') + self.aws_s3_secret_access_key_pypi_bucket = \ + os.environ.get('AWS_S3_SECRET_ACCESS_KEY_PYPI_BUCKET') + self.aws_s3_access_key_golang_bucket = \ + os.environ.get('AWS_S3_ACCESS_KEY_ID_GOLANG_BUCKET') + self.aws_s3_secret_access_key_golang_bucket = \ + os.environ.get('AWS_S3_SECRET_ACCESS_KEY_GOLANG_BUCKET') self.deployment_prefix = os.environ.get('DEPLOYMENT_PREFIX') or 'dev' - self.deployment_prefix = self.deployment_prefix.lower() - self.bucket_name = os.environ.get('REPORT_BUCKET_NAME') - self.report_bucket_name = self.deployment_prefix + '-' + self.bucket_name + self.report_bucket_name = os.environ.get('REPORT_BUCKET_NAME') if self.aws_s3_secret_access_key is None or self.aws_s3_access_key is None or\ self.region_name is None or self.deployment_prefix is None: raise ValueError("AWS credentials or S3 configuration was " @@ -30,23 +47,49 @@ def __init__(self): # S3 endpoint URL is required only for local deployments self.s3_endpoint_url = os.environ.get('S3_ENDPOINT_URL') or 'http://localhost' - self.s3 = boto3.resource('s3', region_name=self.region_name, - aws_access_key_id=self.aws_s3_access_key, - aws_secret_access_key=self.aws_s3_secret_access_key) + def s3_client(self, bucket_name): + """Provide s3 client for each bucket.""" + if bucket_name == 'developer-analytics-audit-report-s3': + s3 = boto3.resource('s3', region_name=self.region_name, + aws_access_key_id=self.aws_s3_access_key_report_bucket, + aws_secret_access_key=self.aws_s3_secret_access_key_report_bucket) + elif bucket_name == 'hpf-pypi-insights-s3': + s3 = boto3.resource('s3', region_name=self.region_name, + aws_access_key_id=self.aws_s3_access_key_pypi_bucket, + aws_secret_access_key=self.aws_s3_secret_access_key_pypi_bucket) + elif bucket_name 
== 'golang-insights-s3': + s3 = boto3.resource('s3', region_name=self.region_name, + aws_access_key_id=self.aws_s3_access_key_golang_bucket, + aws_secret_access_key=self.aws_s3_secret_access_key_golang_bucket) + elif bucket_name == 'hpf-maven-insights-s3': + s3 = boto3.resource('s3', region_name=self.region_name, + aws_access_key_id=self.aws_s3_access_key_mvn_bucket, + aws_secret_access_key=self.aws_s3_secret_access_key_mvn_bucket) + elif bucket_name == 'cvae-npm-insights-s3': + s3 = boto3.resource('s3', region_name=self.region_name, + aws_access_key_id=self.aws_s3_access_key_npm_bucket, + aws_secret_access_key=self.aws_s3_secret_access_key_npm_bucket) + else: + s3 = boto3.resource('s3', region_name=self.region_name, + aws_access_key_id=self.aws_s3_access_key, + aws_secret_access_key=self.aws_s3_secret_access_key) + return s3 def store_json_content(self, content, bucket_name, obj_key): """Store the report content to the S3 storage.""" + s3 = self.s3_client(bucket_name) try: logger.info('Storing the report into the S3 file %s' % obj_key) - self.s3.Object(bucket_name, obj_key).put( + s3.Object(bucket_name, obj_key).put( Body=json.dumps(content, indent=2).encode('utf-8')) except Exception as e: logger.exception('%r' % e) def read_json_object(self, bucket_name, obj_key): """Get the report json object found on the S3 bucket.""" + s3 = self.s3_client(bucket_name) try: - obj = self.s3.Object(bucket_name, obj_key) + obj = s3.Object(bucket_name, obj_key) result = json.loads(obj.get()['Body'].read().decode('utf-8')) return result except ClientError as e: @@ -60,11 +103,12 @@ def read_json_object(self, bucket_name, obj_key): def list_objects(self, bucket_name, frequency): """Fetch the list of objects found on the S3 bucket.""" + s3 = self.s3_client(bucket_name) prefix = '{dp}/{freq}'.format(dp=self.deployment_prefix, freq=frequency) res = {'objects': []} try: - for obj in self.s3.Bucket(bucket_name).objects.filter(Prefix=prefix): + for obj in 
s3.Bucket(bucket_name).objects.filter(Prefix=prefix): if os.path.basename(obj.key) != '': res['objects'].append(obj.key) except ClientError as e: diff --git a/openshift/template.yaml b/openshift/template.yaml index cf2ff36..0313e74 100644 --- a/openshift/template.yaml +++ b/openshift/template.yaml @@ -44,6 +44,56 @@ objects: secretKeyRef: name: aws key: s3-access-key-id + - name: AWS_S3_SECRET_ACCESS_KEY_REPORT_BUCKET + valueFrom: + secretKeyRef: + name: developer-analytics-audit-report-s3 + key: aws_secret_access_key + - name: AWS_S3_ACCESS_KEY_ID_REPORT_BUCKET + valueFrom: + secretKeyRef: + name: developer-analytics-audit-report-s3 + key: aws_access_key_id + - name: AWS_S3_SECRET_ACCESS_KEY_NPM_BUCKET + valueFrom: + secretKeyRef: + name: cvae-npm-insights-s3 + key: aws_secret_access_key + - name: AWS_S3_ACCESS_KEY_ID_NPM_BUCKET + valueFrom: + secretKeyRef: + name: cvae-npm-insights-s3 + key: aws_access_key_id + - name: AWS_S3_SECRET_ACCESS_KEY_MVN_BUCKET + valueFrom: + secretKeyRef: + name: hpf-maven-insights-s3 + key: aws_secret_access_key + - name: AWS_S3_ACCESS_KEY_ID_MVN_BUCKET + valueFrom: + secretKeyRef: + name: hpf-maven-insights-s3 + key: aws_access_key_id + - name: AWS_S3_SECRET_ACCESS_KEY_PYPI_BUCKET + valueFrom: + secretKeyRef: + name: hpf-pypi-insights-s3 + key: aws_secret_access_key + - name: AWS_S3_ACCESS_KEY_ID_PYPI_BUCKET + valueFrom: + secretKeyRef: + name: hpf-pypi-insights-s3 + key: aws_access_key_id + - name: AWS_S3_SECRET_ACCESS_KEY_GOLANG_BUCKET + valueFrom: + secretKeyRef: + name: golang-insights-s3 + key: aws_secret_access_key + - name: AWS_S3_ACCESS_KEY_ID_GOLANG_BUCKET + valueFrom: + secretKeyRef: + name: golang-insights-s3 + key: aws_access_key_id - name: DEPLOYMENT_PREFIX valueFrom: configMapKeyRef: @@ -77,15 +127,26 @@ objects: name: worker key: sentry-auth-token - name: REPORT_BUCKET_NAME - value: developer-analytics-audit-report + valueFrom: + secretKeyRef: + name: developer-analytics-audit-report-s3 + key: bucket - name: 
NPM_MODEL_BUCKET - value: ${NPM_MODEL_BUCKET} + secretKeyRef: + name: cvae-npm-insights-s3 + key: bucket - name: MAVEN_MODEL_BUCKET - value: ${MAVEN_MODEL_BUCKET} + secretKeyRef: + name: hpf-maven-insights-s3 + key: bucket - name: PYPI_MODEL_BUCKET - value: ${PYPI_MODEL_BUCKET} + secretKeyRef: + name: hpf-pypi-insights-s3 + key: bucket - name: GOLANG_MODEL_BUCKET - value: ${GOLANG_MODEL_BUCKET} + secretKeyRef: + name: golang-insights-s3 + key: bucket - name: GOLANG_TRAINING_REPO value: ${GOLANG_TRAINING_REPO} - name: MAVEN_TRAINING_REPO @@ -152,30 +213,6 @@ parameters: name: MEMORY_LIMIT value: "1024Mi" -- description: "NPM Model Bucket to put collated data" - displayName: NPM Model Version - required: true - name: NPM_MODEL_BUCKET - value: "cvae-npm-insights" - -- description: "Maven Model Bucket to put collated data" - displayName: Maven Model Version - required: true - name: MAVEN_MODEL_BUCKET - value: "hpf-maven-insights" - -- description: "Python Model Bucket to put collated data" - displayName: Python Model Version - required: true - name: PYPI_MODEL_BUCKET - value: "hpf-pypi-insights" - -- description: "Golang Model Bucket to put collated data" - displayName: Golang Model Version - required: true - name: GOLANG_MODEL_BUCKET - value: "golang-insights" - - description: "Golang Training Repo to fetch training file" displayName: Golang Training Repo required: true diff --git a/tests/test_stack_report_helper.py b/tests/test_stack_report_helper.py index 5675ea8..50fc5e3 100644 --- a/tests/test_stack_report_helper.py +++ b/tests/test_stack_report_helper.py @@ -239,11 +239,6 @@ def test_populate_key_count_failure(): assert e.value == 'TypeError("unhashable type: \'list\'",)' -def test_s3helper(): - """Test the failure scenario of the __init__ method of the class S3Helper.""" - assert s.s3 is not None - - @mock.patch('f8a_report.report_helper.S3Helper.store_json_content', return_value=True) def test_store_training_data(_mock1): """Test the success scenario for 
storing Retraining Data in their respective buckets.""" From b03b66b2c4c83161ebc002c4cacdd044009e3776 Mon Sep 17 00:00:00 2001 From: sunilk747 Date: Mon, 1 Jul 2019 00:38:55 +0530 Subject: [PATCH 4/4] increase test coverage --- f8a_report/s3_helper.py | 23 ++++++++------ requirements.txt | 6 ++-- tests/data/data.json | 5 +++ tests/data/dev/weekly/data.json | 5 +++ tests/requirements.txt | 4 ++- tests/test_s3_helper.py | 56 +++++++++++++++++++++++++++++++++ 6 files changed, 86 insertions(+), 13 deletions(-) create mode 100644 tests/data/data.json create mode 100644 tests/data/dev/weekly/data.json create mode 100644 tests/test_s3_helper.py diff --git a/f8a_report/s3_helper.py b/f8a_report/s3_helper.py index 6c902ab..964f920 100644 --- a/f8a_report/s3_helper.py +++ b/f8a_report/s3_helper.py @@ -12,14 +12,17 @@ class S3Helper: """Helper class for storing reports to S3.""" - def __init__(self): + def __init__(self, aws_access_key_id=None, aws_secret_access_key=None, report_bucket=None): """Init method for the helper class.""" self.region_name = os.environ.get('AWS_S3_REGION') or 'us-east-1' - self.aws_s3_access_key = os.environ.get('AWS_S3_ACCESS_KEY_ID') - self.aws_s3_secret_access_key = os.environ.get('AWS_S3_SECRET_ACCESS_KEY') - self.aws_s3_access_key_report_bucket = os.environ.get('AWS_S3_ACCESS_KEY_ID_REPORT_BUCKET') + self.aws_s3_access_key = os.environ.get('AWS_S3_ACCESS_KEY_ID') \ + or aws_access_key_id + self.aws_s3_secret_access_key = os.environ.get('AWS_S3_SECRET_ACCESS_KEY') or \ + aws_secret_access_key + self.aws_s3_access_key_report_bucket = report_bucket or \ + os.environ.get('AWS_S3_ACCESS_KEY_ID_REPORT_BUCKET') self.aws_s3_secret_access_key_report_bucket = \ - os.environ.get('AWS_S3_SECRET_ACCESS_KEY_REPORT_BUCKET') + os.environ.get('AWS_S3_SECRET_ACCESS_KEY_REPORT_BUCKET') or report_bucket self.aws_s3_access_key_npm_bucket = \ os.environ.get('AWS_S3_ACCESS_KEY_ID_NPM_BUCKET') self.aws_s3_secret_access_key_npm_bucket = \ @@ -49,23 +52,23 @@ def 
__init__(self): def s3_client(self, bucket_name): """Provide s3 client for each bucket.""" - if bucket_name == 'developer-analytics-audit-report-s3': + if bucket_name == os.environ.get('REPORT_BUCKET_NAME'): s3 = boto3.resource('s3', region_name=self.region_name, aws_access_key_id=self.aws_s3_access_key_report_bucket, aws_secret_access_key=self.aws_s3_secret_access_key_report_bucket) - elif bucket_name == 'hpf-pypi-insights-s3': + elif bucket_name == os.getenv('PYPI_MODEL_BUCKET'): s3 = boto3.resource('s3', region_name=self.region_name, aws_access_key_id=self.aws_s3_access_key_pypi_bucket, aws_secret_access_key=self.aws_s3_secret_access_key_pypi_bucket) - elif bucket_name == 'golang-insights-s3': + elif bucket_name == os.getenv('GOLANG_MODEL_BUCKET'): s3 = boto3.resource('s3', region_name=self.region_name, aws_access_key_id=self.aws_s3_access_key_golang_bucket, aws_secret_access_key=self.aws_s3_secret_access_key_golang_bucket) - elif bucket_name == 'hpf-maven-insights-s3': + elif bucket_name == os.getenv('MAVEN_MODEL_BUCKET'): s3 = boto3.resource('s3', region_name=self.region_name, aws_access_key_id=self.aws_s3_access_key_mvn_bucket, aws_secret_access_key=self.aws_s3_secret_access_key_mvn_bucket) - elif bucket_name == 'cvae-npm-insights-s3': + elif bucket_name == os.getenv('NPM_MODEL_BUCKET'): s3 = boto3.resource('s3', region_name=self.region_name, aws_access_key_id=self.aws_s3_access_key_npm_bucket, aws_secret_access_key=self.aws_s3_secret_access_key_npm_bucket) diff --git a/requirements.txt b/requirements.txt index 293a1e3..adb26a2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,8 +4,9 @@ # # pip-compile --output-file requirements.txt requirements.in # -boto3==1.9.98 -botocore==1.12.98 # via boto3, s3transfer +boto==2.49.0 +boto3==1.9.181 +botocore==1.12.181 # via boto3, s3transfer certifi==2018.11.29 # via requests chardet==3.0.4 # via requests codecov==2.0.15 @@ -14,6 +15,7 @@ datetime==4.3 docutils==0.14 # via botocore idna==2.8 # via requests 
jmespath==0.9.3 # via boto3, botocore +moto==1.3.9 psycopg2==2.7.7 python-dateutil==2.8.0 # via botocore pytz==2018.9 # via datetime diff --git a/tests/data/data.json b/tests/data/data.json new file mode 100644 index 0000000..e7cf986 --- /dev/null +++ b/tests/data/data.json @@ -0,0 +1,5 @@ +{ + "key1": "value1", + "key2": "value2", + "key3": "value3" +} \ No newline at end of file diff --git a/tests/data/dev/weekly/data.json b/tests/data/dev/weekly/data.json new file mode 100644 index 0000000..e7cf986 --- /dev/null +++ b/tests/data/dev/weekly/data.json @@ -0,0 +1,5 @@ +{ + "key1": "value1", + "key2": "value2", + "key3": "value3" +} \ No newline at end of file diff --git a/tests/requirements.txt b/tests/requirements.txt index 1c334fe..d99a1e5 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -2,4 +2,6 @@ responses pytest-cov pytest pytest-mock -boto3 +boto3==1.9.181 +moto==1.3.4 +boto==2.49.0 diff --git a/tests/test_s3_helper.py b/tests/test_s3_helper.py new file mode 100644 index 0000000..dca118c --- /dev/null +++ b/tests/test_s3_helper.py @@ -0,0 +1,56 @@ +"""Tests for classes from s3_helper module.""" + +from f8a_report.s3_helper import S3Helper +from moto import mock_s3 +import boto3 + +BUCKET = 'test_bucket' +AWS_KEY = 'fake_key' +AWS_SECRET = 'fake_secret' + + +def test_s3_helper(): + """Test to validate the s3_helper constructor function.""" + assert S3Helper() + + +def test_s3_client(): + """Test to validate the s3 client method.""" + S3 = S3Helper(aws_access_key_id=AWS_KEY, aws_secret_access_key=AWS_SECRET) + s3 = S3.s3_client(BUCKET) + assert s3 + + +@mock_s3 +def test_store_json_content(): + """Test to validate store_json method.""" + s3 = boto3.resource('s3') + s3.create_bucket(Bucket=BUCKET) + S3 = S3Helper(aws_access_key_id=AWS_KEY, aws_secret_access_key=AWS_SECRET) + S3.store_json_content({"keyA": "valueB"}, BUCKET, 'dummy.json') + + +@mock_s3 +def test_read_json_object(): + """Test to validate read_json method.""" + s3 = 
boto3.resource('s3') + s3.create_bucket(Bucket=BUCKET) + S3 = S3Helper(aws_access_key_id=AWS_KEY, aws_secret_access_key=AWS_SECRET) + s3.meta.client.upload_file('tests/data/data.json', BUCKET, 'data.json') + data = S3.read_json_object(BUCKET, 'data.json') + assert data.get("key1") == "value1" + data = S3.read_json_object('dummy', 'data.json') + assert data is None + + +@mock_s3 +def test_list_objects(): + """Test to validate list_object method.""" + s3 = boto3.resource('s3') + s3.create_bucket(Bucket=BUCKET) + S3 = S3Helper(aws_access_key_id=AWS_KEY, aws_secret_access_key=AWS_SECRET) + s3.meta.client.upload_file('tests/data/dev/weekly/data.json', BUCKET, 'dev/weekly/data.json') + obj = S3.list_objects(BUCKET, 'weekly') + assert len(obj['objects']) > 0 + data = S3.list_objects('dummy', 'weekly') + assert len(data['objects']) == 0