Skip to content

Commit

Permalink
Merge pull request #84 from sunilk747/update-bucket
Browse files Browse the repository at this point in the history
update bucket name
  • Loading branch information
Sunil Kumar authored Jul 8, 2019
2 parents ad5f98f + b03b66b commit cfc7fb9
Show file tree
Hide file tree
Showing 11 changed files with 211 additions and 66 deletions.
25 changes: 11 additions & 14 deletions f8a_report/report_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,10 @@ def __init__(self):
self.cursor = self.pg.cursor
self.unknown_deps_helper = UnknownDepsReportHelper()
self.sentry_helper = SentryReportHelper()
self.npm_model_bucket = os.getenv('NPM_MODEL_BUCKET', 'cvae-insights')
self.maven_model_bucket = os.getenv('MAVEN_MODEL_BUCKET', 'hpf-insights')
self.pypi_model_bucket = os.getenv('PYPI_MODEL_BUCKET', 'hpf-insights')
self.golang_model_bucket = os.getenv('GOLANG_MODEL_BUCKET', 'golang-insights')
self.npm_model_bucket = os.getenv('NPM_MODEL_BUCKET')
self.maven_model_bucket = os.getenv('MAVEN_MODEL_BUCKET')
self.pypi_model_bucket = os.getenv('PYPI_MODEL_BUCKET')
self.golang_model_bucket = os.getenv('GOLANG_MODEL_BUCKET')
self.maven_training_repo = os.getenv(
'MAVEN_TRAINING_REPO', 'https://github.com/fabric8-analytics/f8a-hpf-insights')
self.npm_training_repo = os.getenv(
Expand Down Expand Up @@ -146,8 +146,7 @@ def collate_raw_data(self, unique_stacks_with_recurrence_count, frequency):
result = {}

# Get collated user input data
collated_user_input_obj_key = '{depl_prefix}/user-input-data/collated-{freq}.json'.format(
depl_prefix=self.s3.deployment_prefix, freq=frequency)
collated_user_input_obj_key = 'user-input-data/collated-{freq}.json'.format(freq=frequency)
collated_user_input = self.s3.read_json_object(bucket_name=self.s3.report_bucket_name,
obj_key=collated_user_input_obj_key) or {}

Expand All @@ -163,8 +162,7 @@ def collate_raw_data(self, unique_stacks_with_recurrence_count, frequency):
obj_key=collated_user_input_obj_key)

# Get collated big query data
collated_big_query_obj_key = '{depl_prefix}/big-query-data/collated.json'.format(
depl_prefix=self.s3.deployment_prefix)
collated_big_query_obj_key = 'big-query-data/collated.json'
collated_big_query_data = self.s3.read_json_object(bucket_name=self.s3.report_bucket_name,
obj_key=collated_big_query_obj_key) or {}

Expand Down Expand Up @@ -227,8 +225,7 @@ def store_training_data(self, result):

for eco, stack_dict in result.items():
training_data = self.get_training_data_for_ecosystem(eco, stack_dict)
obj_key = '{eco}/{depl_prefix}/{model_version}/data/manifest.json'.format(
eco=eco, depl_prefix=self.s3.deployment_prefix, model_version=model_version)
obj_key = '{model_version}/data/manifest.json'.format(model_version=model_version)

# Get the bucket name based on ecosystems to store user-input stacks for retraining
if eco == 'maven':
Expand Down Expand Up @@ -290,8 +287,8 @@ def get_ecosystem_summary(self, ecosystem, total_stack_requests, all_deps, all_u
def save_result(self, frequency, report_name, template):
"""Save result in S3 bucket."""
try:
obj_key = '{depl_prefix}/{freq}/{report_name}.json'.format(
depl_prefix=self.s3.deployment_prefix, freq=frequency, report_name=report_name
obj_key = '{freq}/{report_name}.json'.format(
freq=frequency, report_name=report_name
)
self.s3.store_json_content(content=template, obj_key=obj_key,
bucket_name=self.s3.report_bucket_name)
Expand Down Expand Up @@ -693,8 +690,8 @@ def normalize_ingestion_data(self, start_date, end_date, ingestion_data, frequen

# Saving the final report in the relevant S3 bucket
try:
obj_key = '{depl_prefix}/{type}/epv/{report_name}.json'.format(
depl_prefix=self.s3.deployment_prefix, type=report_type, report_name=report_name
obj_key = '{type}/epv/{report_name}.json'.format(
type=report_type, report_name=report_name
)
self.s3.store_json_content(content=template, obj_key=obj_key,
bucket_name=self.s3.report_bucket_name)
Expand Down
68 changes: 58 additions & 10 deletions f8a_report/s3_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,35 @@
class S3Helper:
"""Helper class for storing reports to S3."""

def __init__(self):
def __init__(self, aws_access_key_id=None, aws_secret_access_key=None, report_bucket=None):
"""Init method for the helper class."""
self.region_name = os.environ.get('AWS_S3_REGION') or 'us-east-1'
self.aws_s3_access_key = os.environ.get('AWS_S3_ACCESS_KEY_ID')
self.aws_s3_secret_access_key = os.environ.get('AWS_S3_SECRET_ACCESS_KEY')
# Default key pair: the environment value wins, the constructor argument is
# only used when the variable is unset or empty.
self.aws_s3_access_key = os.environ.get('AWS_S3_ACCESS_KEY_ID') \
    or aws_access_key_id
self.aws_s3_secret_access_key = os.environ.get('AWS_S3_SECRET_ACCESS_KEY') or \
    aws_secret_access_key
# NOTE(review): `report_bucket` is used as the fallback for BOTH halves of the
# report-bucket key pair, and the precedence differs between the two lines
# (argument first for the key id, environment first for the secret).
# Left as-is; confirm with the callers that this is intended.
self.aws_s3_access_key_report_bucket = report_bucket or \
    os.environ.get('AWS_S3_ACCESS_KEY_ID_REPORT_BUCKET')
self.aws_s3_secret_access_key_report_bucket = \
    os.environ.get('AWS_S3_SECRET_ACCESS_KEY_REPORT_BUCKET') or report_bucket
# Per-ecosystem key pairs are read from the environment only; each is None
# when its variable is not set.
self.aws_s3_access_key_npm_bucket = \
    os.environ.get('AWS_S3_ACCESS_KEY_ID_NPM_BUCKET')
self.aws_s3_secret_access_key_npm_bucket = \
    os.environ.get('AWS_S3_SECRET_ACCESS_KEY_NPM_BUCKET')
# The variable is AWS_S3_ACCESS_KEY_ID_MVN_BUCKET, matching the name exported
# by openshift/template.yaml and the naming of the other buckets.
self.aws_s3_access_key_mvn_bucket = \
    os.environ.get('AWS_S3_ACCESS_KEY_ID_MVN_BUCKET')
self.aws_s3_secret_access_key_mvn_bucket = \
    os.environ.get('AWS_S3_SECRET_ACCESS_KEY_MVN_BUCKET')
self.aws_s3_access_key_pypi_bucket = \
    os.environ.get('AWS_S3_ACCESS_KEY_ID_PYPI_BUCKET')
self.aws_s3_secret_access_key_pypi_bucket = \
    os.environ.get('AWS_S3_SECRET_ACCESS_KEY_PYPI_BUCKET')
self.aws_s3_access_key_golang_bucket = \
    os.environ.get('AWS_S3_ACCESS_KEY_ID_GOLANG_BUCKET')
self.aws_s3_secret_access_key_golang_bucket = \
    os.environ.get('AWS_S3_SECRET_ACCESS_KEY_GOLANG_BUCKET')
self.deployment_prefix = os.environ.get('DEPLOYMENT_PREFIX') or 'dev'
self.report_bucket_name = os.environ.get('REPORT_BUCKET_NAME')

if self.aws_s3_secret_access_key is None or self.aws_s3_access_key is None or\
self.region_name is None or self.deployment_prefix is None:
raise ValueError("AWS credentials or S3 configuration was "
Expand All @@ -29,23 +50,49 @@ def __init__(self):
# S3 endpoint URL is required only for local deployments
self.s3_endpoint_url = os.environ.get('S3_ENDPOINT_URL') or 'http://localhost'

self.s3 = boto3.resource('s3', region_name=self.region_name,
aws_access_key_id=self.aws_s3_access_key,
aws_secret_access_key=self.aws_s3_secret_access_key)
def s3_client(self, bucket_name):
    """Return a boto3 S3 resource using the credentials for ``bucket_name``.

    ``bucket_name`` is compared, in order, with the values of the
    REPORT_BUCKET_NAME, PYPI_MODEL_BUCKET, GOLANG_MODEL_BUCKET,
    MAVEN_MODEL_BUCKET and NPM_MODEL_BUCKET environment variables. The
    first match selects that bucket's dedicated key pair; any other name
    uses the default key pair stored on the instance.
    """
    # (environment variable naming the bucket, suffix of the credential
    # attributes for that bucket). Order matters: the first match wins.
    bucket_env_to_suffix = (
        ('REPORT_BUCKET_NAME', '_report_bucket'),
        ('PYPI_MODEL_BUCKET', '_pypi_bucket'),
        ('GOLANG_MODEL_BUCKET', '_golang_bucket'),
        ('MAVEN_MODEL_BUCKET', '_mvn_bucket'),
        ('NPM_MODEL_BUCKET', '_npm_bucket'),
    )

    # An empty suffix resolves to the default aws_s3_access_key /
    # aws_s3_secret_access_key attributes.
    credential_suffix = ''
    for env_name, suffix in bucket_env_to_suffix:
        if bucket_name == os.environ.get(env_name):
            credential_suffix = suffix
            break

    # Only the selected pair of attributes is read from the instance.
    return boto3.resource(
        's3',
        region_name=self.region_name,
        aws_access_key_id=getattr(self, 'aws_s3_access_key' + credential_suffix),
        aws_secret_access_key=getattr(
            self, 'aws_s3_secret_access_key' + credential_suffix),
    )

def store_json_content(self, content, bucket_name, obj_key):
    """Store the report content to the S3 storage.

    ``content`` is serialized as indented JSON, UTF-8 encoded and written to
    ``obj_key`` in ``bucket_name`` through the resource returned by
    ``self.s3_client(bucket_name)``. Upload errors are logged with their
    traceback and not re-raised; the method returns nothing.
    """
    s3 = self.s3_client(bucket_name)
    try:
        logger.info('Storing the report into the S3 file %s', obj_key)
        s3.Object(bucket_name, obj_key).put(
            Body=json.dumps(content, indent=2).encode('utf-8'))
    except Exception as e:
        # Best effort: a failed upload is recorded in the log and does not
        # propagate to the caller.
        logger.exception('%r', e)

def read_json_object(self, bucket_name, obj_key):
"""Get the report json object found on the S3 bucket."""
s3 = self.s3_client(bucket_name)
try:
obj = self.s3.Object(bucket_name, obj_key)
obj = s3.Object(bucket_name, obj_key)
result = json.loads(obj.get()['Body'].read().decode('utf-8'))
return result
except ClientError as e:
Expand All @@ -59,11 +106,12 @@ def read_json_object(self, bucket_name, obj_key):

def list_objects(self, bucket_name, frequency):
"""Fetch the list of objects found on the S3 bucket."""
s3 = self.s3_client(bucket_name)
prefix = '{dp}/{freq}'.format(dp=self.deployment_prefix, freq=frequency)
res = {'objects': []}

try:
for obj in self.s3.Bucket(bucket_name).objects.filter(Prefix=prefix):
for obj in s3.Bucket(bucket_name).objects.filter(Prefix=prefix):
if os.path.basename(obj.key) != '':
res['objects'].append(obj.key)
except ClientError as e:
Expand Down
4 changes: 2 additions & 2 deletions f8a_report/sentry_report_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,8 @@ def normalize_sentry_data(self, start_date, end_date, errorlogs):
# Saving the final report in the relevant S3 bucket

try:
obj_key = '{depl_prefix}/{type}/{report_name}.json'.format(
depl_prefix=self.s3.deployment_prefix, type=report_type, report_name=report_name
obj_key = '{type}/{report_name}.json'.format(
type=report_type, report_name=report_name
)
self.s3.store_json_content(content=result, obj_key=obj_key,
bucket_name=self.s3.report_bucket_name)
Expand Down
4 changes: 1 addition & 3 deletions f8a_report/unknown_deps_report_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,7 @@ def get_past_unknown_deps(self):
past_date = (today - timedelta(days=1)).strftime('%Y-%m-%d')

# Get the report of the previous date
past_obj_key = '{depl_prefix}/daily/{report_name}.json'.format(
depl_prefix=self.s3.deployment_prefix, report_name=past_date
)
past_obj_key = 'daily/{report_name}.json'.format(report_name=past_date)
result = self.s3.read_json_object(bucket_name=self.s3.report_bucket_name,
obj_key=past_obj_key)

Expand Down
95 changes: 66 additions & 29 deletions openshift/template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,56 @@ objects:
secretKeyRef:
name: aws
key: s3-access-key-id
- name: AWS_S3_SECRET_ACCESS_KEY_REPORT_BUCKET
valueFrom:
secretKeyRef:
name: developer-analytics-audit-report-s3
key: aws_secret_access_key
- name: AWS_S3_ACCESS_KEY_ID_REPORT_BUCKET
valueFrom:
secretKeyRef:
name: developer-analytics-audit-report-s3
key: aws_access_key_id
- name: AWS_S3_SECRET_ACCESS_KEY_NPM_BUCKET
valueFrom:
secretKeyRef:
name: cvae-npm-insights-s3
key: aws_secret_access_key
- name: AWS_S3_ACCESS_KEY_ID_NPM_BUCKET
valueFrom:
secretKeyRef:
name: cvae-npm-insights-s3
key: aws_access_key_id
- name: AWS_S3_SECRET_ACCESS_KEY_MVN_BUCKET
valueFrom:
secretKeyRef:
name: hpf-maven-insights-s3
key: aws_secret_access_key
- name: AWS_S3_ACCESS_KEY_ID_MVN_BUCKET
valueFrom:
secretKeyRef:
name: hpf-maven-insights-s3
key: aws_access_key_id
- name: AWS_S3_SECRET_ACCESS_KEY_PYPI_BUCKET
valueFrom:
secretKeyRef:
name: hpf-pypi-insights-s3
key: aws_secret_access_key
- name: AWS_S3_ACCESS_KEY_ID_PYPI_BUCKET
valueFrom:
secretKeyRef:
name: hpf-pypi-insights-s3
key: aws_access_key_id
- name: AWS_S3_SECRET_ACCESS_KEY_GOLANG_BUCKET
valueFrom:
secretKeyRef:
name: golang-insights-s3
key: aws_secret_access_key
- name: AWS_S3_ACCESS_KEY_ID_GOLANG_BUCKET
valueFrom:
secretKeyRef:
name: golang-insights-s3
key: aws_access_key_id
- name: DEPLOYMENT_PREFIX
valueFrom:
configMapKeyRef:
Expand Down Expand Up @@ -77,15 +127,26 @@ objects:
name: worker
key: sentry-auth-token
- name: REPORT_BUCKET_NAME
value: developer-analytics-audit-report
valueFrom:
secretKeyRef:
name: developer-analytics-audit-report-s3
key: bucket
- name: NPM_MODEL_BUCKET
value: ${NPM_MODEL_BUCKET}
secretKeyRef:

This comment has been minimized.

Copy link
@anuragtr

anuragtr Aug 26, 2019

Contributor

valueFrom: is missing

name: cvae-npm-insights-s3
key: bucket
- name: MAVEN_MODEL_BUCKET
value: ${MAVEN_MODEL_BUCKET}
secretKeyRef:
name: hpf-maven-insights-s3
key: bucket
- name: PYPI_MODEL_BUCKET
value: ${PYPI_MODEL_BUCKET}
secretKeyRef:
name: hpf-pypi-insights-s3
key: bucket
- name: GOLANG_MODEL_BUCKET
value: ${GOLANG_MODEL_BUCKET}
secretKeyRef:
name: golang-insights-s3
key: bucket
- name: GOLANG_TRAINING_REPO
value: ${GOLANG_TRAINING_REPO}
- name: MAVEN_TRAINING_REPO
Expand Down Expand Up @@ -152,30 +213,6 @@ parameters:
name: MEMORY_LIMIT
value: "1024Mi"

- description: "NPM Model Bucket to put collated data"
displayName: NPM Model Version
required: true
name: NPM_MODEL_BUCKET
value: "cvae-insights"

- description: "Maven Model Bucket to put collated data"
displayName: Maven Model Version
required: true
name: MAVEN_MODEL_BUCKET
value: "hpf-insights"

- description: "Python Model Bucket to put collated data"
displayName: Python Model Version
required: true
name: PYPI_MODEL_BUCKET
value: "hpf-insights"

- description: "Golang Model Bucket to put collated data"
displayName: Golang Model Version
required: true
name: GOLANG_MODEL_BUCKET
value: "golang-insights"

- description: "Golang Training Repo to fetch training file"
displayName: Golang Training Repo
required: true
Expand Down
6 changes: 4 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@
#
# pip-compile --output-file requirements.txt requirements.in
#
boto3==1.9.98
botocore==1.12.98 # via boto3, s3transfer
boto==2.49.0
boto3==1.9.181
botocore==1.12.181 # via boto3, s3transfer
certifi==2018.11.29 # via requests
chardet==3.0.4 # via requests
codecov==2.0.15
Expand All @@ -14,6 +15,7 @@ datetime==4.3
docutils==0.14 # via botocore
idna==2.8 # via requests
jmespath==0.9.3 # via boto3, botocore
moto==1.3.9
psycopg2==2.7.7
python-dateutil==2.8.0 # via botocore
pytz==2018.9 # via datetime
Expand Down
5 changes: 5 additions & 0 deletions tests/data/data.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"key1": "value1",
"key2": "value2",
"key3": "value3"
}
5 changes: 5 additions & 0 deletions tests/data/dev/weekly/data.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"key1": "value1",
"key2": "value2",
"key3": "value3"
}
4 changes: 3 additions & 1 deletion tests/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,6 @@ responses
pytest-cov
pytest
pytest-mock
boto3
boto3==1.9.181
moto==1.3.4
boto==2.49.0
Loading

0 comments on commit cfc7fb9

Please sign in to comment.