Skip to content

Commit

Permalink
Updates 2024-07-10 - Updated data catalogue list and added new get …
Browse files Browse the repository at this point in the history
…creds module
  • Loading branch information
CHRISCARLON committed Jul 10, 2024
1 parent 456c6e3 commit 4549ebd
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 51 deletions.
34 changes: 34 additions & 0 deletions herding_cats_pipelines/lambda_jobs/get_creds.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import boto3
import json
from loguru import logger

def get_param(parameter_name: str, region_name: str = "eu-west-2") -> str:
    """
    Look up a single value in AWS Systems Manager Parameter Store.

    Calls ``get_parameter`` with ``WithDecryption=True`` and returns the
    ``Value`` field of the returned ``Parameter`` entry.

    Any exception raised while fetching or unpacking the response is logged
    through loguru and then re-raised to the caller.
    """
    ssm_client = boto3.client('ssm', region_name=region_name)
    try:
        result = ssm_client.get_parameter(Name=parameter_name, WithDecryption=True)
        value = result['Parameter']['Value']
    except Exception as err:
        logger.error(f"Unable to retrieve parameter: {str(err)}")
        raise err
    return value

def get_secret(secret_name: str, region_name: str = "eu-west-2") -> dict:
    """
    Fetch a secret from AWS Secrets Manager and decode it as JSON.

    Args:
        secret_name: Identifier passed to Secrets Manager as ``SecretId``.
        region_name: AWS region used when creating the client.

    Returns:
        The result of ``json.loads`` applied to the response's
        ``SecretString`` (expected to be a JSON object of env vars).

    Raises:
        Exception: whatever ``get_secret_value`` raised, after it is logged.
        KeyError / json.JSONDecodeError: propagate unlogged if the response
            has no ``SecretString`` or it is not valid JSON.
    """
    session = boto3.session.Session()
    client = session.client(service_name='secretsmanager',
                            region_name=region_name)

    try:
        get_secret_value_response = client.get_secret_value(SecretId=secret_name)
    except Exception as e:
        logger.error(f"Unable to retrieve secret: {str(e)}")
        raise e
    else:
        # Only reached when the API call succeeded; decoding errors are
        # deliberately left outside the logged try block above.
        secret = get_secret_value_response['SecretString']
        return json.loads(secret)
86 changes: 35 additions & 51 deletions herding_cats_pipelines/lambda_jobs/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,66 +6,50 @@

from loguru import logger


def get_param(parameter_name: str, region_name: str = "eu-west-2") -> str:
    """
    Look up a single value in AWS Systems Manager Parameter Store.

    Calls ``get_parameter`` with ``WithDecryption=True`` and returns the
    ``Value`` field of the returned ``Parameter`` entry.

    Any exception raised while fetching or unpacking the response is logged
    (including the parameter name) through loguru and then re-raised.
    """
    ssm_client = boto3.client('ssm', region_name=region_name)
    try:
        result = ssm_client.get_parameter(Name=parameter_name, WithDecryption=True)
        value = result['Parameter']['Value']
    except Exception as err:
        logger.error(f"Unable to retrieve parameter {parameter_name}: {str(err)}")
        raise err
    return value

def get_secret(secret_name: str, region_name: str = "eu-west-2") -> dict:
    """
    Fetch a secret from AWS Secrets Manager and decode it as JSON.

    Args:
        secret_name: Identifier passed to Secrets Manager as ``SecretId``.
        region_name: AWS region used when creating the client.

    Returns:
        The result of ``json.loads`` applied to the response's
        ``SecretString`` (expected to be a JSON object of env vars).

    Raises:
        Exception: whatever ``get_secret_value`` raised, after it is logged
            together with the secret name.
        KeyError / json.JSONDecodeError: propagate unlogged if the response
            has no ``SecretString`` or it is not valid JSON.
    """
    session = boto3.session.Session()
    client = session.client(service_name='secretsmanager',
                            region_name=region_name)

    try:
        get_secret_value_response = client.get_secret_value(SecretId=secret_name)
    except Exception as e:
        logger.error(f"Unable to retrieve secret {secret_name}: {str(e)}")
        raise e
    else:
        # Only reached when the API call succeeded; decoding errors are
        # deliberately left outside the logged try block above.
        secret = get_secret_value_response['SecretString']
        return json.loads(secret)
from get_creds import get_param, get_secret
from urllib.parse import urlparse

def lambda_handler(event, context) -> json:
"""
AWS Lambda function to fetch data catalogue from London Datastore and dump it to S3
AWS Lambda function to fetch UK open data catalogues.
Loop through links and dump catalogue data to s3 bucket.
"""

catalogues_list = [
"https://data.london.gov.uk/api/action/package_search",
"https://opendata.bristol.gov.uk/api/feed/dcat-ap/2.1.1.json"
]

try:
# Fetch aws params and secrets
secret_name = get_param("herding_cats_param")
secret = get_secret(secret_name)
bucket_name = secret["herding_cats_raw_data_bucket"]

url = "https://data.london.gov.uk/api/action/package_search"
response = requests.get(url)
response.raise_for_status()
data = response.json()
print("Data Successfully Fetched")

# Dump data to S3
s3 = boto3.client('s3')
bucket_name = bucket_name
file_name = 'london_datastore.json'
# Loop through the links
for link in catalogues_list:
response = requests.get(link, timeout=15)
response.raise_for_status()
data = response.json()
logger.success(f"Data Successfully Fetched for {link}")

# Extract domain name from the link
domain = urlparse(link).netloc

# Use domain as file name
file_name = f"{domain}.json"

# Dump data to S3
s3 = boto3.client('s3')
s3.put_object(
Bucket=bucket_name,
Key=file_name,
Body=json.dumps(data),
ContentType='application/json'
)
logger.success(f"Data Was Successfully Dumped to {file_name}")

s3.put_object(
Bucket=bucket_name,
Key=file_name,
Body=json.dumps(data),
ContentType='application/json'
)
logger.success("Data Was Successfully Dumped...")

return {
'statusCode': 200,
'body': json.dumps({'message': 'Data successfully fetched and dumped to S3'})
Expand All @@ -81,4 +65,4 @@ def lambda_handler(event, context) -> json:
return {
'statusCode': 500,
'body': json.dumps({'error': f'S3 dump error: {str(e)}'})
}
}

0 comments on commit 4549ebd

Please sign in to comment.