diff --git a/.projen/deps.json b/.projen/deps.json index d7aed525..76571022 100644 --- a/.projen/deps.json +++ b/.projen/deps.json @@ -136,6 +136,11 @@ "name": "typescript", "type": "build" }, + { + "name": "@aws-cdk/aws-lambda-python-alpha", + "version": "2.178.0-alpha.0", + "type": "bundled" + }, { "name": "deepmerge", "type": "bundled" diff --git a/.projen/tasks.json b/.projen/tasks.json index 5a8e841c..ff4dfad9 100644 --- a/.projen/tasks.json +++ b/.projen/tasks.json @@ -825,7 +825,7 @@ "exec": "yarn install --check-files" }, { - "exec": "yarn upgrade @aws-cdk/assert @aws-cdk/integ-tests-alpha @commitlint/config-conventional @mrgrain/jsii-struct-builder @stylistic/eslint-plugin @types/jest @types/node @typescript-eslint/eslint-plugin @typescript-eslint/parser aws-cdk aws-sdk-mock commit-and-tag-version commitlint eslint-import-resolver-typescript eslint-plugin-import eslint-plugin-license-header eslint husky jest jest-junit jsii-diff jsii-pacmak jsii-rosetta jsii pinst projen ts-jest ts-node typedoc typedoc-plugin-markdown typescript deepmerge aws-cdk-lib constructs cdk-nag" + "exec": "yarn upgrade @aws-cdk/assert @aws-cdk/integ-tests-alpha @commitlint/config-conventional @mrgrain/jsii-struct-builder @stylistic/eslint-plugin @types/jest @types/node @typescript-eslint/eslint-plugin @typescript-eslint/parser aws-cdk aws-sdk-mock commit-and-tag-version commitlint eslint-import-resolver-typescript eslint-plugin-import eslint-plugin-license-header eslint husky jest jest-junit jsii-diff jsii-pacmak jsii-rosetta jsii pinst projen ts-jest ts-node typedoc typedoc-plugin-markdown typescript @aws-cdk/aws-lambda-python-alpha deepmerge aws-cdk-lib constructs cdk-nag" }, { "exec": "npx projen" diff --git a/.projenrc.ts b/.projenrc.ts index b6098956..d427e46c 100644 --- a/.projenrc.ts +++ b/.projenrc.ts @@ -72,7 +72,10 @@ const project = new awscdk.AwsCdkConstructLibrary({ `@aws-cdk/integ-tests-alpha@${CDK_VERSION}-alpha.0`, ], deps: ['cdk-nag'], - bundledDeps: ['deepmerge'], + bundledDeps: [ + 'deepmerge', + `@aws-cdk/aws-lambda-python-alpha@${CDK_VERSION}-alpha.0`, + ], // Keep synchronized with https://github.com/nodejs/release#release-schedule minNodeVersion: '18.12.0', // 'MAINTENANCE' (first LTS) maxNodeVersion: '22.x', // 'CURRENT' diff --git a/README.md b/README.md index 55078feb..14d6cb0b 100644 --- a/README.md +++ b/README.md @@ -134,6 +134,7 @@ The following constructs are available in the library: | [SageMaker model deployment (Hugging Face)](./src/patterns/gen-ai/aws-model-deployment-sagemaker/README_hugging_face.md) | Deploy a foundation model from Hugging Face to an Amazon SageMaker endpoint. | Amazon SageMaker | | [SageMaker model deployment (Custom)](./src/patterns/gen-ai/aws-model-deployment-sagemaker/README_custom_sagemaker_endpoint.md) | Deploy a foundation model from an S3 location to an Amazon SageMaker endpoint. | Amazon SageMaker | | [Amazon Bedrock Monitoring (Amazon CloudWatch Dashboard)](./src/patterns/gen-ai/aws-bedrock-cw-dashboard/README.md) | Amazon CloudWatch dashboard to monitor model usage from Amazon Bedrock. 
| Amazon CloudWatch |
+| [Bedrock Data Automation](./src/patterns/gen-ai/aws-bedrock-data-automation/README.md) | Use the Amazon Bedrock Data Automation client to build and manage intelligent document processing, media analysis, and other multimodal data-centric automation solutions. | AWS Lambda, Amazon S3 |
| [Bedrock Batch Step Functions](./src/patterns/gen-ai/aws-bedrock-batch-stepfn/README.md) | Manage Bedrock model invocation jobs(batch inference) in AWS Step Functions state machines | AWS Step Functions, AWS Lambda, AWS EventBridge, Amazon Bedrock, AWS IAM |

### L2 Constructs

diff --git a/apidocs/README.md b/apidocs/README.md
index a6be52ad..be3e3f61 100644
--- a/apidocs/README.md
+++ b/apidocs/README.md
@@ -23,6 +23,7 @@
- [BaseClass](classes/BaseClass.md)
- [BedrockBatchSfn](classes/BedrockBatchSfn.md)
- [BedrockCwDashboard](classes/BedrockCwDashboard.md)
+- [BedrockDataAutomation](classes/BedrockDataAutomation.md)
- [ContainerImage](classes/ContainerImage.md)
- [CustomSageMakerEndpoint](classes/CustomSageMakerEndpoint.md)
- [DeepLearningContainerImage](classes/DeepLearningContainerImage.md)
@@ -39,6 +40,7 @@
- [BaseClassProps](interfaces/BaseClassProps.md)
- [BedrockBatchSfnProps](interfaces/BedrockBatchSfnProps.md)
- [BedrockCwDashboardProps](interfaces/BedrockCwDashboardProps.md)
+- [BedrockDataAutomationProps](interfaces/BedrockDataAutomationProps.md)
- [CollectionMonitoringProps](interfaces/CollectionMonitoringProps.md)
- [ContainerImageConfig](interfaces/ContainerImageConfig.md)
- [CustomSageMakerEndpointProps](interfaces/CustomSageMakerEndpointProps.md)
diff --git a/apidocs/classes/BaseClass.md b/apidocs/classes/BaseClass.md
index a1cd6055..b1362d6d 100644
--- a/apidocs/classes/BaseClass.md
+++ b/apidocs/classes/BaseClass.md
@@ -13,6 +13,7 @@
## Extended by

- [`SageMakerEndpointBase`](SageMakerEndpointBase.md)
+- [`BedrockDataAutomation`](BedrockDataAutomation.md)

## Constructors
diff --git a/apidocs/classes/BedrockDataAutomation.md b/apidocs/classes/BedrockDataAutomation.md
new file mode 100644
index 00000000..3411c202
--- /dev/null
+++ b/apidocs/classes/BedrockDataAutomation.md
@@ -0,0 +1,355 @@
[**@cdklabs/generative-ai-cdk-constructs**](../README.md)

***

[@cdklabs/generative-ai-cdk-constructs](../README.md) / BedrockDataAutomation

# Class: BedrockDataAutomation

Class to create a Bedrock Data Automation (BDA) pattern with CDK.
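A minimal usage sketch (illustrative only: the Python module path and the snake_case prop names are assumptions based on the library's published jsii bindings, not generated documentation):

```python
from aws_cdk import Stack
from constructs import Construct
# Assumed import path for the Python bindings of this library.
from cdklabs.generative_ai_cdk_constructs import BedrockDataAutomation

class BdaStack(Stack):
    def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)
        # Only the blueprint-management resources are created here; the
        # construct provisions an input bucket since none is supplied.
        BedrockDataAutomation(
            self, "BedrockDataAutomation",
            is_custom_bda_blueprint_required=True,
        )
```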
## Extends

- [`BaseClass`](BaseClass.md)

## Constructors

### new BedrockDataAutomation()

> **new BedrockDataAutomation**(`scope`, `id`, `props`): [`BedrockDataAutomation`](BedrockDataAutomation.md)

#### Parameters

##### scope

`Construct`

##### id

`string`

##### props

[`BedrockDataAutomationProps`](../interfaces/BedrockDataAutomationProps.md)

#### Returns

[`BedrockDataAutomation`](BedrockDataAutomation.md)

#### Overrides

[`BaseClass`](BaseClass.md).[`constructor`](BaseClass.md#constructors)

## Properties

### bdaBlueprintLambdaFunction?

> `readonly` `optional` **bdaBlueprintLambdaFunction**: `Function`

The Lambda function responsible for creating the Bedrock Data Automation blueprint.
IMPORTANT: If isCustomBDABlueprintRequired is set to false in Pattern Construct Props,
this property will be undefined.

***

### bdaInputBucket?

> `readonly` `optional` **bdaInputBucket**: `IBucket`

The S3 bucket for input data used by the Bedrock Data Automation process.
IMPORTANT: If both isCustomBDABlueprintRequired and isBDAInvocationRequired are set to false in Pattern Construct Props,
this property will be undefined.

***

### bdaInvocationFunction?

> `readonly` `optional` **bdaInvocationFunction**: `Function`

The Lambda function responsible for invoking the Bedrock Data Automation process.
IMPORTANT: If isBDAInvocationRequired is set to false in Pattern Construct Props,
this property will be undefined.

***

### bdaOutputBucket?

> `readonly` `optional` **bdaOutputBucket**: `IBucket`

The S3 bucket for output data generated by the Bedrock Data Automation process.
IMPORTANT: If isBDAInvocationRequired is set to false in Pattern Construct Props,
this property will be undefined.

***

### bdaProjectFunction?

> `readonly` `optional` **bdaProjectFunction**: `Function`

The Lambda function responsible for handling the Bedrock Data Automation project.
IMPORTANT: If isBDAProjectRequired is set to false in Pattern Construct Props,
this property will be undefined.

***

### bdaResultStatusFunction?

> `readonly` `optional` **bdaResultStatusFunction**: `Function`

The Lambda function responsible for checking the status of the Bedrock Data Automation process.
IMPORTANT: If isStatusRequired is set to false in Pattern Construct Props,
this property will be undefined.

***

### boto3Layer

> `readonly` **boto3Layer**: `LayerVersion`

The Boto3 layer used in the Lambda functions for AWS SDK interactions.

***

### constructUsageMetric

> `readonly` **constructUsageMetric**: `"uksb-1tupboc45"` = `'uksb-1tupboc45'`

Construct usage metric, added to the template description.

#### Inherited from

[`BaseClass`](BaseClass.md).[`constructUsageMetric`](BaseClass.md#constructusagemetric)

***

### enablexray

> **enablexray**: `boolean` = `true`

Enable/disable X-Ray tracing.

#### Default

```ts
- True
```

#### Inherited from

[`BaseClass`](BaseClass.md).[`enablexray`](BaseClass.md#enablexray)

***

### fieldLogLevel

> **fieldLogLevel**: `FieldLogLevel` = `appsync.FieldLogLevel.ALL`

Default log config for all constructs.

#### Inherited from

[`BaseClass`](BaseClass.md).[`fieldLogLevel`](BaseClass.md#fieldloglevel)

***

### lambdaTracing

> **lambdaTracing**: `Tracing` = `lambda.Tracing.ACTIVE`

Enable/disable Lambda tracing.

#### Default

```ts
- Active
```

#### Inherited from

[`BaseClass`](BaseClass.md).[`lambdaTracing`](BaseClass.md#lambdatracing)

***

### node

> `readonly` **node**: `Node`

The tree node.

#### Inherited from

[`BaseClass`](BaseClass.md).[`node`](BaseClass.md#node)

***

### powertoolsLayer

> `readonly` **powertoolsLayer**: `ILayerVersion`

The AWS Lambda Powertools layer used in the Lambda functions.

***

### retention

> **retention**: `RetentionDays` = `logs.RetentionDays.TEN_YEARS`

Default log retention config for all constructs.

#### Inherited from

[`BaseClass`](BaseClass.md).[`retention`](BaseClass.md#retention)

***

### stage

> **stage**: `string`

Value appended to resource names.

#### Default

```ts
- _dev
```

#### Inherited from

[`BaseClass`](BaseClass.md).[`stage`](BaseClass.md#stage)

***

### usageMetricMap

> `protected` `static` **usageMetricMap**: `Record`\<`string`, `number`\>

Record mapping construct names to the number of deployments.

#### Inherited from

[`BaseClass`](BaseClass.md).[`usageMetricMap`](BaseClass.md#usagemetricmap)

## Methods

### addObservabilityToConstruct()

> `protected` **addObservabilityToConstruct**(`props`): `void`

#### Parameters

##### props

[`BaseClassProps`](../interfaces/BaseClassProps.md)

#### Returns

`void`

#### Inherited from

[`BaseClass`](BaseClass.md).[`addObservabilityToConstruct`](BaseClass.md#addobservabilitytoconstruct)

***

### toString()

> **toString**(): `string`

Returns a string representation of this construct.

#### Returns

`string`

#### Inherited from

[`BaseClass`](BaseClass.md).[`toString`](BaseClass.md#tostring)

***

### updateConstructUsageMetricCode()

> `protected` **updateConstructUsageMetricCode**(`props`, `scope`, `lambdaFunctions`): `void`

#### Parameters

##### props

[`BaseClassProps`](../interfaces/BaseClassProps.md)

##### scope

`Construct`

##### lambdaFunctions

`DockerImageFunction`[]

#### Returns

`void`

#### Inherited from

[`BaseClass`](BaseClass.md).[`updateConstructUsageMetricCode`](BaseClass.md#updateconstructusagemetriccode)

***

### updateEnvSuffix()

> `protected` **updateEnvSuffix**(`props`): `void`

#### Parameters

##### props

[`BaseClassProps`](../interfaces/BaseClassProps.md)

#### Returns

`void`

#### Inherited from

[`BaseClass`](BaseClass.md).[`updateEnvSuffix`](BaseClass.md#updateenvsuffix)

***

### isConstruct()

> `static` **isConstruct**(`x`): `x is Construct`

Checks if `x` is a construct.

Use this method instead of `instanceof` to properly detect `Construct`
instances, even when the construct library is symlinked.

Explanation: in JavaScript, multiple copies of the `constructs` library on
disk are seen as independent, completely different libraries. As a
consequence, the class `Construct` in each copy of the `constructs` library
is seen as a different class, and an instance of one class will not test as
`instanceof` the other class. `npm install` will not create installations
like this, but users may manually symlink construct libraries together or
use a monorepo tool: in those cases, multiple copies of the `constructs`
library can be accidentally installed, and `instanceof` will behave
unpredictably. It is safest to avoid using `instanceof`, and using
this type-testing method instead.

#### Parameters

##### x

`any`

Any object

#### Returns

`x is Construct`

true if `x` is an object created from a class which extends `Construct`.

#### Inherited from

[`BaseClass`](BaseClass.md).[`isConstruct`](BaseClass.md#isconstruct)
+ +#### Default + +```ts +- _dev +``` + +#### Inherited from + +[`BaseClass`](BaseClass.md).[`stage`](BaseClass.md#stage) + +*** + +### usageMetricMap + +> `protected` `static` **usageMetricMap**: `Record`\<`string`, `number`\> + +Record , maps construct name with number of deployments + +#### Inherited from + +[`BaseClass`](BaseClass.md).[`usageMetricMap`](BaseClass.md#usagemetricmap) + +## Methods + +### addObservabilityToConstruct() + +> `protected` **addObservabilityToConstruct**(`props`): `void` + +#### Parameters + +##### props + +[`BaseClassProps`](../interfaces/BaseClassProps.md) + +#### Returns + +`void` + +#### Inherited from + +[`BaseClass`](BaseClass.md).[`addObservabilityToConstruct`](BaseClass.md#addobservabilitytoconstruct) + +*** + +### toString() + +> **toString**(): `string` + +Returns a string representation of this construct. + +#### Returns + +`string` + +#### Inherited from + +[`BaseClass`](BaseClass.md).[`toString`](BaseClass.md#tostring) + +*** + +### updateConstructUsageMetricCode() + +> `protected` **updateConstructUsageMetricCode**(`props`, `scope`, `lambdaFunctions`): `void` + +#### Parameters + +##### props + +[`BaseClassProps`](../interfaces/BaseClassProps.md) + +##### scope + +`Construct` + +##### lambdaFunctions + +`DockerImageFunction`[] + +#### Returns + +`void` + +#### Inherited from + +[`BaseClass`](BaseClass.md).[`updateConstructUsageMetricCode`](BaseClass.md#updateconstructusagemetriccode) + +*** + +### updateEnvSuffix() + +> `protected` **updateEnvSuffix**(`props`): `void` + +#### Parameters + +##### props + +[`BaseClassProps`](../interfaces/BaseClassProps.md) + +#### Returns + +`void` + +#### Inherited from + +[`BaseClass`](BaseClass.md).[`updateEnvSuffix`](BaseClass.md#updateenvsuffix) + +*** + +### isConstruct() + +> `static` **isConstruct**(`x`): `x is Construct` + +Checks if `x` is a construct. + +Use this method instead of `instanceof` to properly detect `Construct` +instances, even when the construct library is symlinked. + +Explanation: in JavaScript, multiple copies of the `constructs` library on +disk are seen as independent, completely different libraries. As a +consequence, the class `Construct` in each copy of the `constructs` library +is seen as a different class, and an instance of one class will not test as +`instanceof` the other class. `npm install` will not create installations +like this, but users may manually symlink construct libraries together or +use a monorepo tool: in those cases, multiple copies of the `constructs` +library can be accidentally installed, and `instanceof` will behave +unpredictably. It is safest to avoid using `instanceof`, and using +this type-testing method instead. + +#### Parameters + +##### x + +`any` + +Any object + +#### Returns + +`x is Construct` + +true if `x` is an object created from a class which extends `Construct`. + +#### Inherited from + +[`BaseClass`](BaseClass.md).[`isConstruct`](BaseClass.md#isconstruct) diff --git a/apidocs/interfaces/BedrockDataAutomationProps.md b/apidocs/interfaces/BedrockDataAutomationProps.md new file mode 100644 index 00000000..0a3d4ea4 --- /dev/null +++ b/apidocs/interfaces/BedrockDataAutomationProps.md @@ -0,0 +1,88 @@ +[**@cdklabs/generative-ai-cdk-constructs**](../README.md) + +*** + +[@cdklabs/generative-ai-cdk-constructs](../README.md) / BedrockDataAutomationProps + +# Interface: BedrockDataAutomationProps + +Properties for creating a CDK BDA construct. + +## Properties + +### inputBucket? + +> `readonly` `optional` **inputBucket**: `IBucket` + +- Optional. 
diff --git a/lambda/aws-bedrock-data-automation/create-blueprint/create_blueprint.py b/lambda/aws-bedrock-data-automation/create-blueprint/create_blueprint.py
new file mode 100644
index 00000000..5a022ba9
--- /dev/null
+++ b/lambda/aws-bedrock-data-automation/create-blueprint/create_blueprint.py
@@ -0,0 +1,405 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance
# with the License. A copy of the License is located at
# http://www.apache.org/licenses/LICENSE-2.0
#
# or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES
# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions
# and limitations under the License.
#
from enum import Enum
import json
from typing import Any, Dict
import uuid
import boto3
from aws_lambda_powertools import Logger, Metrics, Tracer
from custom_blueprint_schema import BlueprintStage, BlueprintType

logger = Logger()
tracer = Tracer()
metrics = Metrics(namespace="CREATE_BLUEPRINT")

bda_client = boto3.client("bedrock-data-automation")

class ResourceOwner(Enum):
    """Enum for valid resource owner filters"""
    SERVICE = "SERVICE"
    ACCOUNT = "ACCOUNT"


def create_blueprint(schema_content, blueprint_details):
    """
    Create a blueprint using the boto3 client
    """
    try:
        blueprint_default_name = f"custom_blueprint_{str(uuid.uuid4())[:8]}"
        blueprint_name = blueprint_details.get('blueprint_name', blueprint_default_name)

        blueprint_stage = blueprint_details.get('blueprint_stage', BlueprintStage.LIVE.value)
        if blueprint_stage not in [stage.value for stage in BlueprintStage]:
            raise ValueError(f"Invalid blueprint stage: {blueprint_stage}. Must be one of {[stage.value for stage in BlueprintStage]}")

        blueprint_type = blueprint_details.get('blueprint_type', BlueprintType.DOCUMENT.value)
        if blueprint_type not in [btype.value for btype in BlueprintType]:
            raise ValueError(f"Invalid blueprint type: {blueprint_type}. Must be one of {[btype.value for btype in BlueprintType]}")

        client_token = blueprint_details.get('client_token', '')
        encryption_config = blueprint_details.get('encryption_config', '')

        # Prepare request parameters
        request_params = {
            'blueprintName': blueprint_name,
            'type': blueprint_type,
            'blueprintStage': blueprint_stage,
            'schema': schema_content,
        }

        # Add client token if provided
        if client_token:
            request_params['clientToken'] = client_token

        logger.info("Creating blueprint", extra={
            "blueprint_name": blueprint_name,
            "blueprint_type": blueprint_type,
            "blueprint_stage": blueprint_stage,
            "schema_content": schema_content
        })

        # Add encryption configuration if provided
        if encryption_config:
            request_params['encryptionConfiguration'] = json.loads(encryption_config)

        # Create blueprint
        response = bda_client.create_blueprint(**request_params)
        blueprint_arn = response["blueprint"]["blueprintArn"]

        logger.info("Successfully created blueprint", extra={
            "response": response,
            "blueprint_arn": blueprint_arn
        })

        return {
            'statusCode': 200,
            'body': json.dumps({
                'message': 'Blueprint created successfully',
                'blueprint_arn': blueprint_arn
            })
        }

    except bda_client.exceptions.ConflictException as ce:
        logger.warning("Blueprint already exists. Getting existing blueprint.", extra={
            "error": str(ce)
        })

        blueprints = bda_client.list_blueprints(blueprintStageFilter="ALL")["blueprints"]
        try:
            blueprint_arn = next(
                (
                    blueprint["blueprintArn"]
                    for blueprint in blueprints
                    if "blueprintName" in blueprint
                    and blueprint["blueprintName"] == blueprint_name
                )
            )
            logger.info("Retrieved existing blueprint", extra={
                "blueprint_arn": blueprint_arn
            })
            # Return the same response shape as the create path for consistency
            return {
                'statusCode': 200,
                'body': json.dumps({
                    'message': 'Blueprint already exists',
                    'blueprint_arn': blueprint_arn
                })
            }

        except StopIteration:
            logger.error("Error getting the blueprint ARN")
            raise ce

    except Exception as e:
        logger.error("Error creating blueprint", extra={
            "error": str(e)
        })
        return {
            'statusCode': 500,
            'body': json.dumps({
                'message': 'Error creating blueprint',
                'error': str(e)
            })
        }
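# Example (illustrative) of the `blueprint_details` keys consumed above; the
# values are assumptions, not service defaults:
#
#   blueprint_details = {
#       "blueprint_name": "claims_blueprint",       # optional, autogenerated otherwise
#       "blueprint_stage": "LIVE",                  # DEVELOPMENT | LIVE
#       "blueprint_type": "DOCUMENT",               # DOCUMENT | IMAGE
#       "client_token": "unique-token-123",         # optional idempotency token
#       "encryption_config": '{"kmsKeyId": "..."}'  # optional JSON string
#   }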
def list_blueprints(detail) -> dict:
    """
    List available Bedrock Data Automation blueprints based on the filters in `detail`.

    Args:
        detail (dict): Request details. All keys are optional:
            blueprint_stage (str): Filter blueprints by stage (one of the BlueprintStage values).
            resource_owner (str): Filter blueprints by owner ('SERVICE' or 'ACCOUNT').
            blueprint_arn (str): Return versions of a specific blueprint.
            max_results (int): Maximum number of results to return.
            next_token (str): Token for pagination.
            project_arn (str) / project_stage (str): Filter blueprints attached to a project.

    Returns:
        dict: Response containing 'blueprintSummaries' and, when paginated, 'nextToken'.

    Raises:
        ValueError: If invalid values are provided for resource_owner or blueprint_stage
        ClientError: If the AWS API call fails
    """
    try:
        request_params = {}

        # Filters are optional: validate them only when present, and map the
        # snake_case event keys to the camelCase parameter names boto3 expects.
        blueprint_stage = detail.get('blueprint_stage')
        if blueprint_stage is not None:
            if blueprint_stage not in [stage.value for stage in BlueprintStage]:
                raise ValueError(f"Invalid blueprint stage: {blueprint_stage}. Must be one of {[stage.value for stage in BlueprintStage]}")
            request_params['blueprintStageFilter'] = blueprint_stage

        resource_owner = detail.get('resource_owner')
        if resource_owner is not None:
            if resource_owner not in [owner.value for owner in ResourceOwner]:
                raise ValueError(f"Invalid resource owner: {resource_owner}. Must be one of {[owner.value for owner in ResourceOwner]}")
            request_params['resourceOwner'] = resource_owner

        if 'blueprint_arn' in detail:
            request_params['blueprintArn'] = detail['blueprint_arn']

        if 'max_results' in detail:
            request_params['maxResults'] = detail['max_results']

        if 'next_token' in detail:
            request_params['nextToken'] = detail['next_token']

        if 'project_arn' in detail:
            request_params.setdefault('projectFilter', {})['projectArn'] = detail['project_arn']

        if 'project_stage' in detail:
            request_params.setdefault('projectFilter', {})['projectStage'] = detail['project_stage']

        # Log request parameters for debugging
        logger.info("Listing blueprints with params", extra={"params": request_params})

        # Make API call to list blueprints
        response = bda_client.list_blueprints(**request_params)

        # Log success and return response
        logger.info(
            "Successfully retrieved blueprints",
            extra={"blueprint_count": len(response.get('blueprintSummaries', []))}
        )
        logger.info("List blueprints response", extra={"response": response})
        return response

    except bda_client.exceptions.ValidationException as e:
        logger.error("Validation error in list_blueprints", extra={"error": str(e)})
        raise e
    except Exception as e:
        logger.error("Unexpected error in list_blueprints", extra={"error": str(e)})
        raise e
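# Example (illustrative) filter payload for list_blueprints; the values are
# assumptions:
#
#   detail = {
#       "blueprint_stage": "LIVE",
#       "resource_owner": "ACCOUNT",
#       "max_results": 10
#   }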
def update_blueprint(blueprint_details):
    """
    Update a blueprint using the boto3 client
    """
    try:
        blueprint_arn = blueprint_details.get('blueprint_arn')
        if blueprint_arn is None:
            raise ValueError("Blueprint ARN is required to update a blueprint")

        schema = blueprint_details.get('schema')
        if schema is None:
            raise ValueError("schema is required to update a blueprint")

        blueprint_stage = blueprint_details.get('blueprint_stage', BlueprintStage.LIVE.value)
        if blueprint_stage not in [stage.value for stage in BlueprintStage]:
            raise ValueError(f"Invalid blueprint stage: {blueprint_stage}. Must be one of {[stage.value for stage in BlueprintStage]}")

        # Prepare request parameters
        request_params = {
            'blueprintArn': blueprint_arn,
            'schema': schema,
            'blueprintStage': blueprint_stage,
        }

        logger.info("Updating blueprint", extra={
            "request_params": request_params,
        })

        # Update blueprint
        response = bda_client.update_blueprint(**request_params)

        logger.info("Successfully updated blueprint", extra={
            "response": response,
            "blueprint_arn": blueprint_arn
        })

        return {
            'statusCode': 200,
            'body': json.dumps({
                'message': 'Blueprint updated successfully',
                'response': response
            })
        }

    except Exception as e:
        logger.error("Error updating blueprint", extra={
            "error": str(e)
        })
        return {
            'statusCode': 500,
            'body': json.dumps({
                'message': 'Error updating blueprint',
                'error': str(e)
            })
        }


def get_blueprint(blueprint_details):
    """
    Get a blueprint using the boto3 client
    """
    try:
        blueprint_arn = blueprint_details.get('blueprint_arn')
        if blueprint_arn is None:
            raise ValueError("Blueprint ARN is required to get a blueprint")

        blueprint_version = blueprint_details.get('blueprint_version')

        # The stage is optional for a get; validate it only when provided
        blueprint_stage = blueprint_details.get('blueprint_stage')
        if blueprint_stage is not None and blueprint_stage not in [stage.value for stage in BlueprintStage]:
            raise ValueError(f"Invalid blueprint stage: {blueprint_stage}. Must be one of {[stage.value for stage in BlueprintStage]}")

        # Prepare request parameters
        request_params = {
            'blueprintArn': blueprint_arn,
        }

        if blueprint_version:
            request_params['blueprintVersion'] = blueprint_version
        if blueprint_stage:
            request_params['blueprintStage'] = blueprint_stage

        logger.info("get blueprint", extra={
            "request_params": request_params
        })

        # Get blueprint
        response = bda_client.get_blueprint(**request_params)

        logger.info("Successfully retrieved blueprint", extra={
            "response": response,
        })

        return {
            'statusCode': 200,
            'body': json.dumps({
                'message': 'Blueprint fetched successfully',
                'response': response
            })
        }

    except Exception as e:
        logger.error("Error fetching blueprint", extra={
            "error": str(e)
        })
        return {
            'statusCode': 500,
            'body': json.dumps({
                'message': 'Error fetching blueprint',
                'error': str(e)
            })
        }
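# Example (illustrative) payload for update_blueprint; the schema value is an
# assumption:
#
#   blueprint_details = {
#       "blueprint_arn": "arn:aws:bedrock:...:blueprint/...",
#       "blueprint_stage": "LIVE",
#       "schema": "{ ...JSON schema string... }"
#   }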
def delete_blueprint(blueprint_arn: str, blueprint_version: str | None = None) -> Dict[str, Any]:
    """
    Delete a blueprint or specific version of a blueprint

    Args:
        blueprint_arn (str): ARN of the blueprint to delete
        blueprint_version (str, optional): Version of the blueprint to delete

    Returns:
        Dict[str, Any]: Response containing status and message
    """
    try:
        logger.info("Deleting blueprint", extra={
            "blueprint_arn": blueprint_arn,
            "blueprint_version": blueprint_version
        })

        delete_params = {
            'blueprintArn': blueprint_arn
        }

        # Add version if provided
        if blueprint_version:
            delete_params['blueprintVersion'] = blueprint_version

        # Delete blueprint
        response = bda_client.delete_blueprint(**delete_params)

        logger.info("Successfully deleted blueprint", extra={
            "response": response
        })

        return {
            'statusCode': 200,
            'body': json.dumps({
                'message': 'Blueprint deleted successfully',
            })
        }

    except bda_client.exceptions.ResourceNotFoundException as e:
        logger.error("Blueprint not found", extra={
            'blueprint_arn': blueprint_arn,
            'blueprint_version': blueprint_version
        })
        return {
            'statusCode': 404,
            'body': json.dumps({
                'message': 'Blueprint not found',
                'error': str(e)
            })
        }

    except bda_client.exceptions.ValidationException as e:
        logger.error("Validation error", extra={
            'blueprint_arn': blueprint_arn,
            'blueprint_version': blueprint_version
        })
        return {
            'statusCode': 400,
            'body': json.dumps({
                'message': 'Validation error while deleting blueprint',
                'error': str(e)
            })
        }

    except Exception as e:
        logger.error("Error deleting blueprint", extra={
            'blueprint_arn': blueprint_arn,
            'blueprint_version': blueprint_version,
            'error': str(e)
        })
        return {
            'statusCode': 500,
            'body': json.dumps({
                'message': 'Error deleting blueprint',
                'error': str(e)
            })
        }
diff --git a/lambda/aws-bedrock-data-automation/create-blueprint/create_blueprint_version.py b/lambda/aws-bedrock-data-automation/create-blueprint/create_blueprint_version.py
new file mode 100644
index 00000000..e3a3d0f8
--- /dev/null
+++ b/lambda/aws-bedrock-data-automation/create-blueprint/create_blueprint_version.py
@@ -0,0 +1,75 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance
# with the License. A copy of the License is located at
# http://www.apache.org/licenses/LICENSE-2.0
#
# or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES
# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions
# and limitations under the License.
#
import json
import boto3
from aws_lambda_powertools import Logger, Metrics, Tracer

logger = Logger()
tracer = Tracer()
metrics = Metrics(namespace="CREATE_BLUEPRINT_VERSION")

bda_client = boto3.client("bedrock-data-automation")


def create_blueprint_version(blueprint_details):
    """
    Create a blueprint version using the boto3 client
    """
    try:
        blueprint_arn = blueprint_details.get('blueprint_arn')
        if blueprint_arn is None:
            raise ValueError("Blueprint ARN is required to create a version")

        client_token = blueprint_details.get('client_token', '')

        # Prepare request parameters
        request_params = {
            'blueprintArn': blueprint_arn,
        }

        # Add client token if provided
        if client_token:
            request_params['clientToken'] = client_token

        logger.info("Creating blueprint version", extra={
            "blueprintArn": blueprint_arn,
        })

        # Create blueprint version
        response = bda_client.create_blueprint_version(**request_params)

        logger.info("Successfully created blueprint version", extra={
            "response": response,
        })

        return {
            'statusCode': 200,
            'body': json.dumps({
                'message': 'Blueprint version created successfully',
                'response': response
            })
        }

    except Exception as e:
        logger.error("Error creating blueprint version", extra={
            "error": str(e)
        })
        return {
            'statusCode': 500,
            'body': json.dumps({
                'message': 'Error creating blueprint version',
                'error': str(e)
            })
        }
diff --git a/lambda/aws-bedrock-data-automation/create-blueprint/create_schema.py b/lambda/aws-bedrock-data-automation/create-blueprint/create_schema.py
new file mode 100644
index 00000000..2c742cea
--- /dev/null
+++ b/lambda/aws-bedrock-data-automation/create-blueprint/create_schema.py
@@ -0,0 +1,52 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance
# with the License. A copy of the License is located at
# http://www.apache.org/licenses/LICENSE-2.0
#
# or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES
# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions
# and limitations under the License.
#
from typing import Annotated, List, Dict, Type
from custom_blueprint_schema import create_schema_fields, custom_blue_print

def create_schema(fields: List[Dict[str, str]]) -> Type[custom_blue_print]:
    """
    Create a schema class based on a list of field configurations.

    Args:
        fields (List[Dict[str, str]]): List of field configurations
            Format: [
                {
                    "name": "field_name",
                    "description": "field description",
                    "alias": "field alias"
                }
            ]

    Returns:
        Type[custom_blue_print]: A new schema class
    """
    annotations = {}

    # Process each field in the list
    for field in fields:
        field_name = field["name"]
        annotations[field_name] = Annotated[
            List[str],
            create_schema_fields(
                description=field["description"],
                alias=field["alias"]
            )
        ]

    # Create a new schema class dynamically
    return type(
        "DynamicSchema",
        (custom_blue_print,),
        {
            "__annotations__": annotations
        }
    )
\ No newline at end of file
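A short sketch of how `create_schema` is meant to be used (field names and values are illustrative):

```python
# Illustrative only: build a dynamic schema from two extractive fields and
# render the JSON schema string that create_blueprint expects as `schema_content`.
import json
from create_schema import create_schema

fields = [
    {"name": "invoice_number", "description": "The invoice number", "alias": "InvoiceNo"},
    {"name": "total_amount", "description": "The invoice total", "alias": "Total"},
]

DynamicSchema = create_schema(fields)
schema_content = json.dumps(DynamicSchema().model_json_schema())
print(schema_content)
```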
diff --git a/lambda/aws-bedrock-data-automation/create-blueprint/custom_blueprint_schema.py b/lambda/aws-bedrock-data-automation/create-blueprint/custom_blueprint_schema.py
new file mode 100644
index 00000000..6ccdb044
--- /dev/null
+++ b/lambda/aws-bedrock-data-automation/create-blueprint/custom_blueprint_schema.py
@@ -0,0 +1,87 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance
# with the License. A copy of the License is located at
# http://www.apache.org/licenses/LICENSE-2.0
#
# or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES
# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions
# and limitations under the License.
#
from pydantic import BaseModel, Field
from enum import Enum

class BlueprintStage(str, Enum):
    DEVELOPMENT = "DEVELOPMENT"
    LIVE = "LIVE"

class BlueprintType(str, Enum):
    DOCUMENT = "DOCUMENT"
    IMAGE = "IMAGE"

class custom_blue_print(BaseModel):
    """
    Base model class for creating custom blueprints with standardized schema configuration.
    Inherits from Pydantic's BaseModel for data validation and serialization.
    """

    # Default configuration for all schema instances
    model_config = {
        "title": None,
        "json_schema_extra": {
            # Specify JSON Schema version and default metadata
            "$schema": "http://json-schema.org/draft-07/schema#",
            "description": "default",
            "documentClass": "default",
        },
    }

    @classmethod
    def model_json_schema(cls, *args, **kwargs) -> dict:
        """
        Customizes the JSON schema generation for the model.

        Args:
            *args: Variable length argument list passed to parent schema generator
            **kwargs: Arbitrary keyword arguments passed to parent schema generator

        Returns:
            dict: Modified JSON schema with customized property handling
        """
        # Get base schema from parent class without aliases
        schema = super().model_json_schema(*args, by_alias=False, **kwargs)

        # Remove top-level title if present
        schema.pop("title", None)

        # Process each field in the model
        properties = {}
        for field_name, field in cls.model_fields.items():
            field_schema = schema["properties"][field_name]
            alias = field.alias or field_name
            field_schema.pop("title", None)
            properties[alias] = field_schema

        schema["properties"] = properties
        return schema

def create_schema_fields(description: str, alias: str | None = None):
    """
    Creates a field configuration for extractive schema fields.
+ + Args: + description (str): Description of what should be extracted from the document + alias (str | None): Optional alternative name for the field in the output + + Returns: + Field: Configured Pydantic Field with extractive properties + """ + return Field( + default_factory=list, + description=description, + alias=alias, + json_schema_extra={ + "inferenceType": "extractive", + "items": {"type": "string"}, + }, + ) diff --git a/lambda/aws-bedrock-data-automation/create-blueprint/lambda.py b/lambda/aws-bedrock-data-automation/create-blueprint/lambda.py new file mode 100644 index 00000000..56175758 --- /dev/null +++ b/lambda/aws-bedrock-data-automation/create-blueprint/lambda.py @@ -0,0 +1,225 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance +# with the License. A copy of the License is located at +# http://www.apache.org/licenses/LICENSE-2.0 +# +# or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES +# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions +# and limitations under the License. +# +from enum import Enum +import os +import json +from typing import Any, Dict +import boto3 +from aws_lambda_powertools import Logger, Tracer, Metrics +from aws_lambda_powertools.utilities.typing import LambdaContext +from aws_lambda_powertools.utilities.data_classes import EventBridgeEvent, APIGatewayProxyEvent +from create_blueprint import create_blueprint,delete_blueprint,list_blueprints,get_blueprint,update_blueprint +from create_schema import create_schema + + +logger = Logger() +tracer = Tracer() +metrics = Metrics(namespace="CREATE_BLUEPRINT") + +input_bucket = os.environ.get('INPUT_BUCKET') + +class OperationType(str, Enum): + CREATE_BLUEPRINT = "CREATE" + DELETE_BLUEPRINT = "DELETE" + LIST_BLUEPRINTS = "LIST" + UPDATE_BLUEPRINT = "UPDATE" + GET_BLUEPRINT = "GET" + + +def process_event_bridge_event(event: Dict[str, Any]) -> Dict[str, Any]: + """ + Process EventBridge events + """ + event_bridge_event = EventBridgeEvent(event) + logger.info("Received EventBridge event", extra={ + "detail_type": event_bridge_event.detail_type, + "source": event_bridge_event.source, + "detail":event_bridge_event.detail + }) + + return event_bridge_event.detail + +def process_api_gateway_event(event: Dict[str, Any]) -> Dict[str, Any]: + """ + Process API Gateway events + """ + api_event = APIGatewayProxyEvent(event) + logger.info("Received API Gateway event", extra={ + "body":api_event.body + }) + + if not api_event.body: + raise ValueError("Request body is required") + + try: + return json.loads(api_event.body) + except json.JSONDecodeError as e: + logger.error("Invalid JSON in request body", extra={"error": str(e)}) + raise ValueError("Invalid JSON in request body") + + + +def get_schema(bucket_name: str, schema_key: str) -> Dict[str, Any]: + """ + Get and parse schema JSON from S3 bucket + + Args: + bucket_name: Name of the S3 bucket containing the schema + schema_key: Object key (path) of the schema file in S3 + + Returns: + Dict[str, Any]: Parsed schema content + + Raises: + ValueError: If schema file is not found or invalid JSON + ClientError: If S3 access fails + """ + try: + logger.info("Retrieving schema from S3", extra={ + "bucket": bucket_name, + "key": schema_key + }) + s3_client = boto3.client('s3') + response = s3_client.get_object( + 
            Bucket=bucket_name,
            Key=schema_key
        )
        schema_content = json.loads(response['Body'].read().decode('utf-8'))
        logger.info("Successfully retrieved and parsed schema", extra={
            "schema_size": len(json.dumps(schema_content)),
            "bucket": bucket_name,
            "key": schema_key
        })

        return schema_content

    except Exception as e:
        logger.error("Unexpected error retrieving schema", extra={
            "bucket": bucket_name,
            "key": schema_key,
            "error": str(e)
        })
        raise

@logger.inject_lambda_context
def handler(event, context: LambdaContext):
    """
    Lambda handler function
    """
    try:
        logger.info(f"Received event: {json.dumps(event)}")
        if event.get("source") and event.get("detail-type"):
            blueprint_details = process_event_bridge_event(event)
        else:
            blueprint_details = process_api_gateway_event(event)

        schema_content = ""
        operation_type = blueprint_details.get('operation', 'CREATE')
        if operation_type not in [operation.value for operation in OperationType]:
            raise ValueError(f"Invalid operation type: {operation_type}. Must be one of {[operation.value for operation in OperationType]}")

        status_code = 200
        response = None  # Ensure the name is bound for every branch below
        match operation_type.lower():
            case "delete":
                logger.info(f"Deleting blueprint {blueprint_details}")
                blueprint_arn = blueprint_details.get('blueprint_arn')
                blueprint_version = blueprint_details.get('blueprint_version')

                if not blueprint_arn:
                    raise ValueError("blueprint_arn is required for delete operation")

                response = delete_blueprint(blueprint_arn, blueprint_version)
                response_msg = 'Blueprint deleted successfully'

            case "list":
                logger.info("Listing all blueprints")
                response = list_blueprints(blueprint_details)
                response_msg = 'Blueprints fetched successfully'

            case "get":
                logger.info(f"Get blueprint {blueprint_details}")
                response = get_blueprint(blueprint_details)
                response_msg = 'Blueprint fetched successfully'

            case "update":
                logger.info(f"Update blueprint {blueprint_details}")
                response = update_blueprint(blueprint_details)
                response_msg = 'Blueprint updated successfully'

            case "create":
                logger.info("create blueprint")

                if 'schema_file_name' in blueprint_details:
                    input_key = blueprint_details['schema_file_name']

                    logger.info(f"Retrieving schema from S3: {input_bucket}/{input_key}")
                    # get_schema returns the parsed document; serialize it back to
                    # the JSON string that create_blueprint expects
                    schema_content = json.dumps(get_schema(input_bucket, input_key))

                if 'schema_fields' in blueprint_details:
                    schema_fields = blueprint_details['schema_fields']

                    # Validate schema_fields format
                    if not isinstance(schema_fields, list):
                        raise ValueError("schema_fields must be a list of field configurations")

                    # Validate each field has required properties
                    for field in schema_fields:
                        if not all(key in field for key in ['name', 'description', 'alias']):
                            raise ValueError("Each field must contain 'name', 'description', and 'alias'")

                    # Create schema using the fields
                    try:
                        DynamicSchema = create_schema(schema_fields)
                        schema_instance = DynamicSchema()
                        schema_content = json.dumps(schema_instance.model_json_schema())

                    except Exception as e:
                        logger.error("Error creating schema", extra={"error": str(e)})
                        return {
                            'statusCode': 500,
                            'body': json.dumps({
                                'message': 'Error creating schema',
                                'error': str(e)
                            })
                        }

                if not schema_content or not schema_content.strip():
                    response_msg = "Schema content cannot be empty or blank"
                    logger.error(response_msg)
                    status_code = 400
                else:
                    response = create_blueprint(schema_content, blueprint_details)
                    response_msg = 'Blueprint created successfully'

            case _:
                response_msg = (f"Unknown operation type: {operation_type}. "
                                "The supported operations are - create, update, delete, list and get.")
                logger.warning(response_msg)
                status_code = 400

        return {
            'statusCode': status_code,
            'body': json.dumps({
                'message': response_msg,
                'response': response
            })
        }

    except Exception as e:
        logger.error("Unexpected error", extra={"error": str(e)})
        return {
            'statusCode': 500,
            'body': json.dumps({
                'message': 'Unexpected error occurred',
                'error': str(e)
            })
        }
diff --git a/lambda/aws-bedrock-data-automation/create_project/create_project.py b/lambda/aws-bedrock-data-automation/create_project/create_project.py
new file mode 100644
index 00000000..02731d29
--- /dev/null
+++ b/lambda/aws-bedrock-data-automation/create_project/create_project.py
@@ -0,0 +1,210 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance
# with the License. A copy of the License is located at
# http://www.apache.org/licenses/LICENSE-2.0
#
# or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES
# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions
# and limitations under the License.
#
import json
import boto3
from typing import Dict, Any
from aws_lambda_powertools import Logger, Metrics, Tracer
from botocore.exceptions import ClientError
from project_config import ProjectConfig

logger = Logger()
tracer = Tracer()
metrics = Metrics(namespace="CREATE_PROJECT")

bda_client = boto3.client("bedrock-data-automation")


def create_project(project_details: dict) -> str:
    """Create a data automation project"""
    try:
        project_config = ProjectConfig(project_details)

        logger.info("Creating project with configuration", extra={"config": project_config.project_config})

        response = bda_client.create_data_automation_project(**project_config.project_config)

        project_arn = response["projectArn"]
        logger.info("Project created successfully", extra={"project_arn": project_arn})

        return project_arn

    except ClientError as e:
        error_code = e.response.get('Error', {}).get('Code', 'Unknown')
        error_message = e.response.get('Error', {}).get('Message', str(e))
        logger.error("AWS API error creating project", extra={
            "error_code": error_code,
            "error_message": error_message
        })
        raise

    except Exception as e:
        logger.error("Error creating project", extra={"error": str(e)})
        raise


def get_project(project_details):
    """
    Get a project using the boto3 client
    """
    try:
        project_arn = project_details.get('projectArn')
        if project_arn is None:
            raise ValueError("Project ARN is required to get a project")

        project_stage = project_details.get('projectStage')

        # Prepare request parameters
        request_params = {
            'projectArn': project_arn,
        }

        if project_stage:
            request_params['projectStage'] = project_stage

        logger.info("get project", extra={
            "request_params": request_params
        })

        response = bda_client.get_data_automation_project(**request_params)

        logger.info("Successfully retrieved project", extra={
            "response": response,
        })

        return {
            'statusCode': 200,
            'body': json.dumps({
                'message': 'data automation project fetched successfully',
                'response': response
            })
        }

    except Exception as e:
        logger.error("Error fetching project", extra={
            "error": str(e)
        })
        return {
            'statusCode': 500,
            'body': json.dumps({
                'message':
'Error fetching project', + 'error': str(e) + }) + } + + +def delete_project(project_arn: str) -> None: + """ + Delete a data automation project + + Args: + project_arn: ARN of the project to delete + """ + try: + logger.info("Deleting project", extra={"project_arn": project_arn}) + + bda_client.delete_data_automation_project(projectArn=project_arn) + + logger.info("Project deleted successfully", extra={"project_arn": project_arn}) + + except ClientError as e: + error_code = e.response.get('Error', {}).get('Code', 'Unknown') + error_message = e.response.get('Error', {}).get('Message', str(e)) + logger.error("AWS API error deleting project", extra={ + "project_arn": project_arn, + "error_code": error_code, + "error_message": error_message + }) + raise + + except Exception as e: + logger.error("Error deleting project", extra={ + "project_arn": project_arn, + "error": str(e) + }) + raise + + +def update_project(project_details: dict) -> Dict[str, Any]: + """Update a data automation project + + Args: + project_details: Dictionary containing project configuration and ARN + + Returns: + Dict containing response with status code and message + + Raises: + ValueError: If project ARN is missing or invalid + ClientError: If AWS API call fails + """ + try: + project_arn = project_details.get('projectArn') + if not project_arn: + raise ValueError("Project ARN is required to update a project") + + project_config = ProjectConfig(project_details) + + update_config = project_config.project_config + update_config['projectArn'] = project_arn + + logger.info("Updating project with configuration", extra={ + "project_arn": project_arn, + "config": update_config + }) + + # Call Bedrock API to update project + response = bda_client.update_data_automation_project(**update_config) + + logger.info("Project updated successfully", extra={ + "project_arn": project_arn, + "response": response + }) + + return { + 'statusCode': 200, + 'body': { + 'message': 'Project updated successfully', + 'projectArn': response.get('projectArn'), + 'projectStatus': response.get('projectStatus'), + 'lastUpdatedAt': str(response.get('lastUpdatedAt')) + } + } + + except ClientError as e: + error_code = e.response.get('Error', {}).get('Code', 'Unknown') + error_message = e.response.get('Error', {}).get('Message', str(e)) + logger.error("AWS API error updating project", extra={ + "error_code": error_code, + "error_message": error_message + }) + return { + 'statusCode': 400, + 'body': { + 'message': 'Failed to update project', + 'error_code': error_code, + 'error_message': error_message + } + } + + except Exception as e: + logger.error("Unexpected error updating project", extra={"error": str(e)}) + return { + 'statusCode': 500, + 'body': { + 'message': 'Internal server error', + 'error': str(e) + } + } + diff --git a/lambda/aws-bedrock-data-automation/create_project/lambda.py b/lambda/aws-bedrock-data-automation/create_project/lambda.py new file mode 100644 index 00000000..a99515ce --- /dev/null +++ b/lambda/aws-bedrock-data-automation/create_project/lambda.py @@ -0,0 +1,123 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance +# with the License. A copy of the License is located at +# http://www.apache.org/licenses/LICENSE-2.0 +# +# or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES +# OR CONDITIONS OF ANY KIND, express or implied. 
See the License for the specific language governing permissions
# and limitations under the License.
#
import json
from typing import Dict, Any
from aws_lambda_powertools import Logger, Metrics, Tracer
from aws_lambda_powertools.utilities.typing import LambdaContext
from aws_lambda_powertools.utilities.data_classes import EventBridgeEvent, APIGatewayProxyEvent
from create_project import create_project, get_project, update_project, delete_project

logger = Logger()
tracer = Tracer()
metrics = Metrics(namespace="CREATE_PROJECT")


def process_event_bridge_event(event: Dict[str, Any]) -> Dict[str, Any]:
    """
    Process EventBridge events
    """
    event_bridge_event = EventBridgeEvent(event)
    logger.info("Received EventBridge event", extra={
        "detail_type": event_bridge_event.detail_type,
        "source": event_bridge_event.source
    })

    return event_bridge_event.detail

def process_api_gateway_event(event: Dict[str, Any]) -> Dict[str, Any]:
    """
    Process API Gateway events
    """
    api_event = APIGatewayProxyEvent(event)
    logger.info("Received API Gateway event", extra={
        "http_method": api_event.http_method,
        "path": api_event.path
    })

    if not api_event.body:
        raise ValueError("Request body is required")

    try:
        return json.loads(api_event.body)
    except json.JSONDecodeError as e:
        logger.error("Invalid JSON in request body", extra={"error": str(e)})
        raise ValueError("Invalid JSON in request body")


@tracer.capture_lambda_handler
def handler(event: Dict[str, Any], context: LambdaContext) -> Dict[str, Any]:
    """
    Create, update, delete, or fetch a data automation project
    """
    try:
        # Determine event source and process accordingly
        if event.get("source") and event.get("detail-type"):
            project_config = process_event_bridge_event(event)
        else:
            project_config = process_api_gateway_event(event)

        operation_type = project_config.get('operation', '')

        logger.info("Project configuration", extra={"config": project_config})

        status_code = 200
        response = None  # Ensure the name is bound for every branch below
        match operation_type.lower():
            case "create":
                response = create_project(project_config)
                response_msg = 'Project created successfully'

            case "update":
                if 'projectArn' not in project_config:
                    raise ValueError("projectArn is required for update operation")

                response = update_project(project_config)
                response_msg = 'Project updated successfully'

            case "delete":
                if 'projectArn' not in project_config:
                    raise ValueError("projectArn is required for delete operation")
                delete_project(project_config['projectArn'])
                response_msg = 'Project deleted successfully'

            case "get":
                if 'projectArn' not in project_config:
                    raise ValueError("projectArn is required for get operation")

                response = get_project(project_config)
                response_msg = 'Project fetched successfully'

            case _:
                response_msg = (f"Unknown operation type: {operation_type}. "
                                "The supported operations are - create, update, delete and get.")
                logger.warning(response_msg)
                status_code = 400

        return {
            'statusCode': status_code,
            'body': json.dumps({
                'message': response_msg,
                'response': response
            })
        }

    except Exception as e:
        logger.error("Unexpected error", extra={"error": str(e)})
        return {
            'statusCode': 500,
            'body': json.dumps({
                'message': 'Internal server error',
                'error': str(e)
            })
        }
diff --git a/lambda/aws-bedrock-data-automation/create_project/project_config.py b/lambda/aws-bedrock-data-automation/create_project/project_config.py
new file mode 100644
index 00000000..b2572292
--- /dev/null
+++ b/lambda/aws-bedrock-data-automation/create_project/project_config.py
@@ -0,0 +1,241 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance
# with the License. A copy of the License is located at
# http://www.apache.org/licenses/LICENSE-2.0
#
# or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES
# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions
# and limitations under the License.
#
from typing import Dict, Any
from enum import Enum

class ProjectStage(str, Enum):
    DEVELOPMENT = "DEVELOPMENT"
    LIVE = "LIVE"

class State(str, Enum):
    ENABLED = "ENABLED"
    DISABLED = "DISABLED"

class DocumentGranularity(str, Enum):
    DOCUMENT = "DOCUMENT"
    PAGE = "PAGE"
    ELEMENT = "ELEMENT"
    WORD = "WORD"
    LINE = "LINE"

class TextFormat(str, Enum):
    PLAIN_TEXT = "PLAIN_TEXT"
    MARKDOWN = "MARKDOWN"
    HTML = "HTML"
    CSV = "CSV"

class ImageCategory(str, Enum):
    CONTENT_MODERATION = "CONTENT_MODERATION"
    TEXT_DETECTION = "TEXT_DETECTION"

class ImageGenerativeField(str, Enum):
    IMAGE_SUMMARY = "IMAGE_SUMMARY"
    IAB = "IAB"

class VideoCategory(str, Enum):
    CONTENT_MODERATION = "CONTENT_MODERATION"
    TEXT_DETECTION = "TEXT_DETECTION"
    TRANSCRIPT = "TRANSCRIPT"

class VideoGenerativeField(str, Enum):
    VIDEO_SUMMARY = "VIDEO_SUMMARY"
    SCENE_SUMMARY = "SCENE_SUMMARY"
    IAB = "IAB"

class AudioCategory(str, Enum):
    AUDIO_CONTENT_MODERATION = "AUDIO_CONTENT_MODERATION"
    CHAPTER_CONTENT_MODERATION = "CHAPTER_CONTENT_MODERATION"
    TRANSCRIPT = "TRANSCRIPT"

class AudioGenerativeField(str, Enum):
    AUDIO_SUMMARY = "AUDIO_SUMMARY"
    CHAPTER_SUMMARY = "CHAPTER_SUMMARY"
    IAB = "IAB"

def ensure_list(x):
    """
    Ensures the input is always returned as a list.
    If input is not a list, converts it to a single-item list.
    If input is already a list, returns it unchanged.

    Args:
        x: Any type of input

    Returns:
        list: Input converted to or kept as list
    """
    return [x] if not isinstance(x, list) else x


class ProjectConfig:
    """Configuration class for Bedrock Data Automation project settings"""

    def __init__(self, project_details: Dict[str, Any]):
        """
        Initialize project configuration with project details

        Args:
            project_details: Dictionary containing project configuration values
        """
        self.project_details = project_details
        self._validate_required_fields()

    def _validate_required_fields(self) -> None:
        """Validate required fields are present in project_details"""
        required_fields = ['projectName']
        missing_fields = [field for field in required_fields
                          if not self.project_details.get(field)]
        if missing_fields:
            raise ValueError(f"Missing required fields: {', '.join(missing_fields)}")

        # projectStage is optional (it defaults to LIVE); validate only when provided
        project_stage = self.project_details.get('projectStage')
        if project_stage is not None and project_stage not in [e.value for e in ProjectStage]:
            raise ValueError(f"Invalid projectStage. Must be one of: {[e.value for e in ProjectStage]}")

    def _get_standard_output_config(self) -> Dict[str, Any]:
        """Get standard output configuration if present"""
        config = self.project_details.get('standardOutputConfiguration', {})

        return {
            'document': self._get_document_config(config.get('document', {})),
            'image': self._get_image_config(config.get('image', {})),
            'video': self._get_video_config(config.get('video', {})),
            'audio': self._get_audio_config(config.get('audio', {}))
        }

    def _get_document_config(self, config: Dict[str, Any]) -> Dict[str, Any]:
        """Process document configuration. `config` is already the document
        section, so all lookups are relative to it."""

        return {
            'extraction': {
                'granularity': {
                    'types': ensure_list(
                        config.get('extraction', {}).get('granularity', {}).get('types', [DocumentGranularity.DOCUMENT.value])
                    )
                },
                'boundingBox': {
                    'state': config.get('extraction', {}).get('boundingBox', {}).get('state', State.DISABLED.value)
                }
            },
            'generativeField': {
                'state': config.get('generativeField', {}).get('state', State.DISABLED.value)
            },
            'outputFormat': {
                'textFormat': {
                    'types': ensure_list(
                        config.get('outputFormat', {}).get('textFormat', {}).get('types', ['PLAIN_TEXT'])
                    )
                },
                'additionalFileFormat': {
                    'state': config.get('outputFormat', {}).get('additionalFileFormat', {}).get('state', State.DISABLED.value)
                }
            }
        }

    def _get_image_config(self, config: Dict[str, Any]) -> Dict[str, Any]:
        """Process image configuration"""

        return {
            'extraction': {
                'category': {
                    'state': config.get('extraction', {}).get('category', {}).get('state', State.DISABLED.value),
                    'types': ensure_list(
                        config.get('extraction', {}).get('category', {}).get('types', ['CONTENT_MODERATION'])
                    )
                },
                'boundingBox': {
                    'state': config.get('extraction', {}).get('boundingBox', {}).get('state', State.DISABLED.value)
                }
            },
            'generativeField': {
                'state': config.get('generativeField', {}).get('state', State.DISABLED.value),
                'types': ensure_list(
                    config.get('generativeField', {}).get('types', ['IMAGE_SUMMARY'])
                )
            }
        }

    def _get_video_config(self, config: Dict[str, Any]) -> Dict[str, Any]:
        """Process video configuration"""

        return {
            'extraction': {
                'category': {
                    'state': config.get('extraction', {}).get('category', {}).get('state', State.DISABLED.value),
                    'types': ensure_list(
                        config.get('extraction', {}).get('category', {}).get('types', ['CONTENT_MODERATION'])
                    )
                },
                'boundingBox': {
                    'state': config.get('extraction', {}).get('boundingBox', {}).get('state', State.DISABLED.value)
                }
            },
            'generativeField': {
                'state': config.get('generativeField', {}).get('state', State.DISABLED.value),
                'types': ensure_list(
                    config.get('generativeField', {}).get('types', ['VIDEO_SUMMARY'])
                )
            }
        }

    def _get_audio_config(self, config: Dict[str, Any]) -> Dict[str, Any]:
        """Process audio configuration"""
        if not config:
            return {}

        return {
            'extraction': {
                'category': {
                    'state': config.get('extraction', {}).get('category', {}).get('state', State.DISABLED.value),
                    'types': ensure_list(
                        config.get('extraction', {}).get('category', {}).get('types', ['TRANSCRIPT'])
                    )
                }
            },
            'generativeField': {
                'state': config.get('generativeField', {}).get('state', State.DISABLED.value),
                'types': ensure_list(
                    config.get('generativeField', {}).get('types', ['AUDIO_SUMMARY'])
                )
            }
        }

    @property
    def project_config(self) -> Dict[str, Any]:
        """Get complete project configuration"""
        config = {
            'projectName': self.project_details['projectName'],
            'projectDescription': self.project_details.get('projectDescription', 'sample description'),
            'projectStage': self.project_details.get('projectStage', 'LIVE')
        }

        # Add standard output configuration if present
        standard_output = self._get_standard_output_config()
        if standard_output:
            config['standardOutputConfiguration'] = standard_output

        # Add custom output configuration if present
        if 'customOutputConfiguration' in self.project_details:
            config['customOutputConfiguration'] = self.project_details['customOutputConfiguration']

        # Add override configuration if present
        if 'overrideConfiguration' in self.project_details:
            config['overrideConfiguration'] = self.project_details['overrideConfiguration']

        # Add client token if present
        if 'clientToken' in self.project_details:
            config['clientToken'] = self.project_details['clientToken']

        # Add encryption configuration if present
        if 'encryptionConfiguration' in self.project_details:
            config['encryptionConfiguration'] = self.project_details['encryptionConfiguration']

        return config

    def __str__(self) -> str:
        """String representation of project configuration"""
        return f"ProjectConfig(name={self.project_details['projectName']}, stage={self.project_details.get('projectStage', 'LIVE')})"
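As a quick sketch of the defaulting behavior (the project name is illustrative):

```python
# Illustrative: ProjectConfig fills defaults for anything not supplied.
config = ProjectConfig({"projectName": "my-bda-project"}).project_config
# config["projectStage"] -> "LIVE"; standard output states default to DISABLED.
```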
config.get('extraction', {}).get('boundingBox', {}).get('state', State.DISABLED.value) + } + }, + 'generativeField': { + 'state': config.get('generativeField', {}).get('state', State.DISABLED.value), + 'types': ensure_list( + config.get('video', {}).get('generativeField', {}).get('types', ['VIDEO_SUMMARY']) + ) + } + } + + def _get_audio_config(self, config: Dict[str, Any]) -> Dict[str, Any]: + """Process audio configuration""" + if not config: + return {} + + return { + 'extraction': { + 'category': { + 'state': config.get('extraction', {}).get('category', {}).get('state', State.DISABLED.value), + 'types': ensure_list( + config.get('audio', {}).get('extraction', {}).get('category', {}).get('types', ['TRANSCRIPT']) + ) } + }, + 'generativeField': { + 'state': config.get('generativeField', {}).get('state', State.DISABLED.value), + 'types': ensure_list( + config.get('audio', {}).get('generativeField', {}).get('types', ['AUDIO_SUMMARY']) + ) } + } + + @property + def project_config(self) -> Dict[str, Any]: + """Get complete project configuration""" + config = { + 'projectName': self.project_details['projectName'], + 'projectDescription': self.project_details.get('projectDescription','sample description'), + 'projectStage': self.project_details.get('projectStage','LIVE') + } + + # Add standard output configuration if present + standard_output = self._get_standard_output_config() + if standard_output: + config['standardOutputConfiguration'] = standard_output + + # Add custom output configuration if present + if 'customOutputConfiguration' in self.project_details: + config['customOutputConfiguration'] = self.project_details['customOutputConfiguration'] + + # Add override configuration if present + if 'overrideConfiguration' in self.project_details: + config['overrideConfiguration'] = self.project_details['overrideConfiguration'] + + # Add client token if present + if 'clientToken' in self.project_details: + config['clientToken'] = self.project_details['clientToken'] + + # Add encryption configuration if present + if 'encryptionConfiguration' in self.project_details: + config['encryptionConfiguration'] = self.project_details['encryptionConfiguration'] + + return config + + def __str__(self) -> str: + """String representation of project configuration""" + return f"ProjectConfig(name={self.project_details['projectName']}, stage={self.project_details['projectStage']})" diff --git a/lambda/aws-bedrock-data-automation/data_processing/data_processing.py b/lambda/aws-bedrock-data-automation/data_processing/data_processing.py new file mode 100644 index 00000000..483ae10b --- /dev/null +++ b/lambda/aws-bedrock-data-automation/data_processing/data_processing.py @@ -0,0 +1,214 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance +# with the License. A copy of the License is located at +# http://www.apache.org/licenses/LICENSE-2.0 +# +# or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES +# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions +# and limitations under the License. 
+#
+import boto3
+from dataclasses import dataclass
+from typing import Dict, Any, Optional
+from aws_lambda_powertools import Logger, Tracer, Metrics
+
+logger = Logger()
+tracer = Tracer()
+metrics = Metrics(namespace="data_processing")
+
+
+@dataclass
+class BlueprintConfig:
+    """Blueprint configuration"""
+    blueprint_arn: str
+    version: Optional[str] = None
+    stage: str = 'LIVE'
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert blueprint config to dictionary format"""
+        blueprint_dict = {
+            'blueprintArn': self.blueprint_arn,
+            'stage': self.stage
+        }
+        if self.version:
+            blueprint_dict['version'] = self.version
+        return blueprint_dict
+
+@dataclass
+class DataAutomationConfig:
+    """Data automation configuration"""
+    data_automation_arn: str
+    stage: str = 'LIVE'
+
+@dataclass
+class EncryptionConfig:
+    """Encryption configuration"""
+    kms_key_id: str
+    kms_encryption_context: Optional[Dict[str, str]] = None
+
+@dataclass
+class NotificationConfig:
+    """Notification configuration"""
+    eventbridge_enabled: bool = False
+
+def validate_configs(
+        blueprint_config: Optional[BlueprintConfig] = None,
+        data_automation_config: Optional[DataAutomationConfig] = None
+    ) -> None:
+    """
+    Validate that exactly one of blueprint_config or data_automation_config is present
+
+    Args:
+        blueprint_config: Optional BlueprintConfig
+        data_automation_config: Optional DataAutomationConfig
+
+    Raises:
+        ValueError: If neither or both configs are provided
+    """
+    if blueprint_config is None and data_automation_config is None:
+        raise ValueError("Either blueprint_config or data_automation_config must be provided")
+
+    if blueprint_config is not None and data_automation_config is not None:
+        raise ValueError("Cannot provide both blueprint_config and data_automation_config")
+
+
+class DataProcessor:
+    def __init__(self, input_bucket: str, output_bucket: str, client=None):
+        """
+        Initialize the processor
+
+        Args:
+            input_bucket (str): Input S3 bucket name
+            output_bucket (str): Output S3 bucket name
+            client: Optional pre-configured boto3 client
+        """
+        # Honor an injected client (useful for testing); otherwise create a new one
+        self.client = client or boto3.client("bedrock-data-automation-runtime")
+        self.input_bucket = input_bucket
+        self.output_bucket = output_bucket
+
+    def invoke_data_automation_async(
+        self,
+        input_filename: str,
+        output_filename: str,
+        client_token: Optional[str] = None,
+        blueprint_config: Optional[BlueprintConfig] = None,
+        data_automation_config: Optional[DataAutomationConfig] = None,
+        encryption_config: Optional[EncryptionConfig] = None,
+        notification_config: Optional[NotificationConfig] = None,
+    ) -> Dict[str, Any]:
+        """
+        Invoke data automation asynchronously
+
+        Args:
+            input_filename (str): Input file name/key
+            output_filename (str): Output file name/key
+            client_token (str, optional): Unique identifier for the request
+            blueprint_config (BlueprintConfig, optional): Blueprint configuration
+            data_automation_config (DataAutomationConfig, optional): Data automation configuration
+            encryption_config (EncryptionConfig, optional): Encryption configuration
+            notification_config (NotificationConfig, optional): Notification configuration
+
+        Returns:
+            Dict[str, Any]: Response containing job details
+        """
+        try:
+            validate_configs(blueprint_config, data_automation_config)
+
+            input_s3_uri = f"s3://{self.input_bucket}/{input_filename}"
+            output_s3_uri = f"s3://{self.output_bucket}/{output_filename}"
+
+            logger.info("Invoking data automation", extra={
+                "input_location": input_s3_uri,
+                "output_location": output_s3_uri,
+                "blueprint_config": blueprint_config
+            })
+
+            # Prepare 
base request parameters + request_params = { + 'inputConfiguration': { + 's3Uri': input_s3_uri + }, + 'outputConfiguration': { + 's3Uri': output_s3_uri + } + } + + if client_token: + request_params['clientToken'] = client_token + # Add blueprint configuration if provided + if blueprint_config: + request_params['blueprints'] = [blueprint_config.to_dict()] + logger.info("Added blueprint configuration", extra={ + "blueprint_config": blueprint_config.to_dict() + }) + + # Add data automation configuration if provided + if data_automation_config: + request_params['dataAutomationConfiguration'] = { + 'dataAutomationArn': data_automation_config.data_automation_arn, + 'stage': data_automation_config.stage + } + + # Add encryption configuration if provided + if encryption_config: + encryption_params = { + 'kmsKeyId': encryption_config.kms_key_id + } + if encryption_config.kms_encryption_context: + encryption_params['kmsEncryptionContext'] = ( + encryption_config.kms_encryption_context + ) + request_params['encryptionConfiguration'] = encryption_params + + # Add notification configuration if provided + if notification_config: + request_params['notificationConfiguration'] = { + 'eventBridgeConfiguration': { + 'eventBridgeEnabled': notification_config.eventbridge_enabled + } + } + + # Invoke data automation + logger.info("Invoking data automation", extra={ + "request_params": request_params + }) + response = self.client.invoke_data_automation_async(**request_params) + + logger.info("Successfully invoked data automation", extra={ + "job_id": response.get('jobId') + }) + + return { + 'statusCode': 200, + 'body': { + 'message': 'Data automation invoked successfully', + 'jobId': response.get('jobId'), + 'response': response + } + } + + except self.client.exceptions.ValidationException as e: + logger.error("Validation error", exc_info=True) + return { + 'statusCode': 400, + 'body': { + 'message': 'Validation error', + 'error': str(e) + } + } + + except Exception as e: + logger.error("Error invoking data automation", exc_info=True) + return { + 'statusCode': 500, + 'body': { + 'message': 'Error invoking data automation', + 'error': str(e) + } + } diff --git a/lambda/aws-bedrock-data-automation/data_processing/lambda.py b/lambda/aws-bedrock-data-automation/data_processing/lambda.py new file mode 100644 index 00000000..3c1eb359 --- /dev/null +++ b/lambda/aws-bedrock-data-automation/data_processing/lambda.py @@ -0,0 +1,250 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance +# with the License. A copy of the License is located at +# http://www.apache.org/licenses/LICENSE-2.0 +# +# or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES +# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions +# and limitations under the License. 
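+#
+# Note: this handler requires the INPUT_BUCKET and OUTPUT_BUCKET environment
+# variables (set by the construct) and accepts either an EventBridge event or
+# an API Gateway request; see handler() below for the expected payload shape.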
+#
+import os
+import json
+from typing import Dict, Any
+from aws_lambda_powertools import Logger, Tracer, Metrics
+from aws_lambda_powertools.utilities.typing import LambdaContext
+from data_processing import DataProcessor, BlueprintConfig, EncryptionConfig, NotificationConfig, DataAutomationConfig
+from aws_lambda_powertools.utilities.data_classes import EventBridgeEvent, APIGatewayProxyEvent
+
+
+logger = Logger()
+tracer = Tracer()
+metrics = Metrics(namespace="DATA_PROCESSING")
+
+
+def process_event_bridge_event(event: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Process EventBridge events
+    """
+    event_bridge_event = EventBridgeEvent(event)
+    logger.info("Received EventBridge event", extra={
+        "detail_type": event_bridge_event.detail_type,
+        "source": event_bridge_event.source,
+        "detail": event_bridge_event.detail
+    })
+
+    return event_bridge_event.detail
+
+def process_api_gateway_event(event: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Process API Gateway events
+    """
+    api_event = APIGatewayProxyEvent(event)
+    logger.info("Received API Gateway event", extra={
+        "http_method": api_event.http_method,
+        "path": api_event.path,
+        "body": api_event.body
+    })
+
+    if not api_event.body:
+        raise ValueError("Request body is required")
+    try:
+        # If the body is a string, parse it
+        if isinstance(api_event.body, str):
+            return json.loads(api_event.body)
+        # If the body is already a dict, return it
+        elif isinstance(api_event.body, dict):
+            return api_event.body
+        else:
+            raise ValueError(f"Unexpected body type: {type(api_event.body)}")
+
+    except json.JSONDecodeError as e:
+        logger.error("Invalid JSON in request body", extra={"error": str(e)})
+        raise ValueError("Invalid JSON in request body")
+
+
+def get_env_var(var_name: str, default: str = None) -> str:
+    """Get environment variable with validation"""
+    value = os.environ.get(var_name, default)
+    if value is None:
+        raise ValueError(f"Environment variable {var_name} is not set")
+    return value
+
+#@logger.inject_lambda_context
+def handler(event: Dict[str, Any], context: LambdaContext) -> Dict[str, Any]:
+    """
+    Lambda handler to process EventBridge events and invoke data automation
+
+    Expected event structure (for API Gateway requests, the same keys go in the JSON body):
+    {
+        "detail-type": "DataAutomationRequest",
+        "detail": {
+            "input_filename": "document.pdf",
+            "output_filename": "results.json",
+            "client_token": "optional-client-token",  # Optional
+            "blueprints": [{
+                "blueprint_arn": "arn:aws:bedrock:region:account:blueprint/id",
+                "version": "1",
+                "stage": "LIVE"
+            }],
+            "data_automation": {
+                "data_automation_arn": "arn:aws:bedrock:region:account:automation/id",
+                "stage": "LIVE"
+            },
+            "encryption": {
+                "kms_key_id": "arn:aws:kms:region:account:key/id",
+                "kms_encryption_context": {"purpose": "data-processing"}
+            },
+            "notification": {
+                "eventbridge_enabled": true
+            }
+        }
+    }
+    """
+    try:
+        # Determine event source and process accordingly
+        if event.get("source") and event.get("detail-type"):
+            detail = process_event_bridge_event(event)
+        else:
+            detail = process_api_gateway_event(event)
+
+        # Validate required fields
+        input_filename = detail.get('input_filename')
+
+        if not input_filename:
+            raise ValueError("input_filename is required")
+
+        default_output_filename = os.path.splitext(input_filename)[0] + '.json'
+        output_filename = detail.get('output_filename', default_output_filename)
+
+        # Get environment variables
+        input_bucket = get_env_var('INPUT_BUCKET')
+        output_bucket = get_env_var('OUTPUT_BUCKET')
+
+        # Initialize processor
+        processor = DataProcessor(
input_bucket=input_bucket, + output_bucket=output_bucket + ) + + configs = {} + + if client_token := detail.get('client_token'): + configs['client_token']=client_token + + if blueprint_data := detail.get('blueprints'): + try: + blueprint_config = blueprint_data[0] + configs['blueprint_config'] = BlueprintConfig( + blueprint_arn=blueprint_config['blueprint_arn'], + version=blueprint_config.get('version'), + stage=blueprint_config.get('stage', 'LIVE') + ) + logger.info("Blueprint configuration initialized", extra={ + "blueprint_config": configs['blueprint_config'] + }) + + except (IndexError, KeyError) as e: + logger.error(f"Missing required blueprint parameter: {e}") + raise ValueError(f"Missing required blueprint parameter: {e}") + + # Check and initialize DataAutomationConfig + if automation_data := detail.get('data_automation'): + try: + configs['data_automation_config'] = DataAutomationConfig( + data_automation_arn=automation_data['data_automation_arn'], + stage=automation_data.get('stage', 'LIVE') + ) + logger.info("Data automation configuration initialized", extra={ + "data_automation_config": configs['data_automation_config'] + }) + except KeyError as e: + logger.error(f"Missing required data automation parameter: {e}") + raise ValueError(f"Missing required data automation parameter: {e}") + + # Check and initialize EncryptionConfig + if encryption_data := detail.get('encryption'): + try: + configs['encryption_config'] = EncryptionConfig( + kms_key_id=encryption_data['kms_key_id'], + kms_encryption_context=encryption_data.get('kms_encryption_context') + ) + logger.info("Encryption configuration initialized", extra={ + "encryption_config": configs['encryption_config'] + }) + except KeyError as e: + logger.error(f"Missing required encryption parameter: {e}") + raise ValueError(f"Missing required encryption parameter: {e}") + + # Check and initialize NotificationConfig + if notification_data := detail.get('notification'): + configs['notification_config'] = NotificationConfig( + eventbridge_enabled=notification_data.get('eventbridge_enabled', True) + ) + logger.info("Notification configuration initialized", extra={ + "notification_config": configs['notification_config'] + }) + + # Invoke data automation with all configurations + response = processor.invoke_data_automation_async( + input_filename=input_filename, + output_filename=output_filename, + **configs + ) + + # Log response + logger.info("Data automation invocation response", extra={ + "status_code": response['statusCode'], + "response": response['body'] + }) + + # Check response + if response['statusCode'] == 200: + job_id = response['body']['jobId'] + logger.info(f"Job started successfully", extra={"job_id": job_id}) + + return { + 'statusCode': 200, + 'body': { + 'message': 'Data automation job started successfully', + 'jobId': job_id, + 'clientToken': client_token, + 'configurations': { + k: str(v) for k, v in configs.items() + } + } + } + else: + error_message = response['body'].get('message', 'Unknown error') + logger.error("Failed to start job", extra={ + "error": error_message + }) + + return { + 'statusCode': response['statusCode'], + 'body': { + 'message': 'Failed to start data automation job', + 'error': error_message + } + } + + except ValueError as e: + logger.error("Validation error", exc_info=True) + return { + 'statusCode': 400, + 'body': { + 'message': 'Validation error', + 'error': str(e) + } + } + + except Exception as e: + logger.error("Unexpected error", exc_info=True) + return { + 'statusCode': 500, + 'body': 
{
+                'message': 'Internal server error',
+                'error': str(e)
+            }
+        }
diff --git a/lambda/aws-bedrock-data-automation/data_result/data_automation_result.py b/lambda/aws-bedrock-data-automation/data_result/data_automation_result.py
new file mode 100644
index 00000000..f0903cdb
--- /dev/null
+++ b/lambda/aws-bedrock-data-automation/data_result/data_automation_result.py
@@ -0,0 +1,192 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance
+# with the License. A copy of the License is located at
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES
+# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions
+# and limitations under the License.
+#
+import json
+import boto3
+import asyncio
+from aws_lambda_powertools import Logger, Tracer, Metrics
+from botocore.exceptions import ClientError
+from typing import Dict, Any
+
+logger = Logger()
+tracer = Tracer()
+metrics = Metrics(namespace="DATA_AUTOMATION_RESULT")
+
+
+class DataAutomationResult:
+    def __init__(self, s3_client=None, bda_client=None):
+        # Honor injected clients (useful for testing); otherwise create new ones
+        self.s3 = s3_client or boto3.client('s3')
+        self.bda_client = bda_client or boto3.client("bedrock-data-automation-runtime")
+        self.max_retries = 60
+        self.retry_delay = 10
+
+    def _parse_s3_uri(self, s3_uri: str) -> tuple:
+        parts = s3_uri.replace("s3://", "").split("/", 1)
+        return parts[0], parts[1]
+
+    def _read_s3_json(self, s3_uri: str) -> Dict:
+        try:
+            bucket, key = self._parse_s3_uri(s3_uri)
+            response = self.s3.get_object(Bucket=bucket, Key=key)
+            return json.loads(response['Body'].read().decode('utf-8'))
+        except Exception as e:
+            logger.error("Error reading S3 file", extra={
+                "s3_uri": s3_uri,
+                "error": str(e)
+            })
+            raise
+
+    def get_job_status(self, invoke_arn: str) -> Dict[str, Any]:
+        """Get current status of the data automation job"""
+        try:
+            status = self.bda_client.get_data_automation_status(
+                invocationArn=invoke_arn
+            )
+            logger.info("Retrieved job status", extra={
+                "invoke_arn": invoke_arn,
+                "status": status.get("status")
+            })
+            return status
+        except ClientError as e:
+            logger.error("Error getting job status", extra={
+                "invoke_arn": invoke_arn,
+                "error": str(e)
+            })
+            raise
+
+    async def wait_for_completion(self, invoke_arn: str) -> Dict[str, Any]:
+        """
+        Asynchronously wait for job completion
+        """
+        retries = 0
+        while retries < self.max_retries:
+            status = self.get_job_status(invoke_arn)
+            current_status = status.get("status")
+
+            if current_status == "Success":
+                logger.info("Job completed successfully", extra={
+                    "invoke_arn": invoke_arn
+                })
+                return status
+            elif current_status in ["FAILED", "CANCELLED"]:
+                logger.error("Job ended with status", extra={
+                    "invoke_arn": invoke_arn,
+                    "status": current_status,
+                    "error": status.get("errorMessage")
+                })
+                return status
+
+            logger.info("Job in progress", extra={
+                "invoke_arn": invoke_arn,
+                "status": current_status,
+                "retry": retries
+            })
+            await asyncio.sleep(self.retry_delay)
+            retries += 1
+
+        raise TimeoutError(f"Job did not complete within {self.max_retries * self.retry_delay} seconds")
+
+    async def get_results(self, invocation_arn: str, wait: bool = False) -> Dict[str, Any]:
+        """
+        Get results from completed job
+
+        Args:
+            invocation_arn: ARN of the invoked job
+            wait: If True, wait asynchronously for job completion
+
+        Returns:
+            Dict[str, 
Any]: Job results and metadata + """ + try: + # Get current status or wait for completion based on wait parameter + if wait: + logger.info("Waiting for job completion", extra={ + "invocation_arn": invocation_arn + }) + status = await self.wait_for_completion(invocation_arn) + else: + status = self.get_job_status(invocation_arn) + + current_status = status.get("status") + + # Check if job is completed + if current_status != "Success": + logger.warning("Job not completed, returning status", extra={ + "invoke_arn": invocation_arn, + "status": current_status, + "wait_enabled": wait + }) + return { + "status": current_status, + "details": status + } + + # Get output location + output_uri = status["outputConfiguration"]["s3Uri"] + logger.info("Fetching results", extra={ + "invoke_arn": invocation_arn, + "output_uri": output_uri + }) + + # Read metadata + metadata = self._read_s3_json(output_uri) + results = [] + + # Process each output + for output in metadata["output_metadata"]: + for segment in output["segment_metadata"]: + # Add custom output if there's a match + if segment["custom_output_status"] == "MATCH": + custom_result = self._read_s3_json(segment["custom_output_path"]) + results.append(custom_result) + logger.debug("Added custom output", extra={ + "path": segment["custom_output_path"] + }) + + # Add standard output + standard_result = self._read_s3_json(segment["standard_output_path"]) + results.append(standard_result) + logger.debug("Added standard output", extra={ + "path": segment["standard_output_path"] + }) + + logger.info("Successfully retrieved results", extra={ + "invoke_arn": invocation_arn, + "result_count": len(results), + "wait_enabled": wait + }) + + return { + "status": "Success", + "metadata": metadata, + "results": results + } + + except TimeoutError as e: + logger.error("Timeout waiting for job completion", extra={ + "invoke_arn": invocation_arn, + "max_retries": self.max_retries, + "retry_delay": self.retry_delay + }) + return { + "status": "TIMEOUT", + "details": { + "error": str(e), + "max_wait_time": self.max_retries * self.retry_delay + } + } + + except Exception as e: + logger.error("Error getting results", extra={ + "invoke_arn": invocation_arn, + "error": str(e), + "wait_enabled": wait + }) + raise diff --git a/lambda/aws-bedrock-data-automation/data_result/lambda.py b/lambda/aws-bedrock-data-automation/data_result/lambda.py new file mode 100644 index 00000000..60a36876 --- /dev/null +++ b/lambda/aws-bedrock-data-automation/data_result/lambda.py @@ -0,0 +1,173 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance +# with the License. A copy of the License is located at +# http://www.apache.org/licenses/LICENSE-2.0 +# +# or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES +# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions +# and limitations under the License. 
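+#
+# Note: this handler accepts either an EventBridge event whose detail carries
+# {"invocation_arn": "...", "wait": false} or an API Gateway POST request with
+# the same JSON body; handler() wraps the async implementation with asyncio.run.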
+# + +import json +import asyncio +from typing import Dict, Any +from aws_lambda_powertools import Logger, Tracer, Metrics +from aws_lambda_powertools.utilities.typing import LambdaContext +from data_automation_result import DataAutomationResult +from aws_lambda_powertools.utilities.data_classes import EventBridgeEvent, APIGatewayProxyEvent + + +logger = Logger() +tracer = Tracer() +metrics = Metrics(namespace="DATA_AUTOMATION_STATUS") + +def process_event_bridge_event(event: Dict[str, Any]) -> Dict[str, Any]: + """ + Process EventBridge events + """ + event_bridge_event = EventBridgeEvent(event) + logger.info("Received EventBridge event", extra={ + "detail_type": event_bridge_event.detail_type, + "source": event_bridge_event.source, + "detail":event_bridge_event.detail + }) + + return event_bridge_event.detail + +def process_api_gateway_event(event: Dict[str, Any]) -> Dict[str, Any]: + """ + Process API Gateway events + """ + api_event = APIGatewayProxyEvent(event) + logger.info("Received API Gateway event", extra={ + "http_method": api_event.http_method, + "path": api_event.path + }) + + if not api_event.body: + raise ValueError("Request body is required") + + try: + return json.loads(api_event.body) + except json.JSONDecodeError as e: + logger.error("Invalid JSON in request body", extra={"error": str(e)}) + raise ValueError("Invalid JSON in request body") + + + + +async def data_automation_status(event: Dict[str, Any], context: Any) -> Dict[str, Any]: + """ + Lambda handler for processing EventBridge events for data automation results + + Expected EventBridge event structure: + { + "version": "0", + "id": "event-id", + "detail-type": "DataAutomationStatus", + "source": "custom.bedrock.data.automation", + "account": "123456789012", + "time": "2024-01-01T00:00:00Z", + "region": "us-east-1", + "detail": { + "invocation_arn": "arn:aws:bedrock:region:account:data-automation-job/id", + "wait": false, + } + } + """ + try: + # Determine event source and process accordingly + if event.get("source") and event.get("detail-type"): + detail = process_event_bridge_event(event) + else: + detail = process_api_gateway_event(event) + + # Extract and validate required parameters + if 'invocation_arn' not in detail: + error_msg = "Missing required parameter: invocation_arn in event detail" + logger.error(error_msg) + return { + 'statusCode': 400, + 'body': { + 'message': error_msg + } + } + + invocation_arn = detail['invocation_arn'] + wait = detail.get('wait', False) + + logger.info("Processing data automation status request", extra={ + "invocationArn": invocation_arn, + "wait": wait, + }) + + # Initialize result processor + processor = DataAutomationResult() + + # Get results with optional waiting + results = await processor.get_results(invocation_arn, wait=wait) + + logger.info("Successfully retrieved results", extra={ + "invoke_arn": invocation_arn, + "status": results.get("status"), + "wait": wait, + }) + + return { + 'statusCode': 200, + 'body': { + **results, + } + } + + except ValueError as e: + logger.error("Validation error", extra={ + "error": str(e), + "event_id": event.get("id") + }) + metrics.add_metadata(key="errorType", value="ValidationError") + return { + 'statusCode': 400, + 'body': { + 'message': 'Validation error', + 'error': str(e), + 'event_id': event.get("id") + } + } + + except TimeoutError as e: + logger.error("Operation timed out", extra={ + "error": str(e), + "event_id": event.get("id") + }) + metrics.add_metadata(key="errorType", value="TimeoutError") + return { + 'statusCode': 
408, + 'body': { + 'message': 'Operation timed out', + 'error': str(e), + 'event_id': event.get("id") + } + } + + except Exception as e: + logger.error("Unexpected error", extra={ + "error": str(e), + "event_id": event.get("id") + }) + metrics.add_metadata(key="errorType", value="UnexpectedError") + return { + 'statusCode': 500, + 'body': { + 'message': 'Internal server error', + 'error': str(e), + 'event_id': event.get("id") + } + } + +def handler(event: Dict[str, Any], context: LambdaContext) -> Dict[str, Any]: + """ + Main Lambda handler that wraps the async handler + """ + return asyncio.run(data_automation_status(event, context)) diff --git a/layer/requirements.txt b/layer/requirements.txt new file mode 100644 index 00000000..02674274 --- /dev/null +++ b/layer/requirements.txt @@ -0,0 +1,3 @@ +boto3>=1.36.14 +botocore>=1.36.14 + diff --git a/package.json b/package.json index edbd58bd..7f8d3789 100644 --- a/package.json +++ b/package.json @@ -117,10 +117,12 @@ "constructs": "^10.3.0" }, "dependencies": { + "@aws-cdk/aws-lambda-python-alpha": "2.178.0-alpha.0", "cdk-nag": "^2.35.27", "deepmerge": "^4.3.1" }, "bundledDependencies": [ + "@aws-cdk/aws-lambda-python-alpha", "deepmerge" ], "keywords": [ diff --git a/src/index.ts b/src/index.ts index 65d2c5b9..1db48820 100644 --- a/src/index.ts +++ b/src/index.ts @@ -24,4 +24,5 @@ export * from './common/base-class/base-class'; export * from './common/base-class/construct-name-enum'; export * from './patterns/gen-ai/aws-bedrock-cw-dashboard'; export * from './patterns/gen-ai/aws-aoss-cw-dashboard'; +export * from './patterns/gen-ai/aws-bedrock-data-automation'; export * from './patterns/gen-ai/aws-bedrock-batch-stepfn'; diff --git a/src/patterns/gen-ai/aws-bedrock-data-automation/README.md b/src/patterns/gen-ai/aws-bedrock-data-automation/README.md new file mode 100644 index 00000000..d231c8a4 --- /dev/null +++ b/src/patterns/gen-ai/aws-bedrock-data-automation/README.md @@ -0,0 +1,695 @@ +# aws-bedrock-data-automation + + + +--- + +![Stability: Experimental](https://img.shields.io/badge/stability-Experimental-important.svg?style=for-the-badge) + +> All classes are under active development and subject to non-backward compatible changes or removal in any +> future version. These are not subject to the [Semantic Versioning](https://semver.org/) model. +> This means that while you may use them, you may need to update your source code when upgrading to a newer version of this package. 
+ +--- + + + + +| **Language** | **Package** | +| :----------------------------------------------------------------------------------------------- | ----------------------------------------- | +| ![Typescript Logo](https://docs.aws.amazon.com/cdk/api/latest/img/typescript32.png) TypeScript | `@cdklabs/generative-ai-cdk-constructs` | +| ![Python Logo](https://docs.aws.amazon.com/cdk/api/latest/img/python32.png) Python | `cdklabs.generative_ai_cdk_constructs` | +| ![Java Logo](https://docs.aws.amazon.com/cdk/api/latest/img/java32.png) Java | `io.github.cdklabs.generative_ai_cdk_constructs`| +| ![.Net](https://docs.aws.amazon.com/cdk/api/latest/img/dotnet32.png) .Net | `CdkLabs.GenerativeAICdkConstructs`| +| ![Go](https://docs.aws.amazon.com/cdk/api/latest/img/go32.png) Go | `github.com/cdklabs/generative-ai-cdk-constructs-go/generative-ai-cdk-constructs`| + +## Table of contents + + - [Overview](#overview) + - [Architecture](#architecture) + - [Key Features](#keyfeatures) + - [Pattern Construct Props](#pattern-construct-props) + - [Initializer](#initializer) + - [Pattern Properties](#pattern-properties) + - [Methods](#methods) + - [Default properties](#default-properties) + - [Cost](#cost) + - [Security](#security) + - [Supported AWS Regions](#supported-aws-regions) + - [Quotas](#quotas) + + + +## Overview + +The Amazon Bedrock Data Automation construct simplifies the process of extracting insights from unstructured multimodal content (documents, images, video, and audio) using Amazon Bedrock Data Automation. It provides a complete infrastructure setup with Lambda functions for managing the entire workflow - from creating custom blueprints and projects to handling data processing automation and monitoring task status. The construct automates the deployment of necessary AWS resources and configures the required IAM permissions, making it easier for developers to build and manage intelligent document processing, media analysis, and other multimodal data-centric automation solutions. + +## Architecture + +The AWS Bedrock Data Automation Construct implements a serverless solution that enables automated data processing using Amazon Bedrock. The construct deploys an AWS Lambda function that serves as a data automation client, capable of interacting with Amazon Simple Storage Service (Amazon S3) for input and output operations, and invoking the Amazon Bedrock Data Processing API. + +This construct can be integrated with Amazon API Gateway for synchronous REST API operations or Amazon EventBridge for event-driven processing. The Lambda function is configured to handle incoming requests from both services, allowing flexibility in implementation patterns. The solution supports both API-based and event-driven architectures, enabling you to process data through Amazon Bedrock based on HTTP requests or scheduled/triggered events. + +![Architecture Diagram](architecture.png) + + +## Pattern Construct Props + +| Name | Type | Required | Description | +|------|------|----------|-------------| +| inputBucket | string | No | S3 bucket for uploading blueprint schema file. If not provided, a new bucket will be created. | +| isCustomBDABlueprintRequired | boolean | No | Flag to indicate if custom Bedrock Data Automation blueprint creation is required. | +| isBDAProjectRequired | boolean | No | Flag to indicate if Bedrock Data Automation project creation is required. | +| isBDAInvocationRequired | boolean | No | Flag to indicate if Bedrock Data Automation invocation functionality is required. 
| isStatusRequired | boolean | No | Flag to indicate if status checking functionality is required. |
+| outputBucket | string | No | S3 bucket for storing output files. If not provided, a new bucket will be created when isBDAInvocationRequired is true. |
+
+## Initializer
+
+TypeScript:
+
+```typescript
+import { BedrockDataAutomation } from '@cdklabs/generative-ai-cdk-constructs';
+
+const bdaConstruct = new BedrockDataAutomation(this, 'MyBDAConstruct', {
+  isCustomBDABlueprintRequired: true,
+  isBDAProjectRequired: true,
+  isBDAInvocationRequired: true,
+  isStatusRequired: true
+});
+```
+
+Python:
+
+```python
+from cdklabs.generative_ai_cdk_constructs import BedrockDataAutomation
+
+bda_construct = BedrockDataAutomation(self, "MyBDAConstruct",
+    is_custom_bda_blueprint_required=True,
+    is_bda_project_required=True,
+    is_bda_invocation_required=True,
+    is_status_required=True
+)
+```
+
+## Key Features
+
+This construct provides granular control over Amazon Bedrock Data Automation capabilities through configurable feature flags. You can selectively enable specific features based on your workload requirements.
+
+### Creating Custom Blueprints
+
+The construct enables creation of custom blueprints when the `isCustomBDABlueprintRequired` property is set to true in the construct configuration. You can integrate this construct with either Amazon EventBridge for event-driven workflows or Amazon API Gateway for REST API operations.
+
+To create a new blueprint, send an event to EventBridge or a request to API Gateway using one of the following options.
+
+## Option 1: Add Amazon EventBridge as a front-end interface to the construct
+
+TypeScript:
+
+```typescript
+import { EventbridgeToLambda } from '@aws-solutions-constructs/aws-eventbridge-lambda';
+import { BedrockDataAutomation } from '@cdklabs/generative-ai-cdk-constructs';
+
+const bdaConstruct = new BedrockDataAutomation(this, 'MyBDAConstruct', {
+  isCustomBDABlueprintRequired: true,
+  isBDAProjectRequired: false,
+  isBDAInvocationRequired: false,
+  isStatusRequired: false
+});
+
+const bluePrintFunction = bdaConstruct.bdaBlueprintLambdaFunction;
+
+const blueprintEventbridge = new EventbridgeToLambda(this, 'CreateBlueprintEventRule', {
+  existingLambdaObj: bluePrintFunction,
+  eventRuleProps: {
+    eventPattern: {
+      source: ['custom.bedrock.blueprint'],
+      detailType: ['Bedrock Blueprint Request'],
+    }
+  },
+});
+```
+
+Python:
+
+```python
+from cdklabs.generative_ai_cdk_constructs import BedrockDataAutomation
+from aws_solutions_constructs.aws_eventbridge_lambda import EventbridgeToLambda
+
+bda_construct = BedrockDataAutomation(self, "MyBDAConstruct",
+    is_custom_bda_blueprint_required=True,
+    is_bda_project_required=False,
+    is_bda_invocation_required=False,
+    is_status_required=False
+)
+
+blueprint_lambda_function = bda_construct.bda_blueprint_lambda_function
+
+EventbridgeToLambda(self, 'create_blueprint_lambda',
+    existing_lambda_obj=blueprint_lambda_function,
+    event_rule_props={
+        "event_pattern": {
+            "source": ["custom.bedrock.blueprint"],
+            "detail_type": ["Bedrock Blueprint Request"],
+        }
+    }
+)
+```
+
+Upload the sample file to S3:
+
+```
+aws s3 cp ./{sample_file.pdf} s3://{input-bucket-name}/
+```
+
+Create a bp_event.json file with the following event in your project directory. The `schema_fields` shown are a sample; replace them with your expected blueprint schema.
+
+```json
+{
+  "Entries": [
+    {
+      "Source": "custom.bedrock.blueprint",
+      "DetailType": "Bedrock Blueprint Request",
+      "Detail": {
+        "blueprint_name": "noa_bp",
+        "blueprint_type": "DOCUMENT",
+        "blueprint_stage": "LIVE",
+        "operation": "CREATE",
+        "schema_fields": [
+          {
+            "name": "Total income",
+            "description": "Please analyze the following Notice of assessment report and extract information about Total income.",
+            "alias": "Total income"
+          },
+          {
+            "name": "Taxable Income",
+            "description": "Please analyze the following Notice of assessment report and extract information about Taxable income.",
+            "alias": "Taxable Income"
+          },
+          {
+            "name": "Tax payable",
+            "description": "Please analyze the following Notice of assessment report and extract information about Tax payable.",
+            "alias": "Tax payable"
+          }
+        ]
+      }
+    }
+  ]
+}
+```
+
+Send the EventBridge event using the command below:
+
+```
+aws events put-events --cli-input-json file://bp_event.json
+```
+
+## Invoke Construct with S3 event notifications
+
+Note: To automatically trigger the blueprint Lambda function upon file upload to Amazon S3:
+
+* Enable Amazon EventBridge notifications on the input S3 bucket in your stack.
+
+    ```python
+    bda_input_bucket = bda_construct.bda_input_bucket
+    bda_input_bucket.enable_event_bridge_notification()
+    ```
+
+* Configure Amazon S3 Event Notifications to send events to Amazon EventBridge.
+
+```python
+create_blueprint_event = EventbridgeToLambda(self, 'invokeBda',
+    existing_lambda_obj=bda_construct.bda_blueprint_lambda_function,
+    event_rule_props={
+        "event_pattern": {
+            "source": ["aws.s3"],
+            "detail_type": ["Object Created"],
+            "detail": {
+                "bucket": {
+                    "name": [bda_construct.bda_input_bucket.bucket_name]
+                },
+                "object": {
+                    "key": [{
+                        "suffix": ".pdf"
+                    }]
+                }
+            }
+        }
+    }
+)
+
+rule = create_blueprint_event.events_rule
+```
+
+* Use an Amazon EventBridge input transformer to convert S3 events into the blueprint Lambda function's expected format.
+
+```python
+# Assumes: import json, plus aws_events/aws_events_targets imported as events/targets
+rule.add_target(targets.LambdaFunction(
+    bda_construct.bda_blueprint_lambda_function,
+    event=events.RuleTargetInput.from_object({
+        "source": "custom.bedrock.blueprint",
+        "detail_type": "Bedrock Blueprint Request",
+        "detail": json.dumps({
+            "blueprint_name": "XXXX",
+            "blueprint_type": "DOCUMENT",
+            "blueprint_stage": "LIVE",
+            "operation": "CREATE",
+            "schema_fields": [
+                {
+                    "name": "XXXXX",
+                    "description": "XXXXXX.",
+                    "alias": "XXXXX"
+                },
+                {
+                    "name": "XXXXX",
+                    "description": "XXXXXX.",
+                    "alias": "XXXXX"
+                },
+                {
+                    "name": "XXXXX",
+                    "description": "XXXXXX.",
+                    "alias": "XXXXX"
+                }
+            ]
+        })
+    })
+))
+```
+
+## Option 2: Add API Gateway as a front-end interface to the construct
+
+TypeScript:
+
+```typescript
+import { ApiGatewayToLambda } from '@aws-solutions-constructs/aws-apigateway-lambda';
+import { BedrockDataAutomation } from '@cdklabs/generative-ai-cdk-constructs';
+
+const bdaConstruct = new BedrockDataAutomation(this, 'MyBDAConstruct', {
+  isCustomBDABlueprintRequired: true,
+  isBDAProjectRequired: false,
+  isBDAInvocationRequired: false,
+  isStatusRequired: false
+});
+
+new ApiGatewayToLambda(this, 'ApiGatewayToLambdaPattern', {
+  existingLambdaObj: bdaConstruct.bdaBlueprintLambdaFunction,
+  apiGatewayProps: {
+    restApiName: 'createCustomBlueprint',
+  }
+});
+```
+
+Python:
+
+```python
+from cdklabs.generative_ai_cdk_constructs import BedrockDataAutomation
+from aws_solutions_constructs.aws_apigateway_lambda import ApiGatewayToLambda
+from aws_cdk import aws_apigateway as apigw
+
+bda_construct = BedrockDataAutomation(self, "MyBDAConstruct",
+    is_custom_bda_blueprint_required=True,
+    is_bda_project_required=False,
+    is_bda_invocation_required=False,
+    is_status_required=False
+)
+
+blueprint_api = ApiGatewayToLambda(self, 'CreateBlueprintApi',
+    existing_lambda_obj=bda_construct.bda_blueprint_lambda_function,
+    api_gateway_props=apigw.RestApiProps(
+        rest_api_name='createBluePrintPython'
+    )
+)
+```
+
+Publish a POST request with the following body. The `schema_fields` are the expected output fields.
+
+```json
+{
+  "blueprint_name": "noa_bp_api_2",
+  "blueprint_type": "DOCUMENT",
+  "blueprint_stage": "LIVE",
+  "operation": "CREATE",
+  "schema_fields": [
+    {
+      "name": "XXXX",
+      "description": "XXXX.",
+      "alias": "XXXX"
+    },
+    {
+      "name": "XXXX",
+      "description": "XXXX.",
+      "alias": "XXXX"
+    }
+  ]
+}
+```
+
+### CRUD operations on blueprints
+
+You can perform additional operations such as list, get, update, and delete on your Amazon Bedrock blueprints using the same stack.
+
+To execute these operations, invoke the Lambda function with the appropriate event payload. Each operation requires specific event parameters as detailed in the following sections.
+
+Note: The operation type is determined by the event structure passed to the Lambda function through either Amazon API Gateway or Amazon EventBridge.
+
+## EventBridge event format
+
+Use the appropriate operation (`GET`, `UPDATE`, or `DELETE`):
+
+```json
+{
+  "Entries": [
+    {
+      "Source": "custom.bedrock.blueprint",
+      "DetailType": "Bedrock Blueprint Request",
+      "Detail": {
+        "blueprint_arn": "XXXXXXXXX",
+        "operation": "DELETE/GET/UPDATE"
+      }
+    }
+  ]
+}
+```
+
+## API Gateway request body
+
+```json
+{
+  "operation": "GET/UPDATE/DELETE",
+  "blueprintArn": "XXXXXXXX"
+}
+```
+
+## Creating Data Automation Projects
+
+The construct enables creation and management of Bedrock Data Automation projects when the `isBDAProjectRequired` property is set to true in the construct configuration. You can integrate this construct with either Amazon EventBridge for event-driven workflows or Amazon API Gateway for REST API operations.
+
+### Project Creation Event Format
+
+To create a new Bedrock Data Automation project, send an event to EventBridge or a request to API Gateway using one of the following options.
+
+## Option 1: Add Amazon EventBridge as a front-end interface to the construct
+
+Please use the same [stack](#creating-custom-blueprints) as in Option 1 above and replace the `existingLambdaObj` with `bdaConstruct.bdaProjectLambdaFunction`.
+
+Create a bda_event.json file with the event below.
+
+```json
+{
+  "Entries": [
+    {
+      "Source": "custom.bedrock.project",
+      "DetailType": "Bedrock Project Request",
+      "Detail": {
+        "project_name": "sample_proj",
+        "project_description": "Sample Project",
+        "project_stage": "LIVE",
+        "operation": "CREATE"
+      }
+    }
+  ]
+}
+```
+
+Publish the event using the command below:
+
+```
+aws events put-events --cli-input-json file://bda_event.json
+```
+
+## Option 2: Add API Gateway as a front-end interface to the construct
+
+Please use the same [stack](#creating-custom-blueprints) as in Option 2 above and replace the `existingLambdaObj` with `bdaConstruct.bdaProjectLambdaFunction`, as shown in the sketch below.
+
+Publish a POST request with the following body:
+
+```json
+{
+  "operation": "create",
+  "projectName": "bp_project_1",
+  "projectStage": "LIVE",
+  "projectDescription": "sample",
+  "customOutputConfiguration": {
+    "blueprints": [
+      { "blueprintArn": "XXXXXXXX", "blueprintStage": "LIVE" }
+    ]
+  }
+}
+```
+
+### CRUD operations on projects
+
+You can perform additional operations such as list, get, update, and delete on your Amazon Bedrock projects using the same stack.
+
+To execute these operations, invoke the Lambda function with the appropriate event payload. Each operation requires specific event parameters as detailed in the following sections.
+
+Note: The operation type is determined by the event structure passed to the Lambda function through either Amazon API Gateway or Amazon EventBridge.
+
+## API Gateway request body
+
+```json
+{
+  "operation": "delete/update/list",
+  "projectArn": "XXXXXXXX"
+}
+```
+
+## Data Processing Invocations
+
+The construct enables automated data processing through Bedrock Data Automation invocations when the `isBDAInvocationRequired` property is set to true in the construct configuration. You can integrate this construct with either Amazon EventBridge for event-driven workflows or Amazon API Gateway for REST API operations.
+
+## Option 1: Add Amazon EventBridge as a front-end interface to the construct
+
+Please use the same [stack](#creating-custom-blueprints) as in Option 1 above and replace the `existingLambdaObj` with `bdaConstruct.bdaInvocationLambdaFunction`.
+
+Create a bda_event.json file with the event below, then push it using the following CLI command. The `blueprint_arn` value comes from the create blueprint response.
+
+```json
+{
+  "Entries": [
+    {
+      "Source": "custom.bedrock.invocation",
+      "DetailType": "Bedrock Invoke Request",
+      "Detail": {
+        "input_filename": "sample_input.pdf",
+        "output_filename": "sample_output.json",
+        "blueprints": [{
+          "blueprint_arn": "XXXXXXX",
+          "stage": "LIVE"
+        }]
+      }
+    }
+  ]
+}
+```
+
+```
+aws events put-events --cli-input-json file://bda_event.json
+```
+
+## Invoke Construct with S3 event notifications
+
+Note: To automatically trigger the invocation Lambda function upon file upload to Amazon S3:
+
+* Enable Amazon EventBridge notifications on the input S3 bucket in your stack.
+
+    ```python
+    bda_input_bucket = bda_construct.bda_input_bucket
+    bda_input_bucket.enable_event_bridge_notification()
+    ```
+
+* Configure Amazon S3 Event Notifications to send events to Amazon EventBridge.
+
+```python
+invoke_bda_event = EventbridgeToLambda(self, 'invokeBda',
+    existing_lambda_obj=bda_construct.bda_invocation_lambda_function,
+    event_rule_props={
+        "event_pattern": {
+            "source": ["aws.s3"],
+            "detail_type": ["Object Created"],
+            "detail": {
+                "bucket": {
+                    "name": [bda_construct.bda_input_bucket.bucket_name]
+                },
+                "object": {
+                    "key": [{
+                        "suffix": ".pdf"
+                    }]
+                }
+            }
+        }
+    }
+)
+
+rule = invoke_bda_event.events_rule
+```
+
+* Use an Amazon EventBridge input transformer to convert S3 events into the invocation Lambda function's expected format.
+
+```python
+# Assumes: import json, plus aws_events/aws_events_targets imported as events/targets.
+# output_filename is omitted so the Lambda defaults it to '<input_filename>.json'.
+rule.add_target(targets.LambdaFunction(
+    bda_construct.bda_invocation_lambda_function,
+    event=events.RuleTargetInput.from_object({
+        "source": "custom.bedrock.invocation",
+        "detail_type": "Bedrock Invoke Request",
+        "detail": json.dumps({
+            "input_filename": events.EventField.from_path('$.detail.object.key'),
+            "blueprints": [{
+                "blueprint_arn": blueprint_arn,
+                "stage": "LIVE"
+            }]
+        })
+    })
+))
+```
+
+## Option 2: Add API Gateway as a front-end interface to the construct
+
+Please use the same [stack](#creating-custom-blueprints) as in Option 2 above and replace the `existingLambdaObj` with `bdaConstruct.bdaInvocationLambdaFunction`.
+
+## Processing Status Monitoring
+
+The construct provides automated status monitoring for Bedrock Data Automation processing jobs. To enable this functionality, set `isStatusRequired = true` in the construct props.
+
+### Status Check Event Format
+
+To check the status of a processing job, send an event to EventBridge or a request to API Gateway using one of the following options.
+
+## Option 1: Add Amazon EventBridge as a front-end interface to the construct
+
+TypeScript:
+
+```typescript
+import { EventbridgeToLambda } from '@aws-solutions-constructs/aws-eventbridge-lambda';
+
+const dataResultStatusFunction = bdaConstruct.bdaResultStatusLambdaFunction;
+
+new EventbridgeToLambda(this, 'bdaResult', {
+  existingLambdaObj: dataResultStatusFunction,
+  eventRuleProps: {
+    eventPattern: {
+      source: ['custom.bedrock.blueprint'],
+      detailType: ['Bedrock Result Status'],
+    }
+  },
+});
+```
+
+Python:
+
+```python
+data_result_status_function = bda_construct.bda_result_status_lambda_function
+
+EventbridgeToLambda(self, 'data_result_lambda',
+    existing_lambda_obj=data_result_status_function,
+    event_rule_props={
+        "event_pattern": {
+            "source": ["custom.bedrock.blueprint"],
+            "detail_type": ["Bedrock Result Status"],
+        }
+    }
+)
+```
+
+Create a bda_result_event.json file with the event below. The `invocation_arn` value is returned when the data processing job is started.
+
+```json
+{
+  "Entries": [
+    {
+      "Source": "custom.bedrock.blueprint",
+      "DetailType": "Bedrock Result Status",
+      "Detail": { "invocation_arn": "XXXXXX" }
+    }
+  ]
+}
+```
+
+Then push the event using the following CLI command:
+
+```
+aws events put-events --cli-input-json file://bda_result_event.json
+```
+
+## Option 2: Add API Gateway as a front-end interface to the construct
+
+```typescript
+import { ApiGatewayToLambda } from '@aws-solutions-constructs/aws-apigateway-lambda';
+
+new ApiGatewayToLambda(this, 'ApiGatewayToLambdaPattern', {
+  existingLambdaObj: dataResultStatusFunction,
+  apiGatewayProps: {
+    restApiName: 'dataResultStatus',
+  }
+});
+```
+
+Publish a POST request with the following body. The `invocation_arn` value comes from the data processing API response.
+
+```json
+{ "invocation_arn": "XXXXXX" }
+```
+
+## Default properties
+
+### Lambda functions
+
+- **Memory Size**: 1024 MB
+- **Timeout**: 15 minutes
+- **Architecture**: x86_64
+
+### S3 buckets
+
+If not provided, the construct handles the creation of the S3 buckets and their associated server access log buckets:
+
+- **Input Bucket**:
+  - **Name**: `input-documents`
+  - **Encryption**: S3 Managed
+  - **Versioned**: true
+  - **Block Public Access**: Block all public access
+  - **Removal Policy**: Destroy
+  - **Auto Delete Objects**: true
+
+- **Output Bucket**:
+  - **Name**: `output-documents`
+  - **Encryption**: S3 Managed
+  - **Versioned**: true
+  - **Block Public Access**: Block all public access
+  - **Removal Policy**: Destroy
+  - **Auto Delete Objects**: true
+
+## Cost
+
+You are responsible for the cost of the AWS services used while running this construct.
+
+We recommend creating a budget through [AWS Cost Explorer](http://aws.amazon.com/aws-cost-management/aws-cost-explorer/) to help manage costs. Prices are subject to change. For full details, refer to the pricing webpage for each AWS service used in this solution:
+
+- [AWS Lambda pricing](https://aws.amazon.com/lambda/pricing/)
+- [Amazon CloudWatch pricing](https://aws.amazon.com/cloudwatch/pricing/)
+- [Amazon Bedrock pricing](https://aws.amazon.com/bedrock/pricing/)
+
+## Supported AWS Regions
+
+Amazon Bedrock Data Automation is currently available only in the US West (Oregon) Region - `us-west-2`. When deploying this construct, ensure your AWS CDK application is configured to deploy in the US West (Oregon) Region. If you attempt to deploy this construct in any other region, it will fail because the underlying Amazon Bedrock Data Automation service is not available there. This regional limitation applies to all components of the construct, including blueprint creation, project management, data processing invocations, and status monitoring functionalities. We recommend monitoring the AWS Regional Services List for updates on regional availability expansions.
+
+Note: While the construct must be deployed in US West (Oregon), you can still process data stored in S3 buckets from other regions, though this may incur additional cross-region data transfer costs.
+
+## Security
+
+When you build systems on AWS infrastructure, security responsibilities are shared between you and AWS. This [shared responsibility](http://aws.amazon.com/compliance/shared-responsibility-model/) model reduces your operational burden because AWS operates, manages, and controls the components including the host operating system, virtualization layer, and physical security of the facilities in which the services operate. For more information about AWS security, visit [AWS Cloud Security](http://aws.amazon.com/security/).
+
+Optionally, you can provide existing resources to the constructs (marked optional in the construct pattern props). If you choose to do so, please refer to the official documentation on best practices to secure each service:
+
+- [Amazon CloudWatch](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/security.html)
+
+If you grant a user access to the account where this construct is deployed, that user may access information stored by the construct (Amazon CloudWatch logs). 
To help secure your AWS resources, please follow the best practices for [AWS Identity and Access Management (IAM)](https://docs.aws.amazon.com/IAM/latest/UserGuide/best-practices.html). + +AWS CloudTrail provides a number of security features to consider as you develop and implement your own security policies. Please follow the related best practices through the [official documentation](https://docs.aws.amazon.com/awscloudtrail/latest/userguide/best-practices-security.html). + + +## Quotas + +Service quotas, also referred to as limits, are the maximum number of service resources or operations for your AWS account. + +Make sure you have sufficient quota for each of the services implemented in this solution. For more information, refer to [AWS service quotas](https://docs.aws.amazon.com/general/latest/gr/aws_service_limits.html). + +To view the service quotas for all AWS services in the documentation without switching pages, view the information in the [Service endpoints and quotas](https://docs.aws.amazon.com/general/latest/gr/aws-general.pdf#aws-service-information) page in the PDF instead. + +--- + +© Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. diff --git a/src/patterns/gen-ai/aws-bedrock-data-automation/architecture.png b/src/patterns/gen-ai/aws-bedrock-data-automation/architecture.png new file mode 100644 index 00000000..83cf55b9 Binary files /dev/null and b/src/patterns/gen-ai/aws-bedrock-data-automation/architecture.png differ diff --git a/src/patterns/gen-ai/aws-bedrock-data-automation/bda-blueprint-lambda.ts b/src/patterns/gen-ai/aws-bedrock-data-automation/bda-blueprint-lambda.ts new file mode 100644 index 00000000..6e8cea41 --- /dev/null +++ b/src/patterns/gen-ai/aws-bedrock-data-automation/bda-blueprint-lambda.ts @@ -0,0 +1,172 @@ +/** + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance + * with the License. A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES + * OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions + * and limitations under the License. + */ + +import * as path from 'path'; +import { aws_iam as iam, aws_lambda as lambda, Duration, Aws } from 'aws-cdk-lib'; +import * as s3 from 'aws-cdk-lib/aws-s3'; +import { NagSuppressions } from 'cdk-nag'; +import { Construct } from 'constructs'; + +/** + * Properties for creating a BdaBlueprintLambda + */ +export interface BdaBlueprintLambdaProps { + /** + * The S3 bucket + * for input data used by the Bedrock Data Automation process. + * If not provided, a new bucket will be created. + */ + readonly inputBucket: s3.IBucket; + /** + * The layers to apply to this lambda function. 
+ */ + readonly lambdaLayers: lambda.ILayerVersion[]; +} + +/** + * Lambda function that manages BDA blueprint creation + */ +export class BdaBlueprintLambda extends lambda.Function { + + constructor(scope: Construct, id: string, props: BdaBlueprintLambdaProps) { + + const role = new iam.Role( + scope, + `${id}createBlueprint`, + { + assumedBy: new iam.ServicePrincipal('lambda.amazonaws.com'), + }, + ); + + super(scope, id, { + + runtime: lambda.Runtime.PYTHON_3_13, + handler: 'lambda.handler', + code: lambda.Code.fromAsset(path.join(__dirname, '../../../../lambda/aws-bedrock-data-automation/create-blueprint')), + layers: props.lambdaLayers, + environment: { + INPUT_BUCKET: props.inputBucket.bucketName, + POWERTOOLS_SERVICE_NAME: 'BEDROCK_BLUEPRINT', + }, + memorySize: 1024, + role: role, + architecture: lambda.Architecture.X86_64, + timeout: Duration.minutes(15), + }); + + // Add basic permissions for CloudWatch logs + const cloudwatchLogsPolicy = new iam.Policy( + scope, + `${id}LambdaBasicExecPolicy`, + { + statements: [ + new iam.PolicyStatement({ + effect: iam.Effect.ALLOW, + actions: [ + 'logs:CreateLogGroup', + 'logs:CreateLogStream', + 'logs:PutLogEvents', + ], + resources: [ + `arn:aws:logs:${Aws.REGION}:${Aws.ACCOUNT_ID}:log-group:/aws/lambda/${this.functionName}`, + `arn:aws:logs:${Aws.REGION}:${Aws.ACCOUNT_ID}:log-group:/aws/lambda/${this.functionName}:*`, + ], + }), + ], + }, + ); + + NagSuppressions.addResourceSuppressions( + cloudwatchLogsPolicy, + [{ id: 'AwsSolutions-IAM5', reason: 'Lambda requires CloudWatch logs permissions with log group name patterns' }], + ); + + role.attachInlinePolicy(cloudwatchLogsPolicy); + + // Permissions for BDA + const BedrockBDABPPolicy = new iam.Policy( + scope, + `${id}BedrockBDABPPolicy`, + { + statements: [ + new iam.PolicyStatement({ + effect: iam.Effect.ALLOW, + actions: [ + 'bedrock:ListBlueprints', + 'bedrock:DeleteBlueprint', + 'bedrock:InvokeBlueprint', + 'bedrock:ListBlueprintInvocations', + 'bedrock:GetBlueprintInvocation', + ], + resources: ['*'], + }), + ], + }, + ); + + NagSuppressions.addResourceSuppressions( + BedrockBDABPPolicy, + [{ + id: 'AwsSolutions-IAM5', + reason: 'Bedrock Blueprint operations require access to all blueprints as resource-level permissions are not supported', + }], + true, + ); + + role.attachInlinePolicy(BedrockBDABPPolicy); + + + const bedrockBDABPVersionPolicy = new iam.Policy( + scope, + `${id}BedrockBDABPVersionPolicy`, + { + statements: [ + new iam.PolicyStatement({ + effect: iam.Effect.ALLOW, + actions: [ + 'bedrock:CreateBlueprint', + 'bedrock:CreateBlueprintVersion', + ], + resources: ['*'], + }), + ], + }, + ); + + NagSuppressions.addResourceSuppressions( + bedrockBDABPVersionPolicy, + [{ + id: 'AwsSolutions-IAM5', + reason: 'Bedrock Blueprint version creation operations require access to all blueprints as resource-level permissions are not supported', + }], + true, + ); + + + role.attachInlinePolicy(bedrockBDABPVersionPolicy); + + // Give Lambda access to the bucket + if (this.role) { + props.inputBucket.grantRead(this.role); + } + + NagSuppressions.addResourceSuppressions( + role, + [{ + id: 'AwsSolutions-IAM5', + reason: 'Lambda needs read access to process files from the input bucket', + }], + true, + ); + } +} \ No newline at end of file diff --git a/src/patterns/gen-ai/aws-bedrock-data-automation/bda-data-processing-lambda.ts b/src/patterns/gen-ai/aws-bedrock-data-automation/bda-data-processing-lambda.ts new file mode 100644 index 00000000..b964ab57 --- /dev/null +++ 
b/src/patterns/gen-ai/aws-bedrock-data-automation/bda-data-processing-lambda.ts
@@ -0,0 +1,147 @@
+/**
+ * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance
+ * with the License. A copy of the License is located at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES
+ * OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions
+ * and limitations under the License.
+ */
+
+import * as path from 'path';
+import { aws_iam as iam, aws_lambda as lambda, Duration, Aws } from 'aws-cdk-lib';
+import * as s3 from 'aws-cdk-lib/aws-s3';
+import { NagSuppressions } from 'cdk-nag';
+import { Construct } from 'constructs';
+
+/**
+ * Properties for creating a BdaDataProcessingLambda
+ */
+export interface BdaDataProcessingLambdaProps {
+  /**
+   * The S3 bucket
+   * for input data used by the Bedrock Data Automation process.
+   * If not provided, a new bucket will be created.
+   */
+  readonly inputBucket: s3.IBucket;
+  /**
+   * The S3 bucket
+   * for output data produced by the Bedrock Data Automation process.
+   * If not provided, a new bucket will be created.
+   */
+  readonly outputBucket: s3.IBucket;
+  /**
+   * The layers to apply to this lambda function.
+   */
+  readonly lambdaLayers: lambda.ILayerVersion[];
+}
+
+/**
+ * Lambda function that manages BDA data processing
+ */
+export class BdaDataProcessingLambda extends lambda.Function {
+
+  constructor(scope: Construct, id: string, props: BdaDataProcessingLambdaProps) {
+
+    const role = new iam.Role(
+      scope,
+      `${id}createDataProcessing`,
+      {
+        assumedBy: new iam.ServicePrincipal('lambda.amazonaws.com'),
+      },
+    );
+
+    super(scope, id, {
+      runtime: lambda.Runtime.PYTHON_3_13,
+      handler: 'lambda.handler',
+      code: lambda.Code.fromAsset(path.join(__dirname, '../../../../lambda/aws-bedrock-data-automation/data_processing')),
+      layers: props.lambdaLayers,
+      environment: {
+        INPUT_BUCKET: props.inputBucket.bucketName,
+        OUTPUT_BUCKET: props.outputBucket.bucketName,
+        OUTPUT_FILENAME: '',
+        POWERTOOLS_SERVICE_NAME: 'BEDROCK_INVOKE',
+      },
+      memorySize: 1024,
+      role: role,
+      architecture: lambda.Architecture.X86_64,
+      timeout: Duration.minutes(15),
+    });
+
+    // Add basic permissions for CloudWatch logs
+    const cloudwatchLogsPolicy = new iam.Policy(
+      scope,
+      `${id}LambdaBasicExecPolicy`,
+      {
+        statements: [
+          new iam.PolicyStatement({
+            effect: iam.Effect.ALLOW,
+            actions: [
+              'logs:CreateLogGroup',
+              'logs:CreateLogStream',
+              'logs:PutLogEvents',
+            ],
+            resources: [
+              `arn:aws:logs:${Aws.REGION}:${Aws.ACCOUNT_ID}:log-group:/aws/lambda/${this.functionName}`,
+              `arn:aws:logs:${Aws.REGION}:${Aws.ACCOUNT_ID}:log-group:/aws/lambda/${this.functionName}:*`,
+            ],
+          }),
+        ],
+      },
+    );
+
+    NagSuppressions.addResourceSuppressions(
+      cloudwatchLogsPolicy,
+      [{ id: 'AwsSolutions-IAM5', reason: 'Lambda requires CloudWatch logs permissions with log group name patterns' }],
+    );
+
+    role.attachInlinePolicy(cloudwatchLogsPolicy);
+
+    // Permissions for BDA
+    const bedrockBDAPolicy = new iam.Policy(
+      scope,
+      `${id}BedrockBDAProcessingPolicy`,
+      {
+        statements: [
+          new iam.PolicyStatement({
+            effect: iam.Effect.ALLOW,
+            actions: [
+              'bedrock:InvokeDataAutomationAsync',
+            ],
+            resources: ['*'],
+          }),
+        ],
+      },
+    );
+
+    
+
+    // Add basic permissions for CloudWatch logs
+    const cloudwatchLogsPolicy = new iam.Policy(
+      scope,
+      `${id}LambdaBasicExecPolicy`,
+      {
+        statements: [
+          new iam.PolicyStatement({
+            effect: iam.Effect.ALLOW,
+            actions: [
+              'logs:CreateLogGroup',
+              'logs:CreateLogStream',
+              'logs:PutLogEvents',
+            ],
+            resources: [
+              `arn:aws:logs:${Aws.REGION}:${Aws.ACCOUNT_ID}:log-group:/aws/lambda/${this.functionName}`,
+              `arn:aws:logs:${Aws.REGION}:${Aws.ACCOUNT_ID}:log-group:/aws/lambda/${this.functionName}:*`,
+            ],
+          }),
+        ],
+      },
+    );
+
+    NagSuppressions.addResourceSuppressions(
+      cloudwatchLogsPolicy,
+      [{ id: 'AwsSolutions-IAM5', reason: 'Lambda requires CloudWatch logs permissions with log group name patterns' }],
+    );
+
+    role.attachInlinePolicy(cloudwatchLogsPolicy);
+
+    // Permissions for BDA
+    const bedrockBDAPolicy = new iam.Policy(
+      scope,
+      `${id}BedrockBDAProcessingPolicy`,
+      {
+        statements: [
+          new iam.PolicyStatement({
+            effect: iam.Effect.ALLOW,
+            actions: [
+              'bedrock:InvokeDataAutomationAsync',
+            ],
+            resources: ['*'],
+          }),
+        ],
+      },
+    );
+
+    NagSuppressions.addResourceSuppressions(
+      bedrockBDAPolicy,
+      [{
+        id: 'AwsSolutions-IAM5',
+        reason: 'Invocation Lambda needs access for data processing operations',
+      }],
+      true,
+    );
+
+    role.attachInlinePolicy(bedrockBDAPolicy);
+
+    // Give Lambda read/write access to the buckets
+    if (this.role) {
+      props.inputBucket.grantReadWrite(this.role);
+      props.outputBucket.grantReadWrite(this.role);
+    }
+
+    NagSuppressions.addResourceSuppressions(
+      role,
+      [{
+        id: 'AwsSolutions-IAM5',
+        reason: 'Lambda needs read/write access to the input and output buckets used by the data processing workflow',
+      }],
+      true,
+    );
+  }
+}
\ No newline at end of file
diff --git a/src/patterns/gen-ai/aws-bedrock-data-automation/bda-project-lambda.ts b/src/patterns/gen-ai/aws-bedrock-data-automation/bda-project-lambda.ts
new file mode 100644
index 00000000..80b7c431
--- /dev/null
+++ b/src/patterns/gen-ai/aws-bedrock-data-automation/bda-project-lambda.ts
@@ -0,0 +1,141 @@
+/**
+ * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance
+ * with the License. A copy of the License is located at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES
+ * OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions
+ * and limitations under the License.
+ */
+
+import * as path from 'path';
+import { aws_iam as iam, aws_lambda as lambda, Duration, Aws } from 'aws-cdk-lib';
+import * as s3 from 'aws-cdk-lib/aws-s3';
+import { NagSuppressions } from 'cdk-nag';
+import { Construct } from 'constructs';
+
+/**
+ * Properties for creating a BdaProjectLambda
+ */
+export interface BdaProjectLambdaProps {
+  /**
+   * The S3 bucket
+   * for input data used by the Bedrock Data Automation process.
+   */
+  readonly inputBucket: s3.IBucket;
+  /**
+   * The layers to apply to this lambda function.
+   */
+  readonly lambdaLayers: lambda.ILayerVersion[];
+}
+
+/**
+ * Lambda function that manages BDA project creation
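+ *
+ * Illustrative usage sketch; the bucket and layer variables below are
+ * assumptions for the example:
+ * @example
+ * const projectFn = new BdaProjectLambda(this, 'BDAProject', {
+ *   inputBucket: myInputBucket,
+ *   lambdaLayers: [powertoolsLayer, boto3Layer],
+ * });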
+ */
+export class BdaProjectLambda extends lambda.Function {
+
+  constructor(scope: Construct, id: string, props: BdaProjectLambdaProps) {
+
+    const role = new iam.Role(
+      scope,
+      `${id}createProject`,
+      {
+        assumedBy: new iam.ServicePrincipal('lambda.amazonaws.com'),
+      },
+    );
+
+    super(scope, id, {
+      runtime: lambda.Runtime.PYTHON_3_13,
+      handler: 'lambda.handler',
+      code: lambda.Code.fromAsset(path.join(__dirname, '../../../../lambda/aws-bedrock-data-automation/create_project')),
+      layers: props.lambdaLayers,
+      environment: {
+        INPUT_BUCKET: props.inputBucket.bucketName,
+        POWERTOOLS_SERVICE_NAME: 'BEDROCK_PROJECT',
+      },
+      memorySize: 1024,
+      role: role,
+      architecture: lambda.Architecture.X86_64,
+      timeout: Duration.minutes(15),
+    });
+
+    // Add basic permissions for CloudWatch logs
+    const cloudwatchLogsPolicy = new iam.Policy(
+      scope,
+      `${id}LambdaBasicExecPolicy`,
+      {
+        statements: [
+          new iam.PolicyStatement({
+            effect: iam.Effect.ALLOW,
+            actions: [
+              'logs:CreateLogGroup',
+              'logs:CreateLogStream',
+              'logs:PutLogEvents',
+            ],
+            resources: [
+              `arn:aws:logs:${Aws.REGION}:${Aws.ACCOUNT_ID}:log-group:/aws/lambda/${this.functionName}`,
+              `arn:aws:logs:${Aws.REGION}:${Aws.ACCOUNT_ID}:log-group:/aws/lambda/${this.functionName}:*`,
+            ],
+          }),
+        ],
+      },
+    );
+
+    NagSuppressions.addResourceSuppressions(
+      cloudwatchLogsPolicy,
+      [{ id: 'AwsSolutions-IAM5', reason: 'Lambda requires CloudWatch logs permissions with log group name patterns' }],
+    );
+
+    role.attachInlinePolicy(cloudwatchLogsPolicy);
+
+    // Permissions for BDA
+    const bedrockBDAPolicy = new iam.Policy(
+      scope,
+      `${id}BedrockBDAProjectPolicy`,
+      {
+        statements: [
+          new iam.PolicyStatement({
+            effect: iam.Effect.ALLOW,
+            actions: [
+              'bedrock:CreateDataAutomationProject',
+              'bedrock:ListDataAutomationProjects',
+              'bedrock:DeleteDataAutomationProject',
+              'bedrock:GetDataAutomationProject',
+            ],
+            resources: ['*'],
+          }),
+        ],
+      },
+    );
+
+    NagSuppressions.addResourceSuppressions(
+      bedrockBDAPolicy,
+      [{
+        id: 'AwsSolutions-IAM5',
+        reason: 'Project Lambda needs access for managing data processing projects',
+      }],
+      true,
+    );
+    role.attachInlinePolicy(bedrockBDAPolicy);
+
+    // Give Lambda read/write access to the input bucket
+    if (this.role) {
+      props.inputBucket.grantReadWrite(this.role);
+    }
+
+    NagSuppressions.addResourceSuppressions(
+      role,
+      [{
+        id: 'AwsSolutions-IAM5',
+        reason: 'Lambda needs read/write access to process files in the input bucket',
+      }],
+      true,
+    );
+  }
+}
\ No newline at end of file
diff --git a/src/patterns/gen-ai/aws-bedrock-data-automation/bda-results-lambda.ts b/src/patterns/gen-ai/aws-bedrock-data-automation/bda-results-lambda.ts
new file mode 100644
index 00000000..9da4fb04
--- /dev/null
+++ b/src/patterns/gen-ai/aws-bedrock-data-automation/bda-results-lambda.ts
@@ -0,0 +1,128 @@
+/**
+ * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance
+ * with the License. A copy of the License is located at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES
+ * OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions
+ * and limitations under the License.
+ */
+
+import * as path from 'path';
+import { aws_iam as iam, aws_lambda as lambda, Duration, Aws } from 'aws-cdk-lib';
+import * as s3 from 'aws-cdk-lib/aws-s3';
+import { NagSuppressions } from 'cdk-nag';
+import { Construct } from 'constructs';
+
+/**
+ * Properties for creating a BdaResultsLambda
+ */
+export interface BdaResultsLambdaProps {
+  /**
+   * The layers to apply to this lambda function.
+   */
+  readonly lambdaLayers: lambda.ILayerVersion[];
+  /**
+   * The S3 bucket to which the Bedrock Data Automation
+   * process publishes its generated results.
+   */
+  readonly outputBucket: s3.IBucket;
+}
+
+/**
+ * Lambda function that manages BDA results
+ */
+export class BdaResultsLambda extends lambda.Function {
+
+  constructor(scope: Construct, id: string, props: BdaResultsLambdaProps) {
+
+    const role = new iam.Role(
+      scope,
+      `${id}bdaResults`,
+      {
+        assumedBy: new iam.ServicePrincipal('lambda.amazonaws.com'),
+      },
+    );
+
+    super(scope, id, {
+      runtime: lambda.Runtime.PYTHON_3_13,
+      handler: 'lambda.handler',
+      code: lambda.Code.fromAsset(path.join(__dirname, '../../../../lambda/aws-bedrock-data-automation/data_result')),
+      layers: props.lambdaLayers,
+      environment: {
+        POWERTOOLS_SERVICE_NAME: 'BEDROCK_RESULT',
+      },
+      memorySize: 1024,
+      role: role,
+      architecture: lambda.Architecture.X86_64,
+      timeout: Duration.minutes(15),
+    });
+
+    // Add basic permissions for CloudWatch logs
+    const cloudwatchLogsPolicy = new iam.Policy(
+      scope,
+      `${id}LambdaBasicExecPolicy`,
+      {
+        statements: [
+          new iam.PolicyStatement({
+            effect: iam.Effect.ALLOW,
+            actions: [
+              'logs:CreateLogGroup',
+              'logs:CreateLogStream',
+              'logs:PutLogEvents',
+            ],
+            resources: [
+              `arn:aws:logs:${Aws.REGION}:${Aws.ACCOUNT_ID}:log-group:/aws/lambda/${this.functionName}`,
+              `arn:aws:logs:${Aws.REGION}:${Aws.ACCOUNT_ID}:log-group:/aws/lambda/${this.functionName}:*`,
+            ],
+          }),
+        ],
+      },
+    );
+
+    NagSuppressions.addResourceSuppressions(
+      cloudwatchLogsPolicy,
+      [{ id: 'AwsSolutions-IAM5', reason: 'Lambda requires CloudWatch logs permissions with log group name patterns' }],
+    );
+
+    role.attachInlinePolicy(cloudwatchLogsPolicy);
+
+    // Permissions for BDA
+    const bedrockBDAPolicy = new iam.Policy(
+      scope,
+      `${id}BDAStatusPolicy`,
+      {
+        statements: [
+          new iam.PolicyStatement({
+            effect: iam.Effect.ALLOW,
+            actions: [
+              'bedrock:GetDataAutomationStatus',
+            ],
+            resources: ['*'],
+          }),
+        ],
+      },
+    );
+
+    NagSuppressions.addResourceSuppressions(
+      bedrockBDAPolicy,
+      [{
+        id: 'AwsSolutions-IAM5',
+        reason: 'Lambda needs access for data processing and checking status',
+      }],
+      true,
+    );
+
+    role.attachInlinePolicy(bedrockBDAPolicy);
+
+    if (this.role) {
+      props.outputBucket.grantReadWrite(this.role);
+    }
+  }
+}
\ No newline at end of file
diff --git a/src/patterns/gen-ai/aws-bedrock-data-automation/index.ts b/src/patterns/gen-ai/aws-bedrock-data-automation/index.ts
new file mode 100644
index 00000000..ca1a3125
--- /dev/null
+++ b/src/patterns/gen-ai/aws-bedrock-data-automation/index.ts
@@ -0,0 +1,229 @@
+/**
+ * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance
+ * with the License. A copy of the License is located at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES
+ * OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions
+ * and limitations under the License.
+ */
+
+import * as path from 'path';
+import { PythonLayerVersion } from '@aws-cdk/aws-lambda-python-alpha';
+import * as cdk from 'aws-cdk-lib';
+import { Aws } from 'aws-cdk-lib';
+import * as lambda from 'aws-cdk-lib/aws-lambda';
+import * as s3 from 'aws-cdk-lib/aws-s3';
+import { md5hash } from 'aws-cdk-lib/core/lib/helpers-internal';
+import { Construct } from 'constructs';
+import { BdaBlueprintLambda } from './bda-blueprint-lambda';
+import { BdaDataProcessingLambda } from './bda-data-processing-lambda';
+import { BdaProjectLambda } from './bda-project-lambda';
+import { BdaResultsLambda } from './bda-results-lambda';
+import { BaseClass } from '../../../common/base-class';
+
+/******************************************************************************
+ * PROPS FOR NEW CONSTRUCT
+ *****************************************************************************/
+/**
+ * Properties for creating a CDK BDA construct.
+ */
+export interface BedrockDataAutomationProps {
+  /**
+   * - Optional. The S3 bucket
+   * for input data used by the Bedrock Data Automation process.
+   * If not provided, a new bucket will be created.
+   */
+  readonly inputBucket?: s3.IBucket;
+  /**
+   * - Optional. The S3 bucket for storing
+   * output files generated by the Bedrock Data Automation process.
+   * If not provided, a new bucket will be created.
+   */
+  readonly outputBucket?: s3.IBucket;
+  /**
+   * - Optional. Indicates whether a custom
+   * Bedrock Data Automation blueprint is required. If true, the necessary resources will be created.
+   *
+   * @default - false
+   */
+  readonly isCustomBDABlueprintRequired?: boolean;
+  /**
+   * - Optional. Indicates whether a Bedrock Data
+   * Automation project is required. If true, the necessary resources will be created.
+   *
+   * @default - false
+   */
+  readonly isBDAProjectRequired?: boolean;
+  /**
+   * - Optional. Indicates whether a Bedrock Data
+   * Automation invocation is required. If true, the necessary resources will be created.
+   *
+   * @default - false
+   */
+  readonly isBDAInvocationRequired?: boolean;
+  /**
+   * - Optional. Indicates whether the status of the
+   * Bedrock Data Automation process is required. If true, the necessary resources will be created.
+   *
+   * @default - false
+   */
+  readonly isStatusRequired?: boolean;
+}
+
+/******************************************************************************
+ * NEW CONSTRUCT DEFINITION
+ *****************************************************************************/
+/**
+ * Class to create a BDA pattern with CDK.
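+ *
+ * A minimal usage sketch; the scope, ids, and flag choices below are
+ * illustrative assumptions, not requirements of the construct:
+ * @example
+ * const bda = new BedrockDataAutomation(this, 'BDAPattern', {
+ *   isCustomBDABlueprintRequired: true,
+ *   isBDAInvocationRequired: true,
+ * });
+ * // Buckets are created automatically when none are supplied:
+ * const inputBucket = bda.bdaInputBucket;
+ * const outputBucket = bda.bdaOutputBucket;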
+ */
+export class BedrockDataAutomation extends BaseClass {
+  // ------------------------------------------------------
+  // Attributes
+  // ------------------------------------------------------
+  /**
+   * The S3 bucket for input data used by the Bedrock Data Automation process.
+   * IMPORTANT: If isCustomBDABlueprintRequired, isBDAProjectRequired and isBDAInvocationRequired are all set to false
+   * in Pattern Construct Props, this property will be undefined.
+   */
+  public readonly bdaInputBucket?: s3.IBucket;
+  /**
+   * The S3 bucket for output data generated by the Bedrock Data Automation process.
+   * IMPORTANT: If both isBDAInvocationRequired and isStatusRequired are set to false in Pattern Construct Props,
+   * this property will be undefined.
+   */
+  public readonly bdaOutputBucket?: s3.IBucket;
+  /**
+   * The Lambda function responsible for creating the Bedrock Data Automation blueprint.
+   * IMPORTANT: If isCustomBDABlueprintRequired is set to false in Pattern Construct Props,
+   * this property will be undefined.
+   */
+  public readonly bdaBlueprintLambdaFunction?: lambda.Function;
+  /**
+   * The Lambda function responsible for handling the Bedrock Data Automation project.
+   * IMPORTANT: If isBDAProjectRequired is set to false in Pattern Construct Props,
+   * this property will be undefined.
+   */
+  public readonly bdaProjectFunction?: lambda.Function;
+  /**
+   * The Lambda function responsible for invoking the Bedrock Data Automation process.
+   * IMPORTANT: If isBDAInvocationRequired is set to false in Pattern Construct Props,
+   * this property will be undefined.
+   */
+  public readonly bdaInvocationFunction?: lambda.Function;
+  /**
+   * The Lambda function responsible for checking the status of the Bedrock Data Automation process.
+   * IMPORTANT: If isStatusRequired is set to false in Pattern Construct Props,
+   * this property will be undefined.
+   */
+  public readonly bdaResultStatusFunction?: lambda.Function;
+  /**
+   * The AWS Lambda Powertools layer used in the Lambda functions.
+   */
+  public readonly powertoolsLayer: lambda.ILayerVersion;
+  /**
+   * The Boto3 layer used in the Lambda functions for AWS SDK interactions.
+   */
+  public readonly boto3Layer: lambda.LayerVersion;
+  // ------------------------------------------------------
+  // CONSTRUCTOR
+  // ------------------------------------------------------
+  constructor(scope: Construct, id: string, props: BedrockDataAutomationProps) {
+    super(scope, id);
+
+    // ------------------------------------------------------
+    // Set properties and defaults
+    // ------------------------------------------------------
+    this.powertoolsLayer = lambda.LayerVersion.fromLayerVersionArn(this, 'PowertoolsLayer',
+      `arn:aws:lambda:${cdk.Stack.of(this).region}:017000801446:layer:AWSLambdaPowertoolsPythonV3-python313-x86_64:8`,
+    );
+
+    this.boto3Layer = new PythonLayerVersion(this, 'Boto3Layer', {
+      entry: path.join(__dirname, '../../../../layer'),
+      compatibleRuntimes: [lambda.Runtime.PYTHON_3_13],
+      description: 'Latest boto3 layer for Bedrock Data Automation',
+      removalPolicy: cdk.RemovalPolicy.DESTROY,
+    });
+
+    // Compute hash used for bucket names
+    const hash = md5hash(id + Aws.ACCOUNT_ID + Aws.REGION);
+
+    // Create or reuse the input bucket when any feature that consumes it is enabled
+    if (props.isCustomBDABlueprintRequired || props.isBDAProjectRequired || props.isBDAInvocationRequired) {
+      this.bdaInputBucket = this.handleS3Bucket(props.inputBucket, 'input', hash);
+    }
+
+    // Create or reuse the output bucket when invocation or status checks depend on it
+    if (props.isBDAInvocationRequired || props.isStatusRequired) {
+      this.bdaOutputBucket = this.handleS3Bucket(props.outputBucket, 'output', hash);
+    }
+
+    if (props.isCustomBDABlueprintRequired && this.bdaInputBucket) {
+      this.bdaBlueprintLambdaFunction = new BdaBlueprintLambda(this, 'bdablueprintlambda', {
+        inputBucket: this.bdaInputBucket,
+        lambdaLayers: [this.powertoolsLayer, this.boto3Layer],
+      });
+    }
+    if (props.isBDAProjectRequired && this.bdaInputBucket) {
+      this.bdaProjectFunction = new BdaProjectLambda(this, 'bdaprojectlambda', {
+        inputBucket: this.bdaInputBucket,
+        lambdaLayers: [this.powertoolsLayer, this.boto3Layer],
+      });
+    }
+    if (props.isBDAInvocationRequired && this.bdaInputBucket && this.bdaOutputBucket) {
+      this.bdaInvocationFunction = new BdaDataProcessingLambda(this, 'bdainvocationlambda', {
+        inputBucket: this.bdaInputBucket,
+        outputBucket: this.bdaOutputBucket,
+        lambdaLayers: [this.powertoolsLayer, this.boto3Layer],
+      });
+    }
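+    // The results Lambda depends only on the output bucket, so a status-only
+    // configuration still receives the resources it requires.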
+    if (props.isStatusRequired && this.bdaOutputBucket) {
+      this.bdaResultStatusFunction = new BdaResultsLambda(this, 'bdaresultslambda', {
+        lambdaLayers: [this.powertoolsLayer, this.boto3Layer],
+        outputBucket: this.bdaOutputBucket,
+      });
+    }
+  }
+
+  /**
+   * Handles the creation or retrieval of an S3 bucket.
+   *
+   * @param existingBucket - An optional existing S3 bucket to use.
+   * @param type - A string indicating the type of bucket (e.g., 'input' or 'output').
+   * @param hash - A hash appended to construct ids so created buckets stay unique per account and region.
+   * @returns The existing bucket if provided, or a newly created S3 bucket.
+   */
+  private handleS3Bucket(existingBucket: s3.IBucket | undefined, type: string, hash: string): s3.IBucket {
+
+    if (existingBucket) {
+      return existingBucket;
+    } else {
+      // bucket for storing server access logging
+      const serverAccessLogBucket = new s3.Bucket(
+        this,
+        `${hash}-${type}-serveraccesslogbucket`,
+        {
+          blockPublicAccess: s3.BlockPublicAccess.BLOCK_ALL,
+          encryption: s3.BucketEncryption.S3_MANAGED,
+          enforceSSL: true,
+          versioned: true,
+          lifecycleRules: [
+            {
+              expiration: cdk.Duration.days(90),
+            },
+          ],
+        },
+      );
+
+      // create the bucket
+      return new s3.Bucket(this, `${hash}-${type}-bucket`, {
+        encryption: s3.BucketEncryption.S3_MANAGED,
+        enforceSSL: true,
+        blockPublicAccess: s3.BlockPublicAccess.BLOCK_ALL,
+        versioned: true,
+        serverAccessLogsBucket: serverAccessLogBucket,
+        serverAccessLogsPrefix: `${type}-bucket-logs/`,
+        objectOwnership: s3.ObjectOwnership.BUCKET_OWNER_ENFORCED,
+        removalPolicy: cdk.RemovalPolicy.DESTROY,
+        autoDeleteObjects: true,
+      });
+    }
+  }
+}
diff --git a/src/patterns/gen-ai/aws-model-deployment-sagemaker/code-generation/generate-dlc-container-images.ts b/src/patterns/gen-ai/aws-model-deployment-sagemaker/code-generation/generate-dlc-container-images.ts
index f5d7550f..1592e906 100644
--- a/src/patterns/gen-ai/aws-model-deployment-sagemaker/code-generation/generate-dlc-container-images.ts
+++ b/src/patterns/gen-ai/aws-model-deployment-sagemaker/code-generation/generate-dlc-container-images.ts
@@ -173,6 +173,7 @@ function generateCode(repositoryTagData: { [repositoryName: string]: string[] })
  * OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions
  * and limitations under the License.
  */
+
 import { Stack } from 'aws-cdk-lib';
 import * as ecr from 'aws-cdk-lib/aws-ecr';
 import * as iam from 'aws-cdk-lib/aws-iam';
diff --git a/src/patterns/gen-ai/aws-model-deployment-sagemaker/code-generation/generate-jumpstart-models.ts b/src/patterns/gen-ai/aws-model-deployment-sagemaker/code-generation/generate-jumpstart-models.ts
index ab02c6bb..5a4f3cb4 100644
--- a/src/patterns/gen-ai/aws-model-deployment-sagemaker/code-generation/generate-jumpstart-models.ts
+++ b/src/patterns/gen-ai/aws-model-deployment-sagemaker/code-generation/generate-jumpstart-models.ts
@@ -254,6 +254,7 @@ function generateCode() {
  * OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions
  * and limitations under the License.
  */
+
 import * as zlib from 'zlib';
 import * as data from './jumpstart-models.json';
diff --git a/test/patterns/gen-ai/aws-bedrock-data-automation/aws-bedrock-data-automation-test.ts b/test/patterns/gen-ai/aws-bedrock-data-automation/aws-bedrock-data-automation-test.ts
new file mode 100644
index 00000000..a0d6c9ce
--- /dev/null
+++ b/test/patterns/gen-ai/aws-bedrock-data-automation/aws-bedrock-data-automation-test.ts
@@ -0,0 +1,209 @@
+/**
+ * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance
+ * with the License. A copy of the License is located at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES
+ * OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions
+ * and limitations under the License.
+ */
+
+import * as cdk from 'aws-cdk-lib';
+import { Match, Template } from 'aws-cdk-lib/assertions';
+import * as s3 from 'aws-cdk-lib/aws-s3';
+import { AwsSolutionsChecks } from 'cdk-nag';
+import { BedrockDataAutomation } from '../../../../src/patterns/gen-ai/aws-bedrock-data-automation';
+
+describe('BedrockDataAutomation Construct', () => {
+  let app: cdk.App;
+  let stack: cdk.Stack;
+  let template: Template;
+
+  beforeEach(() => {
+    app = new cdk.App();
+    cdk.Aspects.of(app).add(new AwsSolutionsChecks());
+    stack = new cdk.Stack(app, 'TestStack', {
+      env: { account: cdk.Aws.ACCOUNT_ID, region: cdk.Aws.REGION },
+    });
+  });
+
+  describe('Blueprint Resources', () => {
+    beforeEach(() => {
+      // Create construct with only blueprint required
+      new BedrockDataAutomation(stack, 'TestConstruct', {
+        isCustomBDABlueprintRequired: true,
+        isBDAProjectRequired: false,
+        isBDAInvocationRequired: false,
+        isStatusRequired: false,
+      });
+      template = Template.fromStack(stack);
+    });
+
+    test('creates input bucket when no bucket is provided', () => {
+      template.hasResourceProperties('AWS::S3::Bucket', {
+        BucketEncryption: {
+          ServerSideEncryptionConfiguration: [
+            {
+              ServerSideEncryptionByDefault: {
+                SSEAlgorithm: 'AES256',
+              },
+            },
+          ],
+        },
+        PublicAccessBlockConfiguration: {
+          BlockPublicAcls: true,
+          BlockPublicPolicy: true,
+          IgnorePublicAcls: true,
+          RestrictPublicBuckets: true,
+        },
+        VersioningConfiguration: {
+          Status: 'Enabled',
+        },
+      });
+    });
+
+    test('creates blueprint lambda function', () => {
+      template.hasResourceProperties('AWS::Lambda::Function', {
+        Handler: 'lambda.handler',
+        Runtime: 'python3.13',
+        Environment: {
+          Variables: {
+            INPUT_BUCKET: {
+              Ref: Match.anyValue(),
+            },
+          },
+        },
+      });
+    });
+
+    test('creates lambda role with correct policies', () => {
+      template.hasResourceProperties('AWS::IAM::Role', {
+        AssumeRolePolicyDocument: {
+          Statement: [
+            {
+              Action: 'sts:AssumeRole',
+              Effect: 'Allow',
+              Principal: {
+                Service: 'lambda.amazonaws.com',
+              },
+            },
+          ],
+        },
+      });
+
+      // Check for the S3 read grant on the input bucket
+      template.hasResourceProperties('AWS::IAM::Policy', {
+        PolicyDocument: {
+          Statement: Match.arrayWith([
+            Match.objectLike({
+              Action: Match.arrayWith(['s3:GetObject*']),
+              Effect: 'Allow',
+            }),
+          ]),
+        },
+      });
+
+      // Check for the Bedrock blueprint policy
+      template.hasResourceProperties('AWS::IAM::Policy', {
+        PolicyDocument: {
+          Statement: Match.arrayWith([
+            Match.objectLike({
+              Action: Match.arrayWith(['bedrock:ListBlueprints']),
+              Effect: 'Allow',
+              Resource: '*',
+            }),
+          ]),
+        },
+      });
+    });
+  });
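+
+  // NOTE: matchers used inside Template assertions must come from
+  // 'aws-cdk-lib/assertions' (Match); jest matchers such as expect.any are
+  // compared literally and will not match the synthesized template.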
+
+  describe('Project Resources', () => {
+    beforeEach(() => {
+      new BedrockDataAutomation(stack, 'TestConstruct', {
+        isCustomBDABlueprintRequired: false,
+        isBDAProjectRequired: true,
+        isBDAInvocationRequired: false,
+        isStatusRequired: false,
+      });
+      template = Template.fromStack(stack);
+    });
+
+    test('creates project function role with correct policies', () => {
+      template.hasResourceProperties('AWS::IAM::Role', {
+        AssumeRolePolicyDocument: {
+          Statement: [
+            {
+              Action: 'sts:AssumeRole',
+              Effect: 'Allow',
+              Principal: {
+                Service: 'lambda.amazonaws.com',
+              },
+            },
+          ],
+        },
+      });
+    });
+  });
+
+  describe('Invocation Resources', () => {
+    test('creates input and output buckets when none are provided', () => {
+      new BedrockDataAutomation(stack, 'TestConstruct', {
+        isCustomBDABlueprintRequired: false,
+        isBDAProjectRequired: false,
+        isBDAInvocationRequired: true,
+        isStatusRequired: false,
+      });
+
+      template = Template.fromStack(stack);
+
+      // Input and output buckets, plus one server access log bucket for each
+      template.resourceCountIs('AWS::S3::Bucket', 4);
+    });
+
+    test('creates output bucket and uses existing input bucket', () => {
+      const inputBucket = new s3.Bucket(stack, 'testBucket');
+
+      new BedrockDataAutomation(stack, 'TestConstruct', {
+        isCustomBDABlueprintRequired: false,
+        isBDAProjectRequired: false,
+        isBDAInvocationRequired: true,
+        isStatusRequired: false,
+        inputBucket: inputBucket,
+      });
+
+      template = Template.fromStack(stack);
+
+      // Check output bucket is created
+      template.hasResourceProperties('AWS::S3::Bucket', {
+        BucketEncryption: {
+          ServerSideEncryptionConfiguration: [
+            {
+              ServerSideEncryptionByDefault: {
+                SSEAlgorithm: 'AES256',
+              },
+            },
+          ],
+        },
+      });
+    });
+  });
+
+  describe('Resource Access', () => {
+    test('allows access to created resources', () => {
+      const construct = new BedrockDataAutomation(stack, 'TestConstruct', {
+        isCustomBDABlueprintRequired: true,
+        isBDAProjectRequired: false,
+        isBDAInvocationRequired: false,
+        isStatusRequired: false,
+      });
+
+      expect(construct.bdaInputBucket).toBeDefined();
+      expect(construct.bdaBlueprintLambdaFunction).toBeDefined();
+    });
+  });
+});
diff --git a/yarn.lock b/yarn.lock
index ee686f26..53bc6cda 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -43,6 +43,11 @@
   resolved "https://registry.yarnpkg.com/@aws-cdk/asset-node-proxy-agent-v6/-/asset-node-proxy-agent-v6-2.1.0.tgz#6d3c7860354d4856a7e75375f2f0ecab313b4989"
   integrity sha512-7bY3J8GCVxLupn/kNmpPc5VJz8grx+4RKfnnJiO1LG+uxkZfANZG3RMHhE+qQxxwkyQ9/MfPtTpf748UhR425A==
 
+"@aws-cdk/aws-lambda-python-alpha@2.178.0-alpha.0":
+  version "2.178.0-alpha.0"
+  resolved "https://registry.yarnpkg.com/@aws-cdk/aws-lambda-python-alpha/-/aws-lambda-python-alpha-2.178.0-alpha.0.tgz#ad65ed9b4025036b001af78ed7a34c74005b436d"
+  integrity sha512-sjlE3ifs/li05x8UBpTFWMyGnWcYUcvNz07iLYFu2pVXaMJGB+4EKyMIo+px18tflaw19LN+MXpgGwDvg5eFlg==
+
 "@aws-cdk/cfnspec@2.68.0":
   version "2.68.0"
   resolved "https://registry.yarnpkg.com/@aws-cdk/cfnspec/-/cfnspec-2.68.0.tgz#e678c62d92ca76f8513a23c3c78f00ae49a7ab2e"