From 66b0ed1cbefa817c581263a39c247a1a79878df0 Mon Sep 17 00:00:00 2001 From: constanca Date: Wed, 17 Apr 2024 11:30:33 +0200 Subject: [PATCH 01/26] . --- main_aws.py | 1 + 1 file changed, 1 insertion(+) diff --git a/main_aws.py b/main_aws.py index 729e3269..afbad7b6 100644 --- a/main_aws.py +++ b/main_aws.py @@ -9,6 +9,7 @@ from handlers.aws import lambda_handler + def handler(lambda_event: dict[str, Any], lambda_context: context_.Context) -> Any: """ AWS Lambda handler as main entrypoint From c07bae01898d2fca6b8e186f05c8d1ce0b3393e9 Mon Sep 17 00:00:00 2001 From: constanca Date: Wed, 17 Apr 2024 11:40:09 +0200 Subject: [PATCH 02/26] . --- tests/handlers/aws/test_integrations.py | 4004 ----------------------- 1 file changed, 4004 deletions(-) diff --git a/tests/handlers/aws/test_integrations.py b/tests/handlers/aws/test_integrations.py index bf2a632c..e69de29b 100644 --- a/tests/handlers/aws/test_integrations.py +++ b/tests/handlers/aws/test_integrations.py @@ -1,4004 +0,0 @@ -# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -# or more contributor license agreements. Licensed under the Elastic License 2.0; -# you may not use this file except in compliance with the Elastic License 2.0. - -import datetime -import gzip -import os -import time -from typing import Any, Optional -from unittest import TestCase - -import boto3 -import mock -import pytest -from botocore.client import BaseClient as BotoBaseClient -from testcontainers.localstack import LocalStackContainer - -from handlers.aws.exceptions import ReplayHandlerException -from main_aws import handler -from share import get_hex_prefix, json_dumper, json_parser -from tests.testcontainers.es import ElasticsearchContainer -from tests.testcontainers.logstash import LogstashContainer - -from .utils import ( - _AWS_REGION, - _S3_NOTIFICATION_EVENT_TIME, - ContextMock, - _create_secrets, - _kinesis_create_stream, - _kinesis_put_records, - _kinesis_retrieve_event_from_kinesis_stream, - _load_file_fixture, - _logs_create_cloudwatch_logs_group, - _logs_create_cloudwatch_logs_stream, - _logs_retrieve_event_from_cloudwatch_logs, - _logs_upload_event_to_cloudwatch_logs, - _REMAINING_TIME_FORCE_CONTINUE_0ms, - _s3_upload_content_to_bucket, - _sqs_create_queue, - _sqs_get_messages, - _sqs_send_messages, - _sqs_send_s3_notifications, - _time_based_id, -) - -_OVER_COMPLETION_GRACE_PERIOD_2m = 1 + (1000 * 60 * 2) - - -@pytest.mark.integration -class TestLambdaHandlerIntegration(TestCase): - elasticsearch: Optional[ElasticsearchContainer] = None - logstash: Optional[LogstashContainer] = None - localstack: Optional[LocalStackContainer] = None - - aws_session: Optional[boto3.Session] = None - s3_client: Optional[BotoBaseClient] = None - logs_client: Optional[BotoBaseClient] = None - sqs_client: Optional[BotoBaseClient] = None - kinesis_client: Optional[BotoBaseClient] = None - sm_client: Optional[BotoBaseClient] = None - ec2_client: Optional[BotoBaseClient] = None - - secret_arn: Optional[Any] = None - - mocks: dict[str, Any] = {} - - @classmethod - def setUpClass(cls) -> None: - esc = ElasticsearchContainer() - cls.elasticsearch = esc.start() - - lgc = LogstashContainer(es_container=esc) - cls.logstash = lgc.start() - - lsc = LocalStackContainer(image="localstack/localstack:3.0.1") - lsc.with_env("EAGER_SERVICE_LOADING", "1") - lsc.with_env("SQS_DISABLE_CLOUDWATCH_METRICS", "1") - lsc.with_services("ec2", "kinesis", "logs", "s3", "sqs", "secretsmanager") - - cls.localstack = lsc.start() - - session = 
boto3.Session(region_name=_AWS_REGION) - cls.aws_session = session - cls.s3_client = session.client("s3", endpoint_url=cls.localstack.get_url()) - cls.logs_client = session.client("logs", endpoint_url=cls.localstack.get_url()) - cls.sqs_client = session.client("sqs", endpoint_url=cls.localstack.get_url()) - cls.kinesis_client = session.client("kinesis", endpoint_url=cls.localstack.get_url()) - cls.sm_client = session.client("secretsmanager", endpoint_url=cls.localstack.get_url()) - cls.ec2_client = session.client("ec2", endpoint_url=cls.localstack.get_url()) - - cls.secret_arn = _create_secrets( - cls.sm_client, - "es_secrets", - {"username": cls.elasticsearch.elastic_user, "password": cls.elasticsearch.elastic_password}, - ) - - cls.mocks = { - "storage.S3Storage._s3_client": mock.patch("storage.S3Storage._s3_client", new=cls.s3_client), - "share.secretsmanager._get_aws_sm_client": mock.patch( - "share.secretsmanager._get_aws_sm_client", lambda region_name: cls.sm_client - ), - "handlers.aws.utils.get_sqs_client": mock.patch( - "handlers.aws.utils.get_sqs_client", lambda: cls.sqs_client - ), - "handlers.aws.utils.get_ec2_client": mock.patch( - "handlers.aws.utils.get_ec2_client", lambda: cls.ec2_client - ), - "handlers.aws.handler.get_sqs_client": mock.patch( - "handlers.aws.handler.get_sqs_client", lambda: cls.sqs_client - ), - } - - for k, m in cls.mocks.items(): - m.start() - - @classmethod - def tearDownClass(cls) -> None: - assert cls.elasticsearch is not None - assert cls.logstash is not None - assert cls.localstack is not None - - cls.elasticsearch.stop() - cls.logstash.stop() - cls.localstack.stop() - - for k, m in cls.mocks.items(): - m.stop() - - def setUp(self) -> None: - assert isinstance(self.elasticsearch, ElasticsearchContainer) - assert isinstance(self.logstash, LogstashContainer) - assert isinstance(self.localstack, LocalStackContainer) - - os.environ["S3_CONFIG_FILE"] = "" - - sqs_continue_queue = _sqs_create_queue(self.sqs_client, _time_based_id(suffix="continuing")) - sqs_replay_queue = _sqs_create_queue(self.sqs_client, _time_based_id(suffix="replay")) - os.environ["SQS_CONTINUE_URL"] = sqs_continue_queue["QueueUrl"] - os.environ["SQS_REPLAY_URL"] = sqs_replay_queue["QueueUrl"] - - self.sqs_continue_queue_arn = sqs_continue_queue["QueueArn"] - self.sqs_replay_queue_arn = sqs_replay_queue["QueueArn"] - - self.default_tags: str = """ - - "tag1" - - "tag2" - - "tag3" - """ - - self.default_outputs: str = f""" - - type: "elasticsearch" - args: - elasticsearch_url: "{self.elasticsearch.get_url()}" - ssl_assert_fingerprint: {self.elasticsearch.ssl_assert_fingerprint} - username: "{self.secret_arn}:username" - password: "{self.secret_arn}:password" - - type: "logstash" - args: - logstash_url: "{self.logstash.get_url()}" - ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} - username: "{self.logstash.logstash_user}" - password: "{self.logstash.logstash_password}" - """ - - def tearDown(self) -> None: - assert isinstance(self.elasticsearch, ElasticsearchContainer) - assert isinstance(self.logstash, LogstashContainer) - - self.logstash.reset() - self.elasticsearch.reset() - - os.environ["S3_CONFIG_FILE"] = "" - os.environ["SQS_CONTINUE_URL"] = "" - os.environ["SQS_REPLAY_URL"] = "" - - def test_ls_es_output(self) -> None: - assert isinstance(self.elasticsearch, ElasticsearchContainer) - assert isinstance(self.logstash, LogstashContainer) - assert isinstance(self.localstack, LocalStackContainer) - - s3_sqs_queue_name = _time_based_id(suffix="source-s3-sqs") - - 
s3_sqs_queue = _sqs_create_queue(self.sqs_client, s3_sqs_queue_name, self.localstack.get_url()) - - s3_sqs_queue_arn = s3_sqs_queue["QueueArn"] - s3_sqs_queue_url = s3_sqs_queue["QueueUrl"] - - config_yaml: str = f""" - inputs: - - type: s3-sqs - id: "{s3_sqs_queue_arn}" - tags: {self.default_tags} - outputs: {self.default_outputs} - """ - - config_file_path = "config.yaml" - config_bucket_name = _time_based_id(suffix="config-bucket") - _s3_upload_content_to_bucket( - client=self.s3_client, - content=config_yaml.encode("utf-8"), - content_type="text/plain", - bucket_name=config_bucket_name, - key=config_file_path, - ) - - os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" - fixtures = [ - _load_file_fixture("cloudwatch-log-1.json"), - _load_file_fixture("cloudwatch-log-2.json"), - ] - - cloudtrail_filename_digest = ( - "AWSLogs/aws-account-id/CloudTrail-Digest/region/yyyy/mm/dd/" - "aws-account-id_CloudTrail-Digest_region_end-time_random-string.log.gz" - ) - cloudtrail_filename_non_digest = ( - "AWSLogs/aws-account-id/CloudTrail/region/yyyy/mm/dd/" - "aws-account-id_CloudTrail_region_end-time_random-string.log.gz" - ) - - s3_bucket_name = _time_based_id(suffix="test-bucket") - - _s3_upload_content_to_bucket( - client=self.s3_client, - content=gzip.compress(fixtures[0].encode("utf-8")), - content_type="application/x-gzip", - bucket_name=s3_bucket_name, - key=cloudtrail_filename_digest, - ) - - _s3_upload_content_to_bucket( - client=self.s3_client, - content=gzip.compress(fixtures[1].encode("utf-8")), - content_type="application/x-gzip", - bucket_name=s3_bucket_name, - key=cloudtrail_filename_non_digest, - ) - - _sqs_send_s3_notifications( - self.sqs_client, - s3_sqs_queue_url, - s3_bucket_name, - [cloudtrail_filename_digest, cloudtrail_filename_non_digest], - ) - - event, _ = _sqs_get_messages(self.sqs_client, s3_sqs_queue_url, s3_sqs_queue_arn) - - ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) - first_call = handler(event, ctx) # type:ignore - - assert first_call == "completed" - - self.elasticsearch.refresh(index="logs-aws.cloudtrail-default") - assert self.elasticsearch.count(index="logs-aws.cloudtrail-default")["count"] == 2 - - res = self.elasticsearch.search(index="logs-aws.cloudtrail-default", sort="_seq_no") - assert res["hits"]["total"] == {"value": 2, "relation": "eq"} - - assert res["hits"]["hits"][0]["_source"]["message"] == fixtures[0].rstrip("\n") - assert res["hits"]["hits"][0]["_source"]["log"]["offset"] == 0 - assert ( - res["hits"]["hits"][0]["_source"]["log"]["file"]["path"] - == f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{cloudtrail_filename_digest}" - ) - assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name - assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" - assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["object"]["key"] == cloudtrail_filename_digest - assert res["hits"]["hits"][0]["_source"]["cloud"]["provider"] == "aws" - assert res["hits"]["hits"][0]["_source"]["cloud"]["region"] == "eu-central-1" - assert res["hits"]["hits"][0]["_source"]["cloud"]["account"]["id"] == "000000000000" - assert res["hits"]["hits"][0]["_source"]["tags"] == ["forwarded", "aws-cloudtrail", "tag1", "tag2", "tag3"] - - assert res["hits"]["hits"][1]["_source"]["message"] == fixtures[1].rstrip("\n") - assert res["hits"]["hits"][1]["_source"]["log"]["offset"] == 0 - assert ( - 
res["hits"]["hits"][1]["_source"]["log"]["file"]["path"] - == f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{cloudtrail_filename_non_digest}" - ) - assert res["hits"]["hits"][1]["_source"]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name - assert res["hits"]["hits"][1]["_source"]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" - assert res["hits"]["hits"][1]["_source"]["aws"]["s3"]["object"]["key"] == cloudtrail_filename_non_digest - assert res["hits"]["hits"][1]["_source"]["cloud"]["provider"] == "aws" - assert res["hits"]["hits"][1]["_source"]["cloud"]["region"] == "eu-central-1" - assert res["hits"]["hits"][1]["_source"]["cloud"]["account"]["id"] == "000000000000" - assert res["hits"]["hits"][1]["_source"]["tags"] == ["forwarded", "aws-cloudtrail", "tag1", "tag2", "tag3"] - - logstash_message = self.logstash.get_messages(expected=2) - assert len(logstash_message) == 2 - res["hits"]["hits"][0]["_source"]["tags"].remove("aws-cloudtrail") - res["hits"]["hits"][1]["_source"]["tags"].remove("aws-cloudtrail") - - assert res["hits"]["hits"][0]["_source"]["aws"] == logstash_message[0]["aws"] - assert res["hits"]["hits"][0]["_source"]["cloud"] == logstash_message[0]["cloud"] - assert res["hits"]["hits"][0]["_source"]["log"] == logstash_message[0]["log"] - assert res["hits"]["hits"][0]["_source"]["message"] == logstash_message[0]["message"] - assert res["hits"]["hits"][0]["_source"]["tags"] == logstash_message[0]["tags"] - - assert res["hits"]["hits"][1]["_source"]["aws"] == logstash_message[1]["aws"] - assert res["hits"]["hits"][1]["_source"]["cloud"] == logstash_message[1]["cloud"] - assert res["hits"]["hits"][1]["_source"]["log"] == logstash_message[1]["log"] - assert res["hits"]["hits"][1]["_source"]["message"] == logstash_message[1]["message"] - assert res["hits"]["hits"][1]["_source"]["tags"] == logstash_message[1]["tags"] - - self.elasticsearch.refresh(index="logs-stash.elasticsearch-output") - assert self.elasticsearch.count(index="logs-stash.elasticsearch-output")["count"] == 2 - - res = self.elasticsearch.search(index="logs-stash.elasticsearch-output", sort="_seq_no") - assert res["hits"]["total"] == {"value": 2, "relation": "eq"} - - assert res["hits"]["hits"][0]["_source"]["aws"] == logstash_message[0]["aws"] - assert res["hits"]["hits"][0]["_source"]["cloud"] == logstash_message[0]["cloud"] - assert res["hits"]["hits"][0]["_source"]["log"] == logstash_message[0]["log"] - assert res["hits"]["hits"][0]["_source"]["message"] == logstash_message[0]["message"] - assert res["hits"]["hits"][0]["_source"]["tags"] == logstash_message[0]["tags"] - - assert res["hits"]["hits"][1]["_source"]["aws"] == logstash_message[1]["aws"] - assert res["hits"]["hits"][1]["_source"]["cloud"] == logstash_message[1]["cloud"] - assert res["hits"]["hits"][1]["_source"]["log"] == logstash_message[1]["log"] - assert res["hits"]["hits"][1]["_source"]["message"] == logstash_message[1]["message"] - assert res["hits"]["hits"][1]["_source"]["tags"] == logstash_message[1]["tags"] - - def test_continuing(self) -> None: - assert isinstance(self.elasticsearch, ElasticsearchContainer) - assert isinstance(self.logstash, LogstashContainer) - assert isinstance(self.localstack, LocalStackContainer) - - fixtures = [ - _load_file_fixture("cloudwatch-log-1.json"), - _load_file_fixture("cloudwatch-log-2.json"), - ] - - s3_bucket_name = _time_based_id(suffix="test-bucket") - first_filename = "exportedlog/uuid/yyyy-mm-dd-[$LATEST]hash/000000.gz" - _s3_upload_content_to_bucket( - client=self.s3_client, - 
content=gzip.compress("".join(fixtures).encode("utf-8")), - content_type="application/x-gzip", - bucket_name=s3_bucket_name, - key=first_filename, - ) - - cloudwatch_group_name = _time_based_id(suffix="source-group") - cloudwatch_group = _logs_create_cloudwatch_logs_group(self.logs_client, group_name=cloudwatch_group_name) - - cloudwatch_stream_name = _time_based_id(suffix="source-stream") - _logs_create_cloudwatch_logs_stream( - self.logs_client, group_name=cloudwatch_group_name, stream_name=cloudwatch_stream_name - ) - - _logs_upload_event_to_cloudwatch_logs( - self.logs_client, - group_name=cloudwatch_group_name, - stream_name=cloudwatch_stream_name, - messages_body=["".join(fixtures)], - ) - - cloudwatch_group_arn = cloudwatch_group["arn"] - - cloudwatch_group_name = cloudwatch_group_name - cloudwatch_stream_name = cloudwatch_stream_name - - sqs_queue_name = _time_based_id(suffix="source-sqs") - s3_sqs_queue_name = _time_based_id(suffix="source-s3-sqs") - - sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) - s3_sqs_queue = _sqs_create_queue(self.sqs_client, s3_sqs_queue_name, self.localstack.get_url()) - - sqs_queue_arn = sqs_queue["QueueArn"] - sqs_queue_url = sqs_queue["QueueUrl"] - sqs_queue_url_path = sqs_queue["QueueUrlPath"] - - s3_sqs_queue_arn = s3_sqs_queue["QueueArn"] - s3_sqs_queue_url = s3_sqs_queue["QueueUrl"] - - _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) - _sqs_send_s3_notifications(self.sqs_client, s3_sqs_queue_url, s3_bucket_name, [first_filename]) - - kinesis_stream_name = _time_based_id(suffix="source-kinesis") - kinesis_stream = _kinesis_create_stream(self.kinesis_client, kinesis_stream_name) - kinesis_stream_arn = kinesis_stream["StreamDescription"]["StreamARN"] - - _kinesis_put_records(self.kinesis_client, kinesis_stream_name, ["".join(fixtures)]) - - config_yaml: str = f""" - inputs: - - type: "kinesis-data-stream" - id: "{kinesis_stream_arn}" - tags: {self.default_tags} - outputs: {self.default_outputs} - - type: "cloudwatch-logs" - id: "{cloudwatch_group_arn}" - tags: {self.default_tags} - outputs: {self.default_outputs} - - type: sqs - id: "{sqs_queue_arn}" - tags: {self.default_tags} - outputs: {self.default_outputs} - - type: s3-sqs - id: "{s3_sqs_queue_arn}" - tags: {self.default_tags} - outputs: {self.default_outputs} - """ - - config_file_path = "config.yaml" - config_bucket_name = _time_based_id(suffix="config-bucket") - _s3_upload_content_to_bucket( - client=self.s3_client, - content=config_yaml.encode("utf-8"), - content_type="text/plain", - bucket_name=config_bucket_name, - key=config_file_path, - ) - - os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" - - events_s3, _ = _sqs_get_messages(self.sqs_client, s3_sqs_queue_url, s3_sqs_queue_arn) - - events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) - - message_id = events_sqs["Records"][0]["messageId"] - - events_cloudwatch_logs, event_ids_cloudwatch_logs, _ = _logs_retrieve_event_from_cloudwatch_logs( - self.logs_client, cloudwatch_group_name, cloudwatch_stream_name - ) - - events_kinesis, _ = _kinesis_retrieve_event_from_kinesis_stream( - self.kinesis_client, kinesis_stream_name, kinesis_stream_arn - ) - - ctx = ContextMock() - first_call = handler(events_s3, ctx) # type:ignore - - assert first_call == "continuing" - - self.elasticsearch.refresh(index="logs-generic-default") - assert self.elasticsearch.count(index="logs-generic-default")["count"] == 1 - - res = 
self.elasticsearch.search(index="logs-generic-default", sort="_seq_no") - assert res["hits"]["total"] == {"value": 1, "relation": "eq"} - - assert res["hits"]["hits"][0]["_source"]["message"] == fixtures[0].rstrip("\n") - assert res["hits"]["hits"][0]["_source"]["log"]["offset"] == 0 - assert ( - res["hits"]["hits"][0]["_source"]["log"]["file"]["path"] - == f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{first_filename}" - ) - assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name - assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" - assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["object"]["key"] == first_filename - assert res["hits"]["hits"][0]["_source"]["cloud"]["provider"] == "aws" - assert res["hits"]["hits"][0]["_source"]["cloud"]["region"] == "eu-central-1" - assert res["hits"]["hits"][0]["_source"]["cloud"]["account"]["id"] == "000000000000" - assert res["hits"]["hits"][0]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] - - logstash_message = self.logstash.get_messages(expected=1) - assert len(logstash_message) == 1 - res["hits"]["hits"][0]["_source"]["tags"].remove("generic") - assert res["hits"]["hits"][0]["_source"]["aws"] == logstash_message[0]["aws"] - assert res["hits"]["hits"][0]["_source"]["cloud"] == logstash_message[0]["cloud"] - assert res["hits"]["hits"][0]["_source"]["log"] == logstash_message[0]["log"] - assert res["hits"]["hits"][0]["_source"]["message"] == logstash_message[0]["message"] - assert res["hits"]["hits"][0]["_source"]["tags"] == logstash_message[0]["tags"] - - second_call = handler(events_sqs, ctx) # type:ignore - - assert second_call == "continuing" - - self.elasticsearch.refresh(index="logs-generic-default") - assert self.elasticsearch.count(index="logs-generic-default")["count"] == 2 - - res = self.elasticsearch.search(index="logs-generic-default", sort="_seq_no") - assert res["hits"]["total"] == {"value": 2, "relation": "eq"} - - assert res["hits"]["hits"][1]["_source"]["message"] == fixtures[0].rstrip("\n") - assert res["hits"]["hits"][1]["_source"]["log"]["offset"] == 0 - assert res["hits"]["hits"][1]["_source"]["log"]["file"]["path"] == sqs_queue_url_path - assert res["hits"]["hits"][1]["_source"]["aws"]["sqs"]["name"] == sqs_queue_name - assert res["hits"]["hits"][1]["_source"]["aws"]["sqs"]["message_id"] == message_id - assert res["hits"]["hits"][1]["_source"]["cloud"]["provider"] == "aws" - assert res["hits"]["hits"][1]["_source"]["cloud"]["region"] == "us-east-1" - assert res["hits"]["hits"][1]["_source"]["cloud"]["account"]["id"] == "000000000000" - assert res["hits"]["hits"][1]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] - - logstash_message = self.logstash.get_messages(expected=2) - assert len(logstash_message) == 2 - res["hits"]["hits"][1]["_source"]["tags"].remove("generic") - assert res["hits"]["hits"][1]["_source"]["aws"] == logstash_message[1]["aws"] - assert res["hits"]["hits"][1]["_source"]["cloud"] == logstash_message[1]["cloud"] - assert res["hits"]["hits"][1]["_source"]["log"] == logstash_message[1]["log"] - assert res["hits"]["hits"][1]["_source"]["message"] == logstash_message[1]["message"] - assert res["hits"]["hits"][1]["_source"]["tags"] == logstash_message[1]["tags"] - - third_call = handler(events_cloudwatch_logs, ctx) # type:ignore - - assert third_call == "continuing" - - self.elasticsearch.refresh(index="logs-generic-default") - assert 
self.elasticsearch.count(index="logs-generic-default")["count"] == 3 - - res = self.elasticsearch.search(index="logs-generic-default", sort="_seq_no") - assert res["hits"]["total"] == {"value": 3, "relation": "eq"} - - assert res["hits"]["hits"][2]["_source"]["message"] == fixtures[0].rstrip("\n") - assert res["hits"]["hits"][2]["_source"]["log"]["offset"] == 0 - assert ( - res["hits"]["hits"][2]["_source"]["log"]["file"]["path"] - == f"{cloudwatch_group_name}/{cloudwatch_stream_name}" - ) - assert res["hits"]["hits"][2]["_source"]["aws"]["cloudwatch"]["log_group"] == cloudwatch_group_name - assert res["hits"]["hits"][2]["_source"]["aws"]["cloudwatch"]["log_stream"] == cloudwatch_stream_name - assert res["hits"]["hits"][2]["_source"]["aws"]["cloudwatch"]["event_id"] == event_ids_cloudwatch_logs[0] - assert res["hits"]["hits"][2]["_source"]["cloud"]["provider"] == "aws" - assert res["hits"]["hits"][2]["_source"]["cloud"]["region"] == "us-east-1" - assert res["hits"]["hits"][2]["_source"]["cloud"]["account"]["id"] == "000000000000" - assert res["hits"]["hits"][2]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] - - logstash_message = self.logstash.get_messages(expected=3) - assert len(logstash_message) == 3 - res["hits"]["hits"][2]["_source"]["tags"].remove("generic") - assert res["hits"]["hits"][2]["_source"]["aws"] == logstash_message[2]["aws"] - assert res["hits"]["hits"][2]["_source"]["cloud"] == logstash_message[2]["cloud"] - assert res["hits"]["hits"][2]["_source"]["log"] == logstash_message[2]["log"] - assert res["hits"]["hits"][2]["_source"]["message"] == logstash_message[2]["message"] - assert res["hits"]["hits"][2]["_source"]["tags"] == logstash_message[2]["tags"] - - fourth_call = handler(events_kinesis, ctx) # type:ignore - - assert fourth_call == "continuing" - - self.elasticsearch.refresh(index="logs-generic-default") - assert self.elasticsearch.count(index="logs-generic-default")["count"] == 4 - - res = self.elasticsearch.search(index="logs-generic-default", sort="_seq_no") - assert res["hits"]["total"] == {"value": 4, "relation": "eq"} - - assert res["hits"]["hits"][3]["_source"]["message"] == fixtures[0].rstrip("\n") - assert res["hits"]["hits"][3]["_source"]["log"]["offset"] == 0 - assert res["hits"]["hits"][3]["_source"]["log"]["file"]["path"] == kinesis_stream_arn - assert res["hits"]["hits"][3]["_source"]["aws"]["kinesis"]["type"] == "stream" - assert res["hits"]["hits"][3]["_source"]["aws"]["kinesis"]["partition_key"] == "PartitionKey" - assert res["hits"]["hits"][3]["_source"]["aws"]["kinesis"]["name"] == kinesis_stream_name - assert ( - res["hits"]["hits"][3]["_source"]["aws"]["kinesis"]["sequence_number"] - == events_kinesis["Records"][0]["kinesis"]["sequenceNumber"] - ) - assert res["hits"]["hits"][3]["_source"]["cloud"]["provider"] == "aws" - assert res["hits"]["hits"][3]["_source"]["cloud"]["region"] == "us-east-1" - assert res["hits"]["hits"][3]["_source"]["cloud"]["account"]["id"] == "000000000000" - assert res["hits"]["hits"][3]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] - - logstash_message = self.logstash.get_messages(expected=4) - assert len(logstash_message) == 4 - res["hits"]["hits"][3]["_source"]["tags"].remove("generic") - assert res["hits"]["hits"][3]["_source"]["aws"] == logstash_message[3]["aws"] - assert res["hits"]["hits"][3]["_source"]["cloud"] == logstash_message[3]["cloud"] - assert res["hits"]["hits"][3]["_source"]["log"] == logstash_message[3]["log"] - assert 
res["hits"]["hits"][3]["_source"]["message"] == logstash_message[3]["message"] - assert res["hits"]["hits"][3]["_source"]["tags"] == logstash_message[3]["tags"] - - continued_events, _ = _sqs_get_messages( - self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn - ) - - fifth_call = handler(continued_events, ctx) # type:ignore - - assert fifth_call == "continuing" - - self.elasticsearch.refresh(index="logs-generic-default") - assert self.elasticsearch.count(index="logs-generic-default")["count"] == 5 - - res = self.elasticsearch.search(index="logs-generic-default", sort="_seq_no") - assert res["hits"]["total"] == {"value": 5, "relation": "eq"} - - assert res["hits"]["hits"][4]["_source"]["message"] == fixtures[1].rstrip("\n") - assert res["hits"]["hits"][4]["_source"]["log"]["offset"] == 94 - assert ( - res["hits"]["hits"][4]["_source"]["log"]["file"]["path"] - == f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{first_filename}" - ) - assert res["hits"]["hits"][4]["_source"]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name - assert res["hits"]["hits"][4]["_source"]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" - assert res["hits"]["hits"][4]["_source"]["aws"]["s3"]["object"]["key"] == first_filename - assert res["hits"]["hits"][4]["_source"]["cloud"]["provider"] == "aws" - assert res["hits"]["hits"][4]["_source"]["cloud"]["region"] == "eu-central-1" - assert res["hits"]["hits"][4]["_source"]["cloud"]["account"]["id"] == "000000000000" - assert res["hits"]["hits"][4]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] - - logstash_message = self.logstash.get_messages(expected=5) - assert len(logstash_message) == 5 - res["hits"]["hits"][4]["_source"]["tags"].remove("generic") - assert res["hits"]["hits"][4]["_source"]["aws"] == logstash_message[4]["aws"] - assert res["hits"]["hits"][4]["_source"]["cloud"] == logstash_message[4]["cloud"] - assert res["hits"]["hits"][4]["_source"]["log"] == logstash_message[4]["log"] - assert res["hits"]["hits"][4]["_source"]["message"] == logstash_message[4]["message"] - assert res["hits"]["hits"][4]["_source"]["tags"] == logstash_message[4]["tags"] - - ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) - - continued_events, _ = _sqs_get_messages( - self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn - ) - sixth_call = handler(continued_events, ctx) # type:ignore - - assert sixth_call == "completed" - - self.elasticsearch.refresh(index="logs-generic-default") - assert self.elasticsearch.count(index="logs-generic-default")["count"] == 8 - - res = self.elasticsearch.search(index="logs-generic-default", sort="_seq_no") - assert res["hits"]["total"] == {"value": 8, "relation": "eq"} - - assert res["hits"]["hits"][5]["_source"]["message"] == fixtures[1].rstrip("\n") - assert res["hits"]["hits"][5]["_source"]["log"]["offset"] == 94 - assert res["hits"]["hits"][5]["_source"]["log"]["file"]["path"] == sqs_queue_url_path - assert res["hits"]["hits"][5]["_source"]["aws"]["sqs"]["name"] == sqs_queue_name - assert res["hits"]["hits"][5]["_source"]["aws"]["sqs"]["message_id"] == message_id - assert res["hits"]["hits"][5]["_source"]["cloud"]["provider"] == "aws" - assert res["hits"]["hits"][5]["_source"]["cloud"]["region"] == "us-east-1" - assert res["hits"]["hits"][5]["_source"]["cloud"]["account"]["id"] == "000000000000" - assert res["hits"]["hits"][5]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] - - assert 
res["hits"]["hits"][6]["_source"]["message"] == fixtures[1].rstrip("\n") - assert res["hits"]["hits"][6]["_source"]["log"]["offset"] == 94 - assert ( - res["hits"]["hits"][6]["_source"]["log"]["file"]["path"] - == f"{cloudwatch_group_name}/{cloudwatch_stream_name}" - ) - assert res["hits"]["hits"][6]["_source"]["aws"]["cloudwatch"]["log_group"] == cloudwatch_group_name - assert res["hits"]["hits"][6]["_source"]["aws"]["cloudwatch"]["log_stream"] == cloudwatch_stream_name - assert res["hits"]["hits"][6]["_source"]["aws"]["cloudwatch"]["event_id"] == event_ids_cloudwatch_logs[0] - assert res["hits"]["hits"][6]["_source"]["cloud"]["provider"] == "aws" - assert res["hits"]["hits"][6]["_source"]["cloud"]["region"] == "us-east-1" - assert res["hits"]["hits"][6]["_source"]["cloud"]["account"]["id"] == "000000000000" - assert res["hits"]["hits"][6]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] - - assert res["hits"]["hits"][7]["_source"]["message"] == fixtures[1].rstrip("\n") - assert res["hits"]["hits"][7]["_source"]["log"]["offset"] == 94 - assert res["hits"]["hits"][7]["_source"]["log"]["file"]["path"] == kinesis_stream_arn - assert res["hits"]["hits"][7]["_source"]["aws"]["kinesis"]["type"] == "stream" - assert res["hits"]["hits"][7]["_source"]["aws"]["kinesis"]["partition_key"] == "PartitionKey" - assert res["hits"]["hits"][7]["_source"]["aws"]["kinesis"]["name"] == kinesis_stream_name - assert ( - res["hits"]["hits"][7]["_source"]["aws"]["kinesis"]["sequence_number"] - == events_kinesis["Records"][0]["kinesis"]["sequenceNumber"] - ) - assert res["hits"]["hits"][7]["_source"]["cloud"]["provider"] == "aws" - assert res["hits"]["hits"][7]["_source"]["cloud"]["region"] == "us-east-1" - assert res["hits"]["hits"][7]["_source"]["cloud"]["account"]["id"] == "000000000000" - assert res["hits"]["hits"][7]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] - - logstash_message = self.logstash.get_messages(expected=8) - assert len(logstash_message) == 8 - res["hits"]["hits"][5]["_source"]["tags"].remove("generic") - res["hits"]["hits"][6]["_source"]["tags"].remove("generic") - res["hits"]["hits"][7]["_source"]["tags"].remove("generic") - - assert res["hits"]["hits"][5]["_source"]["aws"] == logstash_message[5]["aws"] - assert res["hits"]["hits"][5]["_source"]["cloud"] == logstash_message[5]["cloud"] - assert res["hits"]["hits"][5]["_source"]["log"] == logstash_message[5]["log"] - assert res["hits"]["hits"][5]["_source"]["message"] == logstash_message[5]["message"] - assert res["hits"]["hits"][5]["_source"]["tags"] == logstash_message[5]["tags"] - - assert res["hits"]["hits"][6]["_source"]["aws"] == logstash_message[6]["aws"] - assert res["hits"]["hits"][6]["_source"]["cloud"] == logstash_message[6]["cloud"] - assert res["hits"]["hits"][6]["_source"]["log"] == logstash_message[6]["log"] - assert res["hits"]["hits"][6]["_source"]["message"] == logstash_message[6]["message"] - assert res["hits"]["hits"][6]["_source"]["tags"] == logstash_message[6]["tags"] - - assert res["hits"]["hits"][7]["_source"]["aws"] == logstash_message[7]["aws"] - assert res["hits"]["hits"][7]["_source"]["cloud"] == logstash_message[7]["cloud"] - assert res["hits"]["hits"][7]["_source"]["log"] == logstash_message[7]["log"] - assert res["hits"]["hits"][7]["_source"]["message"] == logstash_message[7]["message"] - assert res["hits"]["hits"][7]["_source"]["tags"] == logstash_message[7]["tags"] - - def test_continuing_no_timeout_input_from_originalEventSourceARN_message_attribute(self) -> 
None: - assert isinstance(self.logstash, LogstashContainer) - assert isinstance(self.localstack, LocalStackContainer) - - fixtures = [ - _load_file_fixture("cloudwatch-log-1.json"), - _load_file_fixture("cloudwatch-log-2.json"), - _load_file_fixture("cloudwatch-log-3.json"), - ] - - sqs_queue_name = _time_based_id(suffix="source-sqs") - - sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) - - sqs_queue_arn = sqs_queue["QueueArn"] - sqs_queue_url = sqs_queue["QueueUrl"] - sqs_queue_url_path = sqs_queue["QueueUrlPath"] - - _sqs_send_messages(self.sqs_client, sqs_queue_url, fixtures[0]) - _sqs_send_messages(self.sqs_client, sqs_queue_url, fixtures[1]) - _sqs_send_messages(self.sqs_client, sqs_queue_url, fixtures[2]) - - config_yaml: str = f""" - inputs: - - type: sqs - id: "{sqs_queue_arn}" - tags: {self.default_tags} - outputs: - - type: "logstash" - args: - logstash_url: "{self.logstash.get_url()}" - ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} - username: "{self.logstash.logstash_user}" - password: "{self.logstash.logstash_password}" - """ - - config_file_path = "config.yaml" - config_bucket_name = _time_based_id(suffix="config-bucket") - _s3_upload_content_to_bucket( - client=self.s3_client, - content=config_yaml.encode("utf-8"), - content_type="text/plain", - bucket_name=config_bucket_name, - key=config_file_path, - ) - - os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" - - events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) - - first_message_id = events_sqs["Records"][0]["messageId"] - second_message_id = events_sqs["Records"][1]["messageId"] - - ctx = ContextMock() - first_call = handler(events_sqs, ctx) # type:ignore - - assert first_call == "continuing" - - logstash_message = self.logstash.get_messages(expected=1) - assert len(logstash_message) == 1 - - assert logstash_message[0]["message"] == fixtures[0].rstrip("\n") - assert logstash_message[0]["log"]["offset"] == 0 - assert logstash_message[0]["log"]["file"]["path"] == sqs_queue_url_path - assert logstash_message[0]["aws"]["sqs"]["name"] == sqs_queue_name - assert logstash_message[0]["aws"]["sqs"]["message_id"] == first_message_id - assert logstash_message[0]["cloud"]["provider"] == "aws" - assert logstash_message[0]["cloud"]["region"] == "us-east-1" - assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - continued_events, _ = _sqs_get_messages( - self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn - ) - - continued_events["Records"][2]["messageAttributes"]["originalEventSourceARN"][ - "stringValue" - ] += "-not-configured-arn" - second_call = handler(continued_events, ctx) # type:ignore - - assert second_call == "continuing" - - logstash_message = self.logstash.get_messages(expected=2) - assert len(logstash_message) == 2 - - assert logstash_message[1]["message"] == fixtures[1].rstrip("\n") - assert logstash_message[1]["log"]["offset"] == 0 - assert logstash_message[1]["log"]["file"]["path"] == sqs_queue_url_path - assert logstash_message[1]["aws"]["sqs"]["name"] == sqs_queue_name - assert logstash_message[1]["aws"]["sqs"]["message_id"] == second_message_id - assert logstash_message[1]["cloud"]["provider"] == "aws" - assert logstash_message[1]["cloud"]["region"] == "us-east-1" - assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[1]["tags"] == 
["forwarded", "tag1", "tag2", "tag3"] - - ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) - continued_events, _ = _sqs_get_messages( - self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn - ) - - third_call = handler(continued_events, ctx) # type:ignore - - assert third_call == "completed" - - logstash_message = self.logstash.get_messages(expected=2) - assert len(logstash_message) == 2 - - def test_replay(self) -> None: - assert isinstance(self.elasticsearch, ElasticsearchContainer) - assert isinstance(self.logstash, LogstashContainer) - assert isinstance(self.localstack, LocalStackContainer) - - fixtures = [ - _load_file_fixture("cloudwatch-log-1.json"), - _load_file_fixture("cloudwatch-log-2.json"), - ] - - s3_bucket_name = _time_based_id(suffix="test-bucket") - first_filename = "exportedlog/uuid/yyyy-mm-dd-[$LATEST]hash/000000.gz" - _s3_upload_content_to_bucket( - client=self.s3_client, - content=gzip.compress("".join(fixtures).encode("utf-8")), - content_type="application/x-gzip", - bucket_name=s3_bucket_name, - key=first_filename, - ) - - cloudwatch_group_name = _time_based_id(suffix="source-group") - cloudwatch_group = _logs_create_cloudwatch_logs_group(self.logs_client, group_name=cloudwatch_group_name) - - cloudwatch_stream_name = _time_based_id(suffix="source-stream") - _logs_create_cloudwatch_logs_stream( - self.logs_client, group_name=cloudwatch_group_name, stream_name=cloudwatch_stream_name - ) - - _logs_upload_event_to_cloudwatch_logs( - self.logs_client, - group_name=cloudwatch_group_name, - stream_name=cloudwatch_stream_name, - messages_body=["".join(fixtures)], - ) - - cloudwatch_group_arn = cloudwatch_group["arn"] - - cloudwatch_group_name = cloudwatch_group_name - cloudwatch_stream_name = cloudwatch_stream_name - - sqs_queue_name = _time_based_id(suffix="source-sqs") - s3_sqs_queue_name = _time_based_id(suffix="source-s3-sqs") - - sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) - s3_sqs_queue = _sqs_create_queue(self.sqs_client, s3_sqs_queue_name, self.localstack.get_url()) - - sqs_queue_arn = sqs_queue["QueueArn"] - sqs_queue_url = sqs_queue["QueueUrl"] - sqs_queue_url_path = sqs_queue["QueueUrlPath"] - - s3_sqs_queue_arn = s3_sqs_queue["QueueArn"] - s3_sqs_queue_url = s3_sqs_queue["QueueUrl"] - - _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) - _sqs_send_s3_notifications(self.sqs_client, s3_sqs_queue_url, s3_bucket_name, [first_filename]) - - kinesis_stream_name = _time_based_id(suffix="source-kinesis") - kinesis_stream = _kinesis_create_stream(self.kinesis_client, kinesis_stream_name) - kinesis_stream_arn = kinesis_stream["StreamDescription"]["StreamARN"] - - _kinesis_put_records(self.kinesis_client, kinesis_stream_name, ["".join(fixtures)]) - - # the way to let logstash fail is to give wrong credentials - config_yaml: str = f""" - inputs: - - type: "kinesis-data-stream" - id: "{kinesis_stream_arn}" - tags: {self.default_tags} - outputs: - - type: "elasticsearch" - args: - elasticsearch_url: "{self.elasticsearch.get_url()}" - ssl_assert_fingerprint: {self.elasticsearch.ssl_assert_fingerprint} - username: "{self.secret_arn}:username" - password: "{self.secret_arn}:password" - - type: "logstash" - args: - logstash_url: "{self.logstash.get_url()}" - ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} - username: "wrong_username" - password: "wrong_username" - - type: "cloudwatch-logs" - id: "{cloudwatch_group_arn}" - tags: {self.default_tags} - 
outputs: - - type: "elasticsearch" - args: - elasticsearch_url: "{self.elasticsearch.get_url()}" - ssl_assert_fingerprint: {self.elasticsearch.ssl_assert_fingerprint} - username: "{self.secret_arn}:username" - password: "{self.secret_arn}:password" - - type: "logstash" - args: - logstash_url: "{self.logstash.get_url()}" - ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} - username: "wrong_username" - password: "wrong_username" - - type: sqs - id: "{sqs_queue_arn}" - tags: {self.default_tags} - outputs: - - type: "elasticsearch" - args: - elasticsearch_url: "{self.elasticsearch.get_url()}" - ssl_assert_fingerprint: {self.elasticsearch.ssl_assert_fingerprint} - username: "{self.secret_arn}:username" - password: "{self.secret_arn}:password" - - type: "logstash" - args: - logstash_url: "{self.logstash.get_url()}" - ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} - username: "wrong_username" - password: "wrong_username" - - type: s3-sqs - id: "{s3_sqs_queue_arn}" - tags: {self.default_tags} - outputs: - - type: "elasticsearch" - args: - elasticsearch_url: "{self.elasticsearch.get_url()}" - ssl_assert_fingerprint: {self.elasticsearch.ssl_assert_fingerprint} - username: "{self.secret_arn}:username" - password: "{self.secret_arn}:password" - - type: "logstash" - args: - logstash_url: "{self.logstash.get_url()}" - ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} - username: "wrong_username" - password: "wrong_username" - """ - - config_file_path = "config.yaml" - config_bucket_name = _time_based_id(suffix="config-bucket") - _s3_upload_content_to_bucket( - client=self.s3_client, - content=config_yaml.encode("utf-8"), - content_type="text/plain", - bucket_name=config_bucket_name, - key=config_file_path, - ) - - os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" - - events_s3, _ = _sqs_get_messages(self.sqs_client, s3_sqs_queue_url, s3_sqs_queue_arn) - - bucket_arn: str = f"arn:aws:s3:::{s3_bucket_name}" - event_time = int( - datetime.datetime.strptime(_S3_NOTIFICATION_EVENT_TIME, "%Y-%m-%dT%H:%M:%S.%fZ").timestamp() * 1000 - ) - - hash_first = get_hex_prefix(f"{bucket_arn}-{first_filename}") - prefix_s3_first = f"{event_time}-{hash_first}" - - events_sqs, events_sent_timestamps_sqs = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) - - message_id = events_sqs["Records"][0]["messageId"] - hash_sqs = get_hex_prefix(f"{sqs_queue_name}-{message_id}") - prefix_sqs: str = f"{events_sent_timestamps_sqs[0]}-{hash_sqs}" - - ( - events_cloudwatch_logs, - event_ids_cloudwatch_logs, - event_timestamps_cloudwatch_logs, - ) = _logs_retrieve_event_from_cloudwatch_logs(self.logs_client, cloudwatch_group_name, cloudwatch_stream_name) - - hash_cw_logs = get_hex_prefix( - f"{cloudwatch_group_name}-{cloudwatch_stream_name}-{event_ids_cloudwatch_logs[0]}" - ) - prefix_cloudwatch_logs = f"{event_timestamps_cloudwatch_logs[0]}-{hash_cw_logs}" - - events_kinesis, event_timestamps_kinesis_records = _kinesis_retrieve_event_from_kinesis_stream( - self.kinesis_client, kinesis_stream_name, kinesis_stream_arn - ) - sequence_number = events_kinesis["Records"][0]["kinesis"]["sequenceNumber"] - hash_kinesis_record = get_hex_prefix(f"stream-{kinesis_stream_name}-PartitionKey-{sequence_number}") - prefix_kinesis = f"{int(float(event_timestamps_kinesis_records[0]) * 1000)}-{hash_kinesis_record}" - - # Create an expected id for s3-sqs so that es.send will fail - self.elasticsearch.index( - index="logs-generic-default", - op_type="create", - 
id=f"{prefix_s3_first}-000000000000", - document={"@timestamp": datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.%fZ")}, - ) - - # Create an expected id for sqs so that es.send will fail - self.elasticsearch.index( - index="logs-generic-default", - op_type="create", - id=f"{prefix_sqs}-000000000000", - document={"@timestamp": datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.%fZ")}, - ) - - # Create an expected id for cloudwatch-logs so that es.send will fail - self.elasticsearch.index( - index="logs-generic-default", - op_type="create", - id=f"{prefix_cloudwatch_logs}-000000000000", - document={"@timestamp": datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.%fZ")}, - ) - - # Create an expected id for kinesis-data-stream so that es.send will fail - self.elasticsearch.index( - index="logs-generic-default", - op_type="create", - id=f"{prefix_kinesis}-000000000000", - document={"@timestamp": datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.%fZ")}, - ) - - self.elasticsearch.refresh(index="logs-generic-default") - - res = self.elasticsearch.search(index="logs-generic-default") - assert res["hits"]["total"] == {"value": 4, "relation": "eq"} - - ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) - - first_call = handler(events_s3, ctx) # type:ignore - - assert first_call == "completed" - - self.elasticsearch.refresh(index="logs-generic-default") - res = self.elasticsearch.search( - index="logs-generic-default", - query={ - "bool": { - "must_not": { - "ids": { - "values": [ - f"{prefix_s3_first}-000000000000", - f"{prefix_sqs}-000000000000", - f"{prefix_cloudwatch_logs}-000000000000", - f"{prefix_kinesis}-000000000000", - ] - } - } - } - }, - sort="_seq_no", - ) - - assert res["hits"]["total"] == {"value": 1, "relation": "eq"} - - assert res["hits"]["hits"][0]["_source"]["message"] == fixtures[1].rstrip("\n") - assert res["hits"]["hits"][0]["_source"]["log"]["offset"] == 94 - assert ( - res["hits"]["hits"][0]["_source"]["log"]["file"]["path"] - == f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{first_filename}" - ) - assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name - assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" - assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["object"]["key"] == first_filename - assert res["hits"]["hits"][0]["_source"]["cloud"]["provider"] == "aws" - assert res["hits"]["hits"][0]["_source"]["cloud"]["region"] == "eu-central-1" - assert res["hits"]["hits"][0]["_source"]["cloud"]["account"]["id"] == "000000000000" - assert res["hits"]["hits"][0]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] - - logstash_message = self.logstash.get_messages(expected=0) - assert len(logstash_message) == 0 - - second_call = handler(events_sqs, ctx) # type:ignore - - assert second_call == "completed" - - self.elasticsearch.refresh(index="logs-generic-default") - res = self.elasticsearch.search( - index="logs-generic-default", - query={ - "bool": { - "must_not": { - "ids": { - "values": [ - f"{prefix_s3_first}-000000000000", - f"{prefix_sqs}-000000000000", - f"{prefix_cloudwatch_logs}-000000000000", - f"{prefix_kinesis}-000000000000", - ] - } - } - } - }, - sort="_seq_no", - ) - - assert res["hits"]["total"] == {"value": 2, "relation": "eq"} - - assert res["hits"]["hits"][1]["_source"]["message"] == fixtures[1].rstrip("\n") - assert res["hits"]["hits"][1]["_source"]["log"]["offset"] == 94 - assert 
res["hits"]["hits"][1]["_source"]["log"]["file"]["path"] == sqs_queue_url_path - assert res["hits"]["hits"][1]["_source"]["aws"]["sqs"]["name"] == sqs_queue_name - assert res["hits"]["hits"][1]["_source"]["aws"]["sqs"]["message_id"] == message_id - assert res["hits"]["hits"][1]["_source"]["cloud"]["provider"] == "aws" - assert res["hits"]["hits"][1]["_source"]["cloud"]["region"] == "us-east-1" - assert res["hits"]["hits"][1]["_source"]["cloud"]["account"]["id"] == "000000000000" - assert res["hits"]["hits"][1]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] - - logstash_message = self.logstash.get_messages(expected=0) - assert len(logstash_message) == 0 - - third_call = handler(events_cloudwatch_logs, ctx) # type:ignore - - assert third_call == "completed" - - self.elasticsearch.refresh(index="logs-generic-default") - res = self.elasticsearch.search( - index="logs-generic-default", - query={ - "bool": { - "must_not": { - "ids": { - "values": [ - f"{prefix_s3_first}-000000000000", - f"{prefix_sqs}-000000000000", - f"{prefix_cloudwatch_logs}-000000000000", - f"{prefix_kinesis}-000000000000", - ] - } - } - } - }, - sort="_seq_no", - ) - - assert res["hits"]["total"] == {"value": 3, "relation": "eq"} - - assert res["hits"]["hits"][2]["_source"]["message"] == fixtures[1].rstrip("\n") - assert res["hits"]["hits"][2]["_source"]["log"]["offset"] == 94 - assert ( - res["hits"]["hits"][2]["_source"]["log"]["file"]["path"] - == f"{cloudwatch_group_name}/{cloudwatch_stream_name}" - ) - assert res["hits"]["hits"][2]["_source"]["aws"]["cloudwatch"]["log_group"] == cloudwatch_group_name - assert res["hits"]["hits"][2]["_source"]["aws"]["cloudwatch"]["log_stream"] == cloudwatch_stream_name - assert res["hits"]["hits"][2]["_source"]["aws"]["cloudwatch"]["event_id"] == event_ids_cloudwatch_logs[0] - assert res["hits"]["hits"][2]["_source"]["cloud"]["provider"] == "aws" - assert res["hits"]["hits"][2]["_source"]["cloud"]["region"] == "us-east-1" - assert res["hits"]["hits"][2]["_source"]["cloud"]["account"]["id"] == "000000000000" - assert res["hits"]["hits"][2]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] - - logstash_message = self.logstash.get_messages(expected=0) - assert len(logstash_message) == 0 - - fourth_call = handler(events_kinesis, ctx) # type:ignore - - assert fourth_call == "completed" - - self.elasticsearch.refresh(index="logs-generic-default") - res = self.elasticsearch.search( - index="logs-generic-default", - query={ - "bool": { - "must_not": { - "ids": { - "values": [ - f"{prefix_s3_first}-000000000000", - f"{prefix_sqs}-000000000000", - f"{prefix_cloudwatch_logs}-000000000000", - f"{prefix_kinesis}-000000000000", - ] - } - } - } - }, - sort="_seq_no", - ) - - assert res["hits"]["total"] == {"value": 4, "relation": "eq"} - - assert res["hits"]["hits"][3]["_source"]["message"] == fixtures[1].rstrip("\n") - assert res["hits"]["hits"][3]["_source"]["log"]["offset"] == 94 - assert res["hits"]["hits"][3]["_source"]["log"]["file"]["path"] == kinesis_stream_arn - assert res["hits"]["hits"][3]["_source"]["aws"]["kinesis"]["type"] == "stream" - assert res["hits"]["hits"][3]["_source"]["aws"]["kinesis"]["partition_key"] == "PartitionKey" - assert res["hits"]["hits"][3]["_source"]["aws"]["kinesis"]["name"] == kinesis_stream_name - assert ( - res["hits"]["hits"][3]["_source"]["aws"]["kinesis"]["sequence_number"] - == events_kinesis["Records"][0]["kinesis"]["sequenceNumber"] - ) - assert res["hits"]["hits"][3]["_source"]["cloud"]["provider"] == "aws" - 
assert res["hits"]["hits"][3]["_source"]["cloud"]["region"] == "us-east-1" - assert res["hits"]["hits"][3]["_source"]["cloud"]["account"]["id"] == "000000000000" - assert res["hits"]["hits"][3]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] - - logstash_message = self.logstash.get_messages(expected=0) - assert len(logstash_message) == 0 - - replayed_events, _ = _sqs_get_messages(self.sqs_client, os.environ["SQS_REPLAY_URL"], self.sqs_replay_queue_arn) - with self.assertRaises(ReplayHandlerException): - handler(replayed_events, ctx) # type:ignore - - self.elasticsearch.refresh(index="logs-generic-default") - - # Remove the expected id for s3-sqs so that it can be replayed - self.elasticsearch.delete_by_query( - index="logs-generic-default", body={"query": {"ids": {"values": [f"{prefix_s3_first}-000000000000"]}}} - ) - - # Remove the expected id for sqs so that it can be replayed - self.elasticsearch.delete_by_query( - index="logs-generic-default", body={"query": {"ids": {"values": [f"{prefix_sqs}-000000000000"]}}} - ) - - # Remove the expected id for cloudwatch logs so that it can be replayed - self.elasticsearch.delete_by_query( - index="logs-generic-default", - body={"query": {"ids": {"values": [f"{prefix_cloudwatch_logs}-000000000000"]}}}, - ) - - # Remove the expected id for kinesis data stream so that it can be replayed - self.elasticsearch.delete_by_query( - index="logs-generic-default", - body={"query": {"ids": {"values": [f"{prefix_kinesis}-000000000000"]}}}, - ) - - self.elasticsearch.refresh(index="logs-generic-default") - - # let's update the config file so that logstash won't fail anymore - config_yaml = f""" - inputs: - - type: "kinesis-data-stream" - id: "{kinesis_stream_arn}" - tags: {self.default_tags} - outputs: {self.default_outputs} - - type: "cloudwatch-logs" - id: "{cloudwatch_group_arn}" - tags: {self.default_tags} - outputs: {self.default_outputs} - - type: sqs - id: "{sqs_queue_arn}" - tags: {self.default_tags} - outputs: {self.default_outputs} - - type: s3-sqs - id: "{s3_sqs_queue_arn}" - tags: {self.default_tags} - outputs: {self.default_outputs} - """ - - _s3_upload_content_to_bucket( - client=self.s3_client, - content=config_yaml.encode("utf-8"), - content_type="text/plain", - bucket_name=config_bucket_name, - key=config_file_path, - create_bucket=False, - ) - - ctx = ContextMock(remaining_time_in_millis=_REMAINING_TIME_FORCE_CONTINUE_0ms) - - # implicit wait for the message to be back on the queue - time.sleep(35) - replayed_events, _ = _sqs_get_messages(self.sqs_client, os.environ["SQS_REPLAY_URL"], self.sqs_replay_queue_arn) - fifth_call = handler(replayed_events, ctx) # type:ignore - - assert fifth_call == "replayed" - - self.elasticsearch.refresh(index="logs-generic-default") - assert self.elasticsearch.count(index="logs-generic-default")["count"] == 5 - - self.elasticsearch.refresh(index="logs-generic-default") - res = self.elasticsearch.search(index="logs-generic-default", sort="_seq_no") - - assert res["hits"]["total"] == {"value": 5, "relation": "eq"} - - assert res["hits"]["hits"][4]["_source"]["message"] == fixtures[0].rstrip("\n") - assert res["hits"]["hits"][4]["_source"]["log"]["offset"] == 0 - assert ( - res["hits"]["hits"][4]["_source"]["log"]["file"]["path"] - == f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{first_filename}" - ) - assert res["hits"]["hits"][4]["_source"]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name - assert res["hits"]["hits"][4]["_source"]["aws"]["s3"]["bucket"]["arn"] == 
f"arn:aws:s3:::{s3_bucket_name}" - assert res["hits"]["hits"][4]["_source"]["aws"]["s3"]["object"]["key"] == first_filename - assert res["hits"]["hits"][4]["_source"]["cloud"]["provider"] == "aws" - assert res["hits"]["hits"][4]["_source"]["cloud"]["region"] == "eu-central-1" - assert res["hits"]["hits"][4]["_source"]["cloud"]["account"]["id"] == "000000000000" - assert res["hits"]["hits"][4]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] - - logstash_message = self.logstash.get_messages(expected=0) - assert len(logstash_message) == 0 - - # implicit wait for the message to be back on the queue - time.sleep(35) - replayed_events, _ = _sqs_get_messages(self.sqs_client, os.environ["SQS_REPLAY_URL"], self.sqs_replay_queue_arn) - sixth_call = handler(replayed_events, ctx) # type:ignore - - assert sixth_call == "replayed" - - self.elasticsearch.refresh(index="logs-generic-default") - assert self.elasticsearch.count(index="logs-generic-default")["count"] == 5 - - res = self.elasticsearch.search(index="logs-generic-default", sort="_seq_no") - assert res["hits"]["total"] == {"value": 5, "relation": "eq"} - - logstash_message = self.logstash.get_messages(expected=1) - assert len(logstash_message) == 1 - # positions on res["hits"]["hits"] are skewed compared to logstash_message - # in elasticsearch we inserted the second event of each input before the first one - res["hits"]["hits"][4]["_source"]["tags"].remove("generic") - assert res["hits"]["hits"][4]["_source"]["aws"] == logstash_message[0]["aws"] - assert res["hits"]["hits"][4]["_source"]["cloud"] == logstash_message[0]["cloud"] - assert res["hits"]["hits"][4]["_source"]["log"] == logstash_message[0]["log"] - assert res["hits"]["hits"][4]["_source"]["message"] == logstash_message[0]["message"] - assert res["hits"]["hits"][4]["_source"]["tags"] == logstash_message[0]["tags"] - - ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) - - # implicit wait for the message to be back on the queue - time.sleep(35) - replayed_events, _ = _sqs_get_messages(self.sqs_client, os.environ["SQS_REPLAY_URL"], self.sqs_replay_queue_arn) - seventh_call = handler(replayed_events, ctx) # type:ignore - - assert seventh_call == "replayed" - - self.elasticsearch.refresh(index="logs-generic-default") - assert self.elasticsearch.count(index="logs-generic-default")["count"] == 8 - - self.elasticsearch.refresh(index="logs-generic-default") - res = self.elasticsearch.search(index="logs-generic-default", sort="_seq_no") - - assert res["hits"]["total"] == {"value": 8, "relation": "eq"} - - assert res["hits"]["hits"][5]["_source"]["message"] == fixtures[0].rstrip("\n") - assert res["hits"]["hits"][5]["_source"]["log"]["offset"] == 0 - assert res["hits"]["hits"][5]["_source"]["log"]["file"]["path"] == sqs_queue_url_path - assert res["hits"]["hits"][5]["_source"]["aws"]["sqs"]["name"] == sqs_queue_name - assert res["hits"]["hits"][5]["_source"]["aws"]["sqs"]["message_id"] == message_id - assert res["hits"]["hits"][5]["_source"]["cloud"]["provider"] == "aws" - assert res["hits"]["hits"][5]["_source"]["cloud"]["region"] == "us-east-1" - assert res["hits"]["hits"][5]["_source"]["cloud"]["account"]["id"] == "000000000000" - assert res["hits"]["hits"][5]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] - - assert res["hits"]["hits"][6]["_source"]["message"] == fixtures[0].rstrip("\n") - assert res["hits"]["hits"][6]["_source"]["log"]["offset"] == 0 - assert ( - res["hits"]["hits"][6]["_source"]["log"]["file"]["path"] 
- == f"{cloudwatch_group_name}/{cloudwatch_stream_name}" - ) - assert res["hits"]["hits"][6]["_source"]["aws"]["cloudwatch"]["log_group"] == cloudwatch_group_name - assert res["hits"]["hits"][6]["_source"]["aws"]["cloudwatch"]["log_stream"] == cloudwatch_stream_name - assert res["hits"]["hits"][6]["_source"]["aws"]["cloudwatch"]["event_id"] == event_ids_cloudwatch_logs[0] - assert res["hits"]["hits"][6]["_source"]["cloud"]["provider"] == "aws" - assert res["hits"]["hits"][6]["_source"]["cloud"]["region"] == "us-east-1" - assert res["hits"]["hits"][6]["_source"]["cloud"]["account"]["id"] == "000000000000" - assert res["hits"]["hits"][6]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] - - assert res["hits"]["hits"][7]["_source"]["message"] == fixtures[0].rstrip("\n") - assert res["hits"]["hits"][7]["_source"]["log"]["offset"] == 0 - assert res["hits"]["hits"][7]["_source"]["log"]["file"]["path"] == kinesis_stream_arn - assert res["hits"]["hits"][7]["_source"]["aws"]["kinesis"]["type"] == "stream" - assert res["hits"]["hits"][7]["_source"]["aws"]["kinesis"]["partition_key"] == "PartitionKey" - assert res["hits"]["hits"][7]["_source"]["aws"]["kinesis"]["name"] == kinesis_stream_name - assert ( - res["hits"]["hits"][7]["_source"]["aws"]["kinesis"]["sequence_number"] - == events_kinesis["Records"][0]["kinesis"]["sequenceNumber"] - ) - assert res["hits"]["hits"][7]["_source"]["cloud"]["provider"] == "aws" - assert res["hits"]["hits"][7]["_source"]["cloud"]["region"] == "us-east-1" - assert res["hits"]["hits"][7]["_source"]["cloud"]["account"]["id"] == "000000000000" - assert res["hits"]["hits"][7]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] - - logstash_message = self.logstash.get_messages(expected=8) - assert len(logstash_message) == 8 - res["hits"]["hits"][0]["_source"]["tags"].remove("generic") - res["hits"]["hits"][1]["_source"]["tags"].remove("generic") - res["hits"]["hits"][2]["_source"]["tags"].remove("generic") - res["hits"]["hits"][3]["_source"]["tags"].remove("generic") - res["hits"]["hits"][5]["_source"]["tags"].remove("generic") - res["hits"]["hits"][6]["_source"]["tags"].remove("generic") - res["hits"]["hits"][7]["_source"]["tags"].remove("generic") - - # positions on res["hits"]["hits"] are skewed compared to logstash_message - # in elasticsearch we inserted the second event of each input before the first one - assert res["hits"]["hits"][0]["_source"]["aws"] == logstash_message[1]["aws"] - assert res["hits"]["hits"][0]["_source"]["cloud"] == logstash_message[1]["cloud"] - assert res["hits"]["hits"][0]["_source"]["log"] == logstash_message[1]["log"] - assert res["hits"]["hits"][0]["_source"]["message"] == logstash_message[1]["message"] - assert res["hits"]["hits"][0]["_source"]["tags"] == logstash_message[1]["tags"] - - assert res["hits"]["hits"][5]["_source"]["aws"] == logstash_message[2]["aws"] - assert res["hits"]["hits"][5]["_source"]["cloud"] == logstash_message[2]["cloud"] - assert res["hits"]["hits"][5]["_source"]["log"] == logstash_message[2]["log"] - assert res["hits"]["hits"][5]["_source"]["message"] == logstash_message[2]["message"] - assert res["hits"]["hits"][5]["_source"]["tags"] == logstash_message[2]["tags"] - - assert res["hits"]["hits"][1]["_source"]["aws"] == logstash_message[3]["aws"] - assert res["hits"]["hits"][1]["_source"]["cloud"] == logstash_message[3]["cloud"] - assert res["hits"]["hits"][1]["_source"]["log"] == logstash_message[3]["log"] - assert res["hits"]["hits"][1]["_source"]["message"] == 
logstash_message[3]["message"] - assert res["hits"]["hits"][1]["_source"]["tags"] == logstash_message[3]["tags"] - - assert res["hits"]["hits"][6]["_source"]["aws"] == logstash_message[4]["aws"] - assert res["hits"]["hits"][6]["_source"]["cloud"] == logstash_message[4]["cloud"] - assert res["hits"]["hits"][6]["_source"]["log"] == logstash_message[4]["log"] - assert res["hits"]["hits"][6]["_source"]["message"] == logstash_message[4]["message"] - assert res["hits"]["hits"][6]["_source"]["tags"] == logstash_message[4]["tags"] - - assert res["hits"]["hits"][2]["_source"]["aws"] == logstash_message[5]["aws"] - assert res["hits"]["hits"][2]["_source"]["cloud"] == logstash_message[5]["cloud"] - assert res["hits"]["hits"][2]["_source"]["log"] == logstash_message[5]["log"] - assert res["hits"]["hits"][2]["_source"]["message"] == logstash_message[5]["message"] - assert res["hits"]["hits"][2]["_source"]["tags"] == logstash_message[5]["tags"] - - assert res["hits"]["hits"][7]["_source"]["aws"] == logstash_message[6]["aws"] - assert res["hits"]["hits"][7]["_source"]["cloud"] == logstash_message[6]["cloud"] - assert res["hits"]["hits"][7]["_source"]["log"] == logstash_message[6]["log"] - assert res["hits"]["hits"][7]["_source"]["message"] == logstash_message[6]["message"] - assert res["hits"]["hits"][7]["_source"]["tags"] == logstash_message[6]["tags"] - - assert res["hits"]["hits"][3]["_source"]["aws"] == logstash_message[7]["aws"] - assert res["hits"]["hits"][3]["_source"]["cloud"] == logstash_message[7]["cloud"] - assert res["hits"]["hits"][3]["_source"]["log"] == logstash_message[7]["log"] - assert res["hits"]["hits"][3]["_source"]["message"] == logstash_message[7]["message"] - assert res["hits"]["hits"][3]["_source"]["tags"] == logstash_message[7]["tags"] - - def test_empty(self) -> None: - assert isinstance(self.elasticsearch, ElasticsearchContainer) - assert isinstance(self.logstash, LogstashContainer) - assert isinstance(self.localstack, LocalStackContainer) - - fixtures = [" \n"] # once stripped it is an empty event - - s3_bucket_name = _time_based_id(suffix="test-bucket") - first_filename = "exportedlog/uuid/yyyy-mm-dd-[$LATEST]hash/000000.gz" - _s3_upload_content_to_bucket( - client=self.s3_client, - content=gzip.compress("".join(fixtures).encode("utf-8")), - content_type="application/x-gzip", - bucket_name=s3_bucket_name, - key=first_filename, - ) - - cloudwatch_group_name = _time_based_id(suffix="source-group") - cloudwatch_group = _logs_create_cloudwatch_logs_group(self.logs_client, group_name=cloudwatch_group_name) - - cloudwatch_stream_name = _time_based_id(suffix="source-stream") - _logs_create_cloudwatch_logs_stream( - self.logs_client, group_name=cloudwatch_group_name, stream_name=cloudwatch_stream_name - ) - - _logs_upload_event_to_cloudwatch_logs( - self.logs_client, - group_name=cloudwatch_group_name, - stream_name=cloudwatch_stream_name, - messages_body=["".join(fixtures)], - ) - - cloudwatch_group_arn = cloudwatch_group["arn"] - - cloudwatch_group_name = cloudwatch_group_name - cloudwatch_stream_name = cloudwatch_stream_name - - sqs_queue_name = _time_based_id(suffix="source-sqs") - s3_sqs_queue_name = _time_based_id(suffix="source-s3-sqs") - - sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) - s3_sqs_queue = _sqs_create_queue(self.sqs_client, s3_sqs_queue_name, self.localstack.get_url()) - - sqs_queue_arn = sqs_queue["QueueArn"] - sqs_queue_url = sqs_queue["QueueUrl"] - - s3_sqs_queue_arn = s3_sqs_queue["QueueArn"] - s3_sqs_queue_url = 
s3_sqs_queue["QueueUrl"] - - _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) - _sqs_send_s3_notifications(self.sqs_client, s3_sqs_queue_url, s3_bucket_name, [first_filename]) - - kinesis_stream_name = _time_based_id(suffix="source-kinesis") - kinesis_stream = _kinesis_create_stream(self.kinesis_client, kinesis_stream_name) - kinesis_stream_arn = kinesis_stream["StreamDescription"]["StreamARN"] - - _kinesis_put_records(self.kinesis_client, kinesis_stream_name, ["".join(fixtures)]) - - config_yaml: str = f""" - inputs: - - type: "kinesis-data-stream" - id: "{kinesis_stream_arn}" - outputs: {self.default_outputs} - - type: "cloudwatch-logs" - id: "{cloudwatch_group_arn}" - outputs: {self.default_outputs} - - type: sqs - id: "{sqs_queue_arn}" - outputs: {self.default_outputs} - - type: s3-sqs - id: "{s3_sqs_queue_arn}" - outputs: {self.default_outputs} - """ - - config_file_path = "config.yaml" - config_bucket_name = _time_based_id(suffix="config-bucket") - _s3_upload_content_to_bucket( - client=self.s3_client, - content=config_yaml.encode("utf-8"), - content_type="text/plain", - bucket_name=config_bucket_name, - key=config_file_path, - ) - - os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" - - events_s3, _ = _sqs_get_messages(self.sqs_client, s3_sqs_queue_url, s3_sqs_queue_arn) - - events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) - - events_cloudwatch_logs, _, _ = _logs_retrieve_event_from_cloudwatch_logs( - self.logs_client, cloudwatch_group_name, cloudwatch_stream_name - ) - - events_kinesis, _ = _kinesis_retrieve_event_from_kinesis_stream( - self.kinesis_client, kinesis_stream_name, kinesis_stream_arn - ) - - ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) - first_call = handler(events_s3, ctx) # type:ignore - - assert first_call == "completed" - - self.elasticsearch.refresh(index="logs-generic-default", ignore_unavailable=True) - assert self.elasticsearch.count(index="logs-generic-default", ignore_unavailable=True)["count"] == 0 - - logstash_message = self.logstash.get_messages(expected=0) - assert len(logstash_message) == 0 - - second_call = handler(events_sqs, ctx) # type:ignore - - assert second_call == "completed" - - self.elasticsearch.refresh(index="logs-generic-default", ignore_unavailable=True) - assert self.elasticsearch.count(index="logs-generic-default", ignore_unavailable=True)["count"] == 0 - - logstash_message = self.logstash.get_messages(expected=0) - assert len(logstash_message) == 0 - - third_call = handler(events_cloudwatch_logs, ctx) # type:ignore - - assert third_call == "completed" - - self.elasticsearch.refresh(index="logs-generic-default", ignore_unavailable=True) - assert self.elasticsearch.count(index="logs-generic-default", ignore_unavailable=True)["count"] == 0 - - logstash_message = self.logstash.get_messages(expected=0) - assert len(logstash_message) == 0 - - fourth_call = handler(events_kinesis, ctx) # type:ignore - - assert fourth_call == "completed" - - self.elasticsearch.refresh(index="logs-generic-default", ignore_unavailable=True) - assert self.elasticsearch.count(index="logs-generic-default", ignore_unavailable=True)["count"] == 0 - - logstash_message = self.logstash.get_messages(expected=0) - assert len(logstash_message) == 0 - - def test_filtered(self) -> None: - assert isinstance(self.elasticsearch, ElasticsearchContainer) - assert isinstance(self.logstash, LogstashContainer) - assert isinstance(self.localstack, LocalStackContainer) - - 
fixtures = ["excluded"] - - s3_bucket_name = _time_based_id(suffix="test-bucket") - first_filename = "exportedlog/uuid/yyyy-mm-dd-[$LATEST]hash/000000.gz" - _s3_upload_content_to_bucket( - client=self.s3_client, - content=gzip.compress("".join(fixtures).encode("utf-8")), - content_type="application/x-gzip", - bucket_name=s3_bucket_name, - key=first_filename, - ) - - cloudwatch_group_name = _time_based_id(suffix="source-group") - cloudwatch_group = _logs_create_cloudwatch_logs_group(self.logs_client, group_name=cloudwatch_group_name) - - cloudwatch_stream_name = _time_based_id(suffix="source-stream") - _logs_create_cloudwatch_logs_stream( - self.logs_client, group_name=cloudwatch_group_name, stream_name=cloudwatch_stream_name - ) - - _logs_upload_event_to_cloudwatch_logs( - self.logs_client, - group_name=cloudwatch_group_name, - stream_name=cloudwatch_stream_name, - messages_body=["".join(fixtures)], - ) - - cloudwatch_group_arn = cloudwatch_group["arn"] - - cloudwatch_group_name = cloudwatch_group_name - cloudwatch_stream_name = cloudwatch_stream_name - - sqs_queue_name = _time_based_id(suffix="source-sqs") - s3_sqs_queue_name = _time_based_id(suffix="source-s3-sqs") - - sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) - s3_sqs_queue = _sqs_create_queue(self.sqs_client, s3_sqs_queue_name, self.localstack.get_url()) - - sqs_queue_arn = sqs_queue["QueueArn"] - sqs_queue_url = sqs_queue["QueueUrl"] - - s3_sqs_queue_arn = s3_sqs_queue["QueueArn"] - s3_sqs_queue_url = s3_sqs_queue["QueueUrl"] - - _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) - _sqs_send_s3_notifications(self.sqs_client, s3_sqs_queue_url, s3_bucket_name, [first_filename]) - - kinesis_stream_name = _time_based_id(suffix="source-kinesis") - kinesis_stream = _kinesis_create_stream(self.kinesis_client, kinesis_stream_name) - kinesis_stream_arn = kinesis_stream["StreamDescription"]["StreamARN"] - - _kinesis_put_records(self.kinesis_client, kinesis_stream_name, ["".join(fixtures)]) - - config_yaml: str = f""" - inputs: - - type: "kinesis-data-stream" - id: "{kinesis_stream_arn}" - exclude: - - "excluded" - outputs: {self.default_outputs} - - type: "cloudwatch-logs" - id: "{cloudwatch_group_arn}" - exclude: - - "excluded" - outputs: {self.default_outputs} - - type: sqs - id: "{sqs_queue_arn}" - exclude: - - "excluded" - outputs: {self.default_outputs} - - type: s3-sqs - id: "{s3_sqs_queue_arn}" - exclude: - - "excluded" - outputs: {self.default_outputs} - """ - - config_file_path = "config.yaml" - config_bucket_name = _time_based_id(suffix="config-bucket") - _s3_upload_content_to_bucket( - client=self.s3_client, - content=config_yaml.encode("utf-8"), - content_type="text/plain", - bucket_name=config_bucket_name, - key=config_file_path, - ) - - os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" - - events_s3, _ = _sqs_get_messages(self.sqs_client, s3_sqs_queue_url, s3_sqs_queue_arn) - - events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) - - events_cloudwatch_logs, _, _ = _logs_retrieve_event_from_cloudwatch_logs( - self.logs_client, cloudwatch_group_name, cloudwatch_stream_name - ) - - events_kinesis, _ = _kinesis_retrieve_event_from_kinesis_stream( - self.kinesis_client, kinesis_stream_name, kinesis_stream_arn - ) - - ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) - first_call = handler(events_s3, ctx) # type:ignore - - assert first_call == "completed" - - 
self.elasticsearch.refresh(index="logs-generic-default", ignore_unavailable=True) - assert self.elasticsearch.count(index="logs-generic-default", ignore_unavailable=True)["count"] == 0 - - logstash_message = self.logstash.get_messages(expected=0) - assert len(logstash_message) == 0 - - second_call = handler(events_sqs, ctx) # type:ignore - - assert second_call == "completed" - - self.elasticsearch.refresh(index="logs-generic-default", ignore_unavailable=True) - assert self.elasticsearch.count(index="logs-generic-default", ignore_unavailable=True)["count"] == 0 - - logstash_message = self.logstash.get_messages(expected=0) - assert len(logstash_message) == 0 - - third_call = handler(events_cloudwatch_logs, ctx) # type:ignore - - assert third_call == "completed" - - self.elasticsearch.refresh(index="logs-generic-default", ignore_unavailable=True) - assert self.elasticsearch.count(index="logs-generic-default", ignore_unavailable=True)["count"] == 0 - - logstash_message = self.logstash.get_messages(expected=0) - assert len(logstash_message) == 0 - - fourth_call = handler(events_kinesis, ctx) # type:ignore - - assert fourth_call == "completed" - - self.elasticsearch.refresh(index="logs-generic-default", ignore_unavailable=True) - assert self.elasticsearch.count(index="logs-generic-default", ignore_unavailable=True)["count"] == 0 - - logstash_message = self.logstash.get_messages(expected=0) - assert len(logstash_message) == 0 - - def test_expand_event_from_list_empty_line(self) -> None: - assert isinstance(self.logstash, LogstashContainer) - assert isinstance(self.localstack, LocalStackContainer) - - first_expanded_event: str = _load_file_fixture("cloudwatch-log-1.json") - second_expanded_event: str = _load_file_fixture("cloudwatch-log-2.json") - third_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") - - fixtures = [ - f"""{{"aField": [{first_expanded_event},{second_expanded_event}]}}\n""" - f"""\n{{"aField": [{third_expanded_event}]}}""" - ] - - sqs_queue_name = _time_based_id(suffix="source-sqs") - - sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) - - sqs_queue_arn = sqs_queue["QueueArn"] - sqs_queue_url = sqs_queue["QueueUrl"] - sqs_queue_url_path = sqs_queue["QueueUrlPath"] - - _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) - - config_yaml: str = f""" - inputs: - - type: "sqs" - id: "{sqs_queue_arn}" - expand_event_list_from_field: aField - tags: {self.default_tags} - outputs: - - type: "logstash" - args: - logstash_url: "{self.logstash.get_url()}" - ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} - username: "{self.logstash.logstash_user}" - password: "{self.logstash.logstash_password}" - """ - - config_file_path = "config.yaml" - config_bucket_name = _time_based_id(suffix="config-bucket") - _s3_upload_content_to_bucket( - client=self.s3_client, - content=config_yaml.encode("utf-8"), - content_type="text/plain", - bucket_name=config_bucket_name, - key=config_file_path, - ) - - os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" - - events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) - - message_id = events_sqs["Records"][0]["messageId"] - - ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) - - first_call = handler(events_sqs, ctx) # type:ignore - - assert first_call == "completed" - - logstash_message = self.logstash.get_messages(expected=3) - assert len(logstash_message) == 3 - - assert 
logstash_message[0]["message"] == json_dumper(json_parser(first_expanded_event)) - assert logstash_message[0]["log"]["offset"] == 0 - assert logstash_message[0]["log"]["file"]["path"] == sqs_queue_url_path - assert logstash_message[0]["aws"]["sqs"]["name"] == sqs_queue_name - assert logstash_message[0]["aws"]["sqs"]["message_id"] == message_id - assert logstash_message[0]["cloud"]["provider"] == "aws" - assert logstash_message[0]["cloud"]["region"] == "us-east-1" - assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - assert logstash_message[1]["message"] == json_dumper(json_parser(second_expanded_event)) - assert logstash_message[1]["log"]["offset"] == 174 - assert logstash_message[1]["log"]["file"]["path"] == sqs_queue_url_path - assert logstash_message[1]["aws"]["sqs"]["name"] == sqs_queue_name - assert logstash_message[1]["aws"]["sqs"]["message_id"] == message_id - assert logstash_message[1]["cloud"]["provider"] == "aws" - assert logstash_message[1]["cloud"]["region"] == "us-east-1" - assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - assert logstash_message[2]["message"] == json_dumper(json_parser(third_expanded_event)) - assert logstash_message[2]["log"]["offset"] == 349 - assert logstash_message[2]["log"]["file"]["path"] == sqs_queue_url_path - assert logstash_message[2]["aws"]["sqs"]["name"] == sqs_queue_name - assert logstash_message[2]["aws"]["sqs"]["message_id"] == message_id - assert logstash_message[2]["cloud"]["provider"] == "aws" - assert logstash_message[2]["cloud"]["region"] == "us-east-1" - assert logstash_message[2]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[2]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - def test_expand_event_from_list_empty_event_not_expanded(self) -> None: - assert isinstance(self.logstash, LogstashContainer) - assert isinstance(self.localstack, LocalStackContainer) - - first_expanded_event: str = _load_file_fixture("cloudwatch-log-1.json") - second_expanded_event: str = _load_file_fixture("cloudwatch-log-2.json") - - fixtures = [f"""{{"aField": [{first_expanded_event},"",{second_expanded_event}]}}"""] - - sqs_queue_name = _time_based_id(suffix="source-sqs") - - sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) - - sqs_queue_arn = sqs_queue["QueueArn"] - sqs_queue_url = sqs_queue["QueueUrl"] - sqs_queue_url_path = sqs_queue["QueueUrlPath"] - - _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) - - config_yaml: str = f""" - inputs: - - type: "sqs" - id: "{sqs_queue_arn}" - expand_event_list_from_field: aField - tags: {self.default_tags} - outputs: - - type: "logstash" - args: - logstash_url: "{self.logstash.get_url()}" - ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} - username: "{self.logstash.logstash_user}" - password: "{self.logstash.logstash_password}" - """ - - config_file_path = "config.yaml" - config_bucket_name = _time_based_id(suffix="config-bucket") - _s3_upload_content_to_bucket( - client=self.s3_client, - content=config_yaml.encode("utf-8"), - content_type="text/plain", - bucket_name=config_bucket_name, - key=config_file_path, - ) - - os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" - - events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) - - message_id = 
events_sqs["Records"][0]["messageId"] - - ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) - - first_call = handler(events_sqs, ctx) # type:ignore - - assert first_call == "completed" - - logstash_message = self.logstash.get_messages(expected=2) - assert len(logstash_message) == 2 - - assert logstash_message[0]["message"] == json_dumper(json_parser(first_expanded_event)) - assert logstash_message[0]["log"]["offset"] == 0 - assert logstash_message[0]["log"]["file"]["path"] == sqs_queue_url_path - assert logstash_message[0]["aws"]["sqs"]["name"] == sqs_queue_name - assert logstash_message[0]["aws"]["sqs"]["message_id"] == message_id - assert logstash_message[0]["cloud"]["provider"] == "aws" - assert logstash_message[0]["cloud"]["region"] == "us-east-1" - assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - assert logstash_message[1]["message"] == json_dumper(json_parser(second_expanded_event)) - assert logstash_message[1]["log"]["offset"] == 233 - assert logstash_message[1]["log"]["file"]["path"] == sqs_queue_url_path - assert logstash_message[1]["aws"]["sqs"]["name"] == sqs_queue_name - assert logstash_message[1]["aws"]["sqs"]["message_id"] == message_id - assert logstash_message[1]["cloud"]["provider"] == "aws" - assert logstash_message[1]["cloud"]["region"] == "us-east-1" - assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - def test_root_fields_to_add_to_expanded_event_no_dict_event(self) -> None: - assert isinstance(self.logstash, LogstashContainer) - assert isinstance(self.localstack, LocalStackContainer) - - first_expanded_event: str = '"first_expanded_event"' - second_expanded_event: str = '"second_expanded_event"' - third_expanded_event: str = '"third_expanded_event"' - - fixtures = [ - f"""{{"firstRootField": "firstRootField", "secondRootField":"secondRootField", - "aField": [{first_expanded_event},{second_expanded_event},{third_expanded_event}]}}""" - ] - - sqs_queue_name = _time_based_id(suffix="source-sqs") - - sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) - - sqs_queue_arn = sqs_queue["QueueArn"] - sqs_queue_url = sqs_queue["QueueUrl"] - sqs_queue_url_path = sqs_queue["QueueUrlPath"] - - _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) - - config_yaml: str = f""" - inputs: - - type: "sqs" - id: "{sqs_queue_arn}" - expand_event_list_from_field: aField - root_fields_to_add_to_expanded_event: ["secondRootField"] - tags: {self.default_tags} - outputs: - - type: "logstash" - args: - logstash_url: "{self.logstash.get_url()}" - ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} - username: "{self.logstash.logstash_user}" - password: "{self.logstash.logstash_password}" - """ - - config_file_path = "config.yaml" - config_bucket_name = _time_based_id(suffix="config-bucket") - _s3_upload_content_to_bucket( - client=self.s3_client, - content=config_yaml.encode("utf-8"), - content_type="text/plain", - bucket_name=config_bucket_name, - key=config_file_path, - ) - - os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" - - events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) - - message_id = events_sqs["Records"][0]["messageId"] - - ctx = ContextMock() - first_call = handler(events_sqs, ctx) # type:ignore - - assert first_call == 
"continuing" - - logstash_message = self.logstash.get_messages(expected=1) - assert len(logstash_message) == 1 - - assert logstash_message[0]["message"] == first_expanded_event - assert logstash_message[0]["log"]["offset"] == 0 - assert logstash_message[0]["log"]["file"]["path"] == sqs_queue_url_path - assert logstash_message[0]["aws"]["sqs"]["name"] == sqs_queue_name - assert logstash_message[0]["aws"]["sqs"]["message_id"] == message_id - assert logstash_message[0]["cloud"]["provider"] == "aws" - assert logstash_message[0]["cloud"]["region"] == "us-east-1" - assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) - - continued_events, _ = _sqs_get_messages( - self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn - ) - second_call = handler(continued_events, ctx) # type:ignore - - assert second_call == "completed" - - logstash_message = self.logstash.get_messages(expected=3) - assert len(logstash_message) == 3 - - assert logstash_message[1]["message"] == second_expanded_event - assert logstash_message[1]["log"]["offset"] == 56 - assert logstash_message[1]["log"]["file"]["path"] == sqs_queue_url_path - assert logstash_message[1]["aws"]["sqs"]["name"] == sqs_queue_name - assert logstash_message[1]["aws"]["sqs"]["message_id"] == message_id - assert logstash_message[1]["cloud"]["provider"] == "aws" - assert logstash_message[1]["cloud"]["region"] == "us-east-1" - assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - assert logstash_message[2]["message"] == third_expanded_event - assert logstash_message[2]["log"]["offset"] == 112 - assert logstash_message[2]["log"]["file"]["path"] == sqs_queue_url_path - assert logstash_message[2]["aws"]["sqs"]["name"] == sqs_queue_name - assert logstash_message[2]["aws"]["sqs"]["message_id"] == message_id - assert logstash_message[2]["cloud"]["provider"] == "aws" - assert logstash_message[2]["cloud"]["region"] == "us-east-1" - assert logstash_message[2]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[2]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - def test_root_fields_to_add_to_expanded_event_event_not_expanded(self) -> None: - assert isinstance(self.logstash, LogstashContainer) - assert isinstance(self.localstack, LocalStackContainer) - - first_expanded_event: str = _load_file_fixture("cloudwatch-log-1.json") - first_expanded_with_root_fields: dict[str, Any] = json_parser(first_expanded_event) - first_expanded_with_root_fields["secondRootField"] = "secondRootField" - - second_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") - second_expanded_with_root_fields: dict[str, Any] = json_parser(second_expanded_event) - second_expanded_with_root_fields["secondRootField"] = "secondRootField" - - fixtures = [ - f"""{{"firstRootField": "firstRootField", "secondRootField":"secondRootField", - "aField": [{first_expanded_event},{{}},{second_expanded_event}]}}""" - ] - - sqs_queue_name = _time_based_id(suffix="source-sqs") - - sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) - - sqs_queue_arn = sqs_queue["QueueArn"] - sqs_queue_url = sqs_queue["QueueUrl"] - sqs_queue_url_path = sqs_queue["QueueUrlPath"] - - _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) - - config_yaml: str = 
f""" - inputs: - - type: "sqs" - id: "{sqs_queue_arn}" - expand_event_list_from_field: aField - root_fields_to_add_to_expanded_event: ["secondRootField"] - tags: {self.default_tags} - outputs: - - type: "logstash" - args: - logstash_url: "{self.logstash.get_url()}" - ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} - username: "{self.logstash.logstash_user}" - password: "{self.logstash.logstash_password}" - """ - - config_file_path = "config.yaml" - config_bucket_name = _time_based_id(suffix="config-bucket") - _s3_upload_content_to_bucket( - client=self.s3_client, - content=config_yaml.encode("utf-8"), - content_type="text/plain", - bucket_name=config_bucket_name, - key=config_file_path, - ) - - os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" - - events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) - - message_id = events_sqs["Records"][0]["messageId"] - - ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) - - first_call = handler(events_sqs, ctx) # type:ignore - - assert first_call == "completed" - - logstash_message = self.logstash.get_messages(expected=2) - assert len(logstash_message) == 2 - - assert logstash_message[0]["message"] == json_dumper(first_expanded_with_root_fields) - assert logstash_message[0]["log"]["offset"] == 0 - assert logstash_message[0]["log"]["file"]["path"] == sqs_queue_url_path - assert logstash_message[0]["aws"]["sqs"]["name"] == sqs_queue_name - assert logstash_message[0]["aws"]["sqs"]["message_id"] == message_id - assert logstash_message[0]["cloud"]["provider"] == "aws" - assert logstash_message[0]["cloud"]["region"] == "us-east-1" - assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - assert logstash_message[1]["message"] == json_dumper(second_expanded_with_root_fields) - assert logstash_message[1]["log"]["offset"] == 180 - assert logstash_message[1]["log"]["file"]["path"] == sqs_queue_url_path - assert logstash_message[1]["aws"]["sqs"]["name"] == sqs_queue_name - assert logstash_message[1]["aws"]["sqs"]["message_id"] == message_id - assert logstash_message[1]["cloud"]["provider"] == "aws" - assert logstash_message[1]["cloud"]["region"] == "us-east-1" - assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - def test_root_fields_to_add_to_expanded_event_list(self) -> None: - assert isinstance(self.logstash, LogstashContainer) - assert isinstance(self.localstack, LocalStackContainer) - - first_expanded_event: str = _load_file_fixture("cloudwatch-log-1.json") - first_expanded_with_root_fields: dict[str, Any] = json_parser(first_expanded_event) - first_expanded_with_root_fields["secondRootField"] = "secondRootField" - - second_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") - second_expanded_with_root_fields: dict[str, Any] = json_parser(second_expanded_event) - second_expanded_with_root_fields["secondRootField"] = "secondRootField" - - third_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") - third_expanded_event_with_root_fields: dict[str, Any] = json_parser(third_expanded_event) - third_expanded_event_with_root_fields["secondRootField"] = "secondRootField" - - fixtures = [ - f"""{{"firstRootField": "firstRootField", "secondRootField":"secondRootField", - "aField": 
[{first_expanded_event},{second_expanded_event},{third_expanded_event}]}}""" - ] - - sqs_queue_name = _time_based_id(suffix="source-sqs") - - sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) - - sqs_queue_arn = sqs_queue["QueueArn"] - sqs_queue_url = sqs_queue["QueueUrl"] - sqs_queue_url_path = sqs_queue["QueueUrlPath"] - - _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) - - config_yaml: str = f""" - inputs: - - type: "sqs" - id: "{sqs_queue_arn}" - expand_event_list_from_field: aField - root_fields_to_add_to_expanded_event: ["secondRootField"] - tags: {self.default_tags} - outputs: - - type: "logstash" - args: - logstash_url: "{self.logstash.get_url()}" - ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} - username: "{self.logstash.logstash_user}" - password: "{self.logstash.logstash_password}" - """ - - config_file_path = "config.yaml" - config_bucket_name = _time_based_id(suffix="config-bucket") - _s3_upload_content_to_bucket( - client=self.s3_client, - content=config_yaml.encode("utf-8"), - content_type="text/plain", - bucket_name=config_bucket_name, - key=config_file_path, - ) - - os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" - - events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) - - message_id = events_sqs["Records"][0]["messageId"] - - ctx = ContextMock() - first_call = handler(events_sqs, ctx) # type:ignore - - assert first_call == "continuing" - - logstash_message = self.logstash.get_messages(expected=1) - assert len(logstash_message) == 1 - - assert logstash_message[0]["message"] == json_dumper(first_expanded_with_root_fields) - assert logstash_message[0]["log"]["offset"] == 0 - assert logstash_message[0]["log"]["file"]["path"] == sqs_queue_url_path - assert logstash_message[0]["aws"]["sqs"]["name"] == sqs_queue_name - assert logstash_message[0]["aws"]["sqs"]["message_id"] == message_id - assert logstash_message[0]["cloud"]["provider"] == "aws" - assert logstash_message[0]["cloud"]["region"] == "us-east-1" - assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) - - continued_events, _ = _sqs_get_messages( - self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn - ) - second_call = handler(continued_events, ctx) # type:ignore - - assert second_call == "completed" - - logstash_message = self.logstash.get_messages(expected=3) - assert len(logstash_message) == 3 - - assert logstash_message[1]["message"] == json_dumper(second_expanded_with_root_fields) - assert logstash_message[1]["log"]["offset"] == 114 - assert logstash_message[1]["log"]["file"]["path"] == sqs_queue_url_path - assert logstash_message[1]["aws"]["sqs"]["name"] == sqs_queue_name - assert logstash_message[1]["aws"]["sqs"]["message_id"] == message_id - assert logstash_message[1]["cloud"]["provider"] == "aws" - assert logstash_message[1]["cloud"]["region"] == "us-east-1" - assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - assert logstash_message[2]["message"] == json_dumper(third_expanded_event_with_root_fields) - assert logstash_message[2]["log"]["offset"] == 228 - assert logstash_message[2]["log"]["file"]["path"] == sqs_queue_url_path - assert logstash_message[2]["aws"]["sqs"]["name"] == 
sqs_queue_name - assert logstash_message[2]["aws"]["sqs"]["message_id"] == message_id - assert logstash_message[2]["cloud"]["provider"] == "aws" - assert logstash_message[2]["cloud"]["region"] == "us-east-1" - assert logstash_message[2]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[2]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - def test_root_fields_to_add_to_expanded_event_list_no_fields_in_root(self) -> None: - assert isinstance(self.logstash, LogstashContainer) - assert isinstance(self.localstack, LocalStackContainer) - - first_expanded_event: str = _load_file_fixture("cloudwatch-log-1.json") - first_expanded_with_root_fields: dict[str, Any] = json_parser(first_expanded_event) - first_expanded_with_root_fields["secondRootField"] = "secondRootField" - - second_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") - second_expanded_with_root_fields: dict[str, Any] = json_parser(second_expanded_event) - second_expanded_with_root_fields["secondRootField"] = "secondRootField" - - third_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") - third_expanded_event_with_root_fields: dict[str, Any] = json_parser(third_expanded_event) - third_expanded_event_with_root_fields["secondRootField"] = "secondRootField" - - fixtures = [ - f"""{{"firstRootField": "firstRootField", "secondRootField":"secondRootField", - "aField": [{first_expanded_event},{second_expanded_event},{third_expanded_event}]}}""" - ] - - sqs_queue_name = _time_based_id(suffix="source-sqs") - - sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) - - sqs_queue_arn = sqs_queue["QueueArn"] - sqs_queue_url = sqs_queue["QueueUrl"] - sqs_queue_url_path = sqs_queue["QueueUrlPath"] - - _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) - - config_yaml: str = f""" - inputs: - - type: "sqs" - id: "{sqs_queue_arn}" - expand_event_list_from_field: aField - root_fields_to_add_to_expanded_event: ["secondRootField", "thirdRootField"] - tags: {self.default_tags} - outputs: - - type: "logstash" - args: - logstash_url: "{self.logstash.get_url()}" - ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} - username: "{self.logstash.logstash_user}" - password: "{self.logstash.logstash_password}" - """ - - config_file_path = "config.yaml" - config_bucket_name = _time_based_id(suffix="config-bucket") - _s3_upload_content_to_bucket( - client=self.s3_client, - content=config_yaml.encode("utf-8"), - content_type="text/plain", - bucket_name=config_bucket_name, - key=config_file_path, - ) - - os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" - - events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) - - message_id = events_sqs["Records"][0]["messageId"] - - ctx = ContextMock() - first_call = handler(events_sqs, ctx) # type:ignore - - assert first_call == "continuing" - - logstash_message = self.logstash.get_messages(expected=1) - assert len(logstash_message) == 1 - - assert logstash_message[0]["message"] == json_dumper(first_expanded_with_root_fields) - assert logstash_message[0]["log"]["offset"] == 0 - assert logstash_message[0]["log"]["file"]["path"] == sqs_queue_url_path - assert logstash_message[0]["aws"]["sqs"]["name"] == sqs_queue_name - assert logstash_message[0]["aws"]["sqs"]["message_id"] == message_id - assert logstash_message[0]["cloud"]["provider"] == "aws" - assert logstash_message[0]["cloud"]["region"] == "us-east-1" - assert logstash_message[0]["cloud"]["account"]["id"] == 
"000000000000" - assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) - - continued_events, _ = _sqs_get_messages( - self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn - ) - second_call = handler(continued_events, ctx) # type:ignore - - assert second_call == "completed" - - logstash_message = self.logstash.get_messages(expected=3) - assert len(logstash_message) == 3 - - assert logstash_message[1]["message"] == json_dumper(second_expanded_with_root_fields) - assert logstash_message[1]["log"]["offset"] == 114 - assert logstash_message[1]["log"]["file"]["path"] == sqs_queue_url_path - assert logstash_message[1]["aws"]["sqs"]["name"] == sqs_queue_name - assert logstash_message[1]["aws"]["sqs"]["message_id"] == message_id - assert logstash_message[1]["cloud"]["provider"] == "aws" - assert logstash_message[1]["cloud"]["region"] == "us-east-1" - assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - assert logstash_message[2]["message"] == json_dumper(third_expanded_event_with_root_fields) - assert logstash_message[2]["log"]["offset"] == 228 - assert logstash_message[2]["log"]["file"]["path"] == sqs_queue_url_path - assert logstash_message[2]["aws"]["sqs"]["name"] == sqs_queue_name - assert logstash_message[2]["aws"]["sqs"]["message_id"] == message_id - assert logstash_message[2]["cloud"]["provider"] == "aws" - assert logstash_message[2]["cloud"]["region"] == "us-east-1" - assert logstash_message[2]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[2]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - def test_root_fields_to_add_to_expanded_event_all(self) -> None: - assert isinstance(self.logstash, LogstashContainer) - assert isinstance(self.localstack, LocalStackContainer) - - first_expanded_event: str = _load_file_fixture("cloudwatch-log-1.json") - first_expanded_with_root_fields: dict[str, Any] = json_parser(first_expanded_event) - first_expanded_with_root_fields["firstRootField"] = "firstRootField" - first_expanded_with_root_fields["secondRootField"] = "secondRootField" - - second_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") - second_expanded_with_root_fields: dict[str, Any] = json_parser(second_expanded_event) - second_expanded_with_root_fields["firstRootField"] = "firstRootField" - second_expanded_with_root_fields["secondRootField"] = "secondRootField" - - third_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") - third_expanded_event_with_root_fields: dict[str, Any] = json_parser(third_expanded_event) - third_expanded_event_with_root_fields["firstRootField"] = "firstRootField" - third_expanded_event_with_root_fields["secondRootField"] = "secondRootField" - - fixtures = [ - f"""{{"firstRootField": "firstRootField", "secondRootField":"secondRootField", - "aField": [{first_expanded_event},{second_expanded_event},{third_expanded_event}]}}""" - ] - - sqs_queue_name = _time_based_id(suffix="source-sqs") - - sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) - - sqs_queue_arn = sqs_queue["QueueArn"] - sqs_queue_url = sqs_queue["QueueUrl"] - sqs_queue_url_path = sqs_queue["QueueUrlPath"] - - _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) - - config_yaml: str = f""" - inputs: - - type: "sqs" - id: "{sqs_queue_arn}" - expand_event_list_from_field: aField - 
root_fields_to_add_to_expanded_event: all - tags: {self.default_tags} - outputs: - - type: "logstash" - args: - logstash_url: "{self.logstash.get_url()}" - ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} - username: "{self.logstash.logstash_user}" - password: "{self.logstash.logstash_password}" - """ - - config_file_path = "config.yaml" - config_bucket_name = _time_based_id(suffix="config-bucket") - _s3_upload_content_to_bucket( - client=self.s3_client, - content=config_yaml.encode("utf-8"), - content_type="text/plain", - bucket_name=config_bucket_name, - key=config_file_path, - ) - - os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" - - events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) - - message_id = events_sqs["Records"][0]["messageId"] - - ctx = ContextMock() - first_call = handler(events_sqs, ctx) # type:ignore - - assert first_call == "continuing" - - logstash_message = self.logstash.get_messages(expected=1) - assert len(logstash_message) == 1 - - assert logstash_message[0]["message"] == json_dumper(first_expanded_with_root_fields) - assert logstash_message[0]["log"]["offset"] == 0 - assert logstash_message[0]["log"]["file"]["path"] == sqs_queue_url_path - assert logstash_message[0]["aws"]["sqs"]["name"] == sqs_queue_name - assert logstash_message[0]["aws"]["sqs"]["message_id"] == message_id - assert logstash_message[0]["cloud"]["provider"] == "aws" - assert logstash_message[0]["cloud"]["region"] == "us-east-1" - assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) - - continued_events, _ = _sqs_get_messages( - self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn - ) - second_call = handler(continued_events, ctx) # type:ignore - - assert second_call == "completed" - - logstash_message = self.logstash.get_messages(expected=3) - assert len(logstash_message) == 3 - - assert logstash_message[1]["message"] == json_dumper(second_expanded_with_root_fields) - assert logstash_message[1]["log"]["offset"] == 114 - assert logstash_message[1]["log"]["file"]["path"] == sqs_queue_url_path - assert logstash_message[1]["aws"]["sqs"]["name"] == sqs_queue_name - assert logstash_message[1]["aws"]["sqs"]["message_id"] == message_id - assert logstash_message[1]["cloud"]["provider"] == "aws" - assert logstash_message[1]["cloud"]["region"] == "us-east-1" - assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - assert logstash_message[2]["message"] == json_dumper(third_expanded_event_with_root_fields) - assert logstash_message[2]["log"]["offset"] == 228 - assert logstash_message[2]["log"]["file"]["path"] == sqs_queue_url_path - assert logstash_message[2]["aws"]["sqs"]["name"] == sqs_queue_name - assert logstash_message[2]["aws"]["sqs"]["message_id"] == message_id - assert logstash_message[2]["cloud"]["provider"] == "aws" - assert logstash_message[2]["cloud"]["region"] == "us-east-1" - assert logstash_message[2]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[2]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - def test_root_fields_to_add_to_expanded_event_all_no_fields_in_root(self) -> None: - assert isinstance(self.logstash, LogstashContainer) - assert isinstance(self.localstack, LocalStackContainer) - - 
first_expanded_event: str = _load_file_fixture("cloudwatch-log-1.json") - first_expanded_with_root_fields: dict[str, Any] = json_parser(first_expanded_event) - - second_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") - second_expanded_with_root_fields: dict[str, Any] = json_parser(second_expanded_event) - - third_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") - third_expanded_event_with_root_fields: dict[str, Any] = json_parser(third_expanded_event) - - fixtures = [f"""{{"aField": [{first_expanded_event},{second_expanded_event},{third_expanded_event}]}}"""] - - sqs_queue_name = _time_based_id(suffix="source-sqs") - - sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) - - sqs_queue_arn = sqs_queue["QueueArn"] - sqs_queue_url = sqs_queue["QueueUrl"] - sqs_queue_url_path = sqs_queue["QueueUrlPath"] - - _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) - - config_yaml: str = f""" - inputs: - - type: "sqs" - id: "{sqs_queue_arn}" - expand_event_list_from_field: aField - root_fields_to_add_to_expanded_event: all - tags: {self.default_tags} - outputs: - - type: "logstash" - args: - logstash_url: "{self.logstash.get_url()}" - ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} - username: "{self.logstash.logstash_user}" - password: "{self.logstash.logstash_password}" - """ - - config_file_path = "config.yaml" - config_bucket_name = _time_based_id(suffix="config-bucket") - _s3_upload_content_to_bucket( - client=self.s3_client, - content=config_yaml.encode("utf-8"), - content_type="text/plain", - bucket_name=config_bucket_name, - key=config_file_path, - ) - - os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" - - events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) - - message_id = events_sqs["Records"][0]["messageId"] - - ctx = ContextMock() - first_call = handler(events_sqs, ctx) # type:ignore - - assert first_call == "continuing" - - logstash_message = self.logstash.get_messages(expected=1) - assert len(logstash_message) == 1 - - assert logstash_message[0]["message"] == json_dumper(first_expanded_with_root_fields) - assert logstash_message[0]["log"]["offset"] == 0 - assert logstash_message[0]["log"]["file"]["path"] == sqs_queue_url_path - assert logstash_message[0]["aws"]["sqs"]["name"] == sqs_queue_name - assert logstash_message[0]["aws"]["sqs"]["message_id"] == message_id - assert logstash_message[0]["cloud"]["provider"] == "aws" - assert logstash_message[0]["cloud"]["region"] == "us-east-1" - assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) - - continued_events, _ = _sqs_get_messages( - self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn - ) - second_call = handler(continued_events, ctx) # type:ignore - - assert second_call == "completed" - - logstash_message = self.logstash.get_messages(expected=3) - assert len(logstash_message) == 3 - - assert logstash_message[1]["message"] == json_dumper(second_expanded_with_root_fields) - assert logstash_message[1]["log"]["offset"] == 86 - assert logstash_message[1]["log"]["file"]["path"] == sqs_queue_url_path - assert logstash_message[1]["aws"]["sqs"]["name"] == sqs_queue_name - assert logstash_message[1]["aws"]["sqs"]["message_id"] == message_id - assert 
logstash_message[1]["cloud"]["provider"] == "aws" - assert logstash_message[1]["cloud"]["region"] == "us-east-1" - assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - assert logstash_message[2]["message"] == json_dumper(third_expanded_event_with_root_fields) - assert logstash_message[2]["log"]["offset"] == 172 - assert logstash_message[2]["log"]["file"]["path"] == sqs_queue_url_path - assert logstash_message[2]["aws"]["sqs"]["name"] == sqs_queue_name - assert logstash_message[2]["aws"]["sqs"]["message_id"] == message_id - assert logstash_message[2]["cloud"]["provider"] == "aws" - assert logstash_message[2]["cloud"]["region"] == "us-east-1" - assert logstash_message[2]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[2]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - def test_cloudwatch_logs_stream_as_input_instead_of_group(self) -> None: - assert isinstance(self.logstash, LogstashContainer) - assert isinstance(self.localstack, LocalStackContainer) - - fixtures = [ - _load_file_fixture("cloudwatch-log-1.json"), - _load_file_fixture("cloudwatch-log-2.json"), - _load_file_fixture("cloudwatch-log-3.json"), - ] - - cloudwatch_group_name = _time_based_id(suffix="source-group") - cloudwatch_group = _logs_create_cloudwatch_logs_group(self.logs_client, group_name=cloudwatch_group_name) - - cloudwatch_stream_name = _time_based_id(suffix="source-stream") - _logs_create_cloudwatch_logs_stream( - self.logs_client, group_name=cloudwatch_group_name, stream_name=cloudwatch_stream_name - ) - - cloudwatch_stream_name_different = _time_based_id(suffix="source-stream-different") - _logs_create_cloudwatch_logs_stream( - self.logs_client, group_name=cloudwatch_group_name, stream_name=cloudwatch_stream_name_different - ) - - _logs_upload_event_to_cloudwatch_logs( - self.logs_client, - group_name=cloudwatch_group_name, - stream_name=cloudwatch_stream_name, - messages_body=[fixtures[0], fixtures[2]], - ) - - _logs_upload_event_to_cloudwatch_logs( - self.logs_client, - group_name=cloudwatch_group_name, - stream_name=cloudwatch_stream_name_different, - messages_body=[fixtures[1]], - ) - - cloudwatch_group_arn = cloudwatch_group["arn"][0:-2] - - cloudwatch_group_name = cloudwatch_group_name - cloudwatch_stream_name = cloudwatch_stream_name - - config_yaml: str = f""" - inputs: - - type: "cloudwatch-logs" - id: "{cloudwatch_group_arn}:log-stream:{cloudwatch_stream_name}" - tags: {self.default_tags} - outputs: - - type: "logstash" - args: - logstash_url: "{self.logstash.get_url()}" - ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} - username: "{self.logstash.logstash_user}" - password: "{self.logstash.logstash_password}" - """ - - config_file_path = "config.yaml" - config_bucket_name = _time_based_id(suffix="config-bucket") - _s3_upload_content_to_bucket( - client=self.s3_client, - content=config_yaml.encode("utf-8"), - content_type="text/plain", - bucket_name=config_bucket_name, - key=config_file_path, - ) - - os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" - - events_cloudwatch_logs, event_ids_cloudwatch_logs, _ = _logs_retrieve_event_from_cloudwatch_logs( - self.logs_client, cloudwatch_group_name, cloudwatch_stream_name - ) - - events_cloudwatch_logs_different, _, _ = _logs_retrieve_event_from_cloudwatch_logs( - self.logs_client, cloudwatch_group_name, cloudwatch_stream_name_different - ) - - ctx = 
ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) - first_call = handler(events_cloudwatch_logs, ctx) # type:ignore - - assert first_call == "completed" - - second_call = handler(events_cloudwatch_logs_different, ctx) # type:ignore - - assert second_call == "completed" - - logstash_message = self.logstash.get_messages(expected=2) - assert len(logstash_message) == 2 - - assert logstash_message[0]["message"] == fixtures[0].rstrip("\n") - assert logstash_message[0]["log"]["offset"] == 0 - assert logstash_message[0]["log"]["file"]["path"] == f"{cloudwatch_group_name}/{cloudwatch_stream_name}" - assert logstash_message[0]["aws"]["cloudwatch"]["log_group"] == cloudwatch_group_name - assert logstash_message[0]["aws"]["cloudwatch"]["log_stream"] == cloudwatch_stream_name - assert logstash_message[0]["aws"]["cloudwatch"]["event_id"] == event_ids_cloudwatch_logs[0] - assert logstash_message[0]["cloud"]["provider"] == "aws" - assert logstash_message[0]["cloud"]["region"] == "us-east-1" - assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - assert logstash_message[1]["message"] == fixtures[2].rstrip("\n") - assert logstash_message[1]["log"]["offset"] == 0 - assert logstash_message[1]["log"]["file"]["path"] == f"{cloudwatch_group_name}/{cloudwatch_stream_name}" - assert logstash_message[1]["aws"]["cloudwatch"]["log_group"] == cloudwatch_group_name - assert logstash_message[1]["aws"]["cloudwatch"]["log_stream"] == cloudwatch_stream_name - assert logstash_message[1]["aws"]["cloudwatch"]["event_id"] == event_ids_cloudwatch_logs[1] - assert logstash_message[1]["cloud"]["provider"] == "aws" - assert logstash_message[1]["cloud"]["region"] == "us-east-1" - assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - def test_cloudwatch_logs_last_ending_offset_reset(self) -> None: - assert isinstance(self.logstash, LogstashContainer) - assert isinstance(self.localstack, LocalStackContainer) - - fixtures = [ - _load_file_fixture("cloudwatch-log-1.json"), - _load_file_fixture("cloudwatch-log-2.json"), - _load_file_fixture("cloudwatch-log-3.json"), - ] - - cloudwatch_group_name = _time_based_id(suffix="source-group") - cloudwatch_group = _logs_create_cloudwatch_logs_group(self.logs_client, group_name=cloudwatch_group_name) - - cloudwatch_stream_name = _time_based_id(suffix="source-stream") - _logs_create_cloudwatch_logs_stream( - self.logs_client, group_name=cloudwatch_group_name, stream_name=cloudwatch_stream_name - ) - - _logs_upload_event_to_cloudwatch_logs( - self.logs_client, - group_name=cloudwatch_group_name, - stream_name=cloudwatch_stream_name, - messages_body=fixtures, - ) - - cloudwatch_group_arn = cloudwatch_group["arn"] - - cloudwatch_group_name = cloudwatch_group_name - cloudwatch_stream_name = cloudwatch_stream_name - - config_yaml: str = f""" - inputs: - - type: "cloudwatch-logs" - id: "{cloudwatch_group_arn}" - tags: {self.default_tags} - outputs: - - type: "logstash" - args: - logstash_url: "{self.logstash.get_url()}" - ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} - username: "{self.logstash.logstash_user}" - password: "{self.logstash.logstash_password}" - """ - - config_file_path = "config.yaml" - config_bucket_name = _time_based_id(suffix="config-bucket") - _s3_upload_content_to_bucket( - client=self.s3_client, - content=config_yaml.encode("utf-8"), - 
content_type="text/plain", - bucket_name=config_bucket_name, - key=config_file_path, - ) - - os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" - - events_cloudwatch_logs, event_ids_cloudwatch_logs, _ = _logs_retrieve_event_from_cloudwatch_logs( - self.logs_client, cloudwatch_group_name, cloudwatch_stream_name - ) - - ctx = ContextMock() - first_call = handler(events_cloudwatch_logs, ctx) # type:ignore - - assert first_call == "continuing" - - logstash_message = self.logstash.get_messages(expected=1) - assert len(logstash_message) == 1 - - assert logstash_message[0]["message"] == fixtures[0].rstrip("\n") - assert logstash_message[0]["log"]["offset"] == 0 - assert logstash_message[0]["log"]["file"]["path"] == f"{cloudwatch_group_name}/{cloudwatch_stream_name}" - assert logstash_message[0]["aws"]["cloudwatch"]["log_group"] == cloudwatch_group_name - assert logstash_message[0]["aws"]["cloudwatch"]["log_stream"] == cloudwatch_stream_name - assert logstash_message[0]["aws"]["cloudwatch"]["event_id"] == event_ids_cloudwatch_logs[0] - assert logstash_message[0]["cloud"]["provider"] == "aws" - assert logstash_message[0]["cloud"]["region"] == "us-east-1" - assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) - - continued_events, _ = _sqs_get_messages( - self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn - ) - second_call = handler(continued_events, ctx) # type:ignore - - assert second_call == "completed" - - logstash_message = self.logstash.get_messages(expected=3) - assert len(logstash_message) == 3 - - assert logstash_message[1]["message"] == fixtures[1].rstrip("\n") - assert logstash_message[1]["log"]["offset"] == 0 - assert logstash_message[1]["log"]["file"]["path"] == f"{cloudwatch_group_name}/{cloudwatch_stream_name}" - assert logstash_message[1]["aws"]["cloudwatch"]["log_group"] == cloudwatch_group_name - assert logstash_message[1]["aws"]["cloudwatch"]["log_stream"] == cloudwatch_stream_name - assert logstash_message[1]["aws"]["cloudwatch"]["event_id"] == event_ids_cloudwatch_logs[1] - assert logstash_message[1]["cloud"]["provider"] == "aws" - assert logstash_message[1]["cloud"]["region"] == "us-east-1" - assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - assert logstash_message[2]["message"] == fixtures[2].rstrip("\n") - assert logstash_message[2]["log"]["offset"] == 0 - assert logstash_message[2]["log"]["file"]["path"] == f"{cloudwatch_group_name}/{cloudwatch_stream_name}" - assert logstash_message[2]["aws"]["cloudwatch"]["log_group"] == cloudwatch_group_name - assert logstash_message[2]["aws"]["cloudwatch"]["log_stream"] == cloudwatch_stream_name - assert logstash_message[2]["aws"]["cloudwatch"]["event_id"] == event_ids_cloudwatch_logs[2] - assert logstash_message[2]["cloud"]["provider"] == "aws" - assert logstash_message[2]["cloud"]["region"] == "us-east-1" - assert logstash_message[2]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[2]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - def test_cloudwatch_logs_last_event_expanded_offset_continue(self) -> None: - assert isinstance(self.logstash, LogstashContainer) - assert isinstance(self.localstack, LocalStackContainer) - - first_expanded_event: str = 
_load_file_fixture("cloudwatch-log-1.json") - second_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") - third_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") - - fixtures = [f"""{{"aField": [{first_expanded_event},{second_expanded_event},{third_expanded_event}]}}"""] - - cloudwatch_group_name = _time_based_id(suffix="source-group") - cloudwatch_group = _logs_create_cloudwatch_logs_group(self.logs_client, group_name=cloudwatch_group_name) - - cloudwatch_stream_name = _time_based_id(suffix="source-stream") - _logs_create_cloudwatch_logs_stream( - self.logs_client, group_name=cloudwatch_group_name, stream_name=cloudwatch_stream_name - ) - - _logs_upload_event_to_cloudwatch_logs( - self.logs_client, - group_name=cloudwatch_group_name, - stream_name=cloudwatch_stream_name, - messages_body=fixtures, - ) - - cloudwatch_group_arn = cloudwatch_group["arn"] - - cloudwatch_group_name = cloudwatch_group_name - cloudwatch_stream_name = cloudwatch_stream_name - - config_yaml: str = f""" - inputs: - - type: "cloudwatch-logs" - id: "{cloudwatch_group_arn}" - expand_event_list_from_field: aField - tags: {self.default_tags} - outputs: - - type: "logstash" - args: - logstash_url: "{self.logstash.get_url()}" - ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} - username: "{self.logstash.logstash_user}" - password: "{self.logstash.logstash_password}" - """ - - config_file_path = "config.yaml" - config_bucket_name = _time_based_id(suffix="config-bucket") - _s3_upload_content_to_bucket( - client=self.s3_client, - content=config_yaml.encode("utf-8"), - content_type="text/plain", - bucket_name=config_bucket_name, - key=config_file_path, - ) - - os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" - - events_cloudwatch_logs, event_ids_cloudwatch_logs, _ = _logs_retrieve_event_from_cloudwatch_logs( - self.logs_client, cloudwatch_group_name, cloudwatch_stream_name - ) - - ctx = ContextMock() - first_call = handler(events_cloudwatch_logs, ctx) # type:ignore - - assert first_call == "continuing" - - logstash_message = self.logstash.get_messages(expected=1) - assert len(logstash_message) == 1 - - assert logstash_message[0]["message"] == json_dumper(json_parser(first_expanded_event)) - assert logstash_message[0]["log"]["offset"] == 0 - assert logstash_message[0]["log"]["file"]["path"] == f"{cloudwatch_group_name}/{cloudwatch_stream_name}" - assert logstash_message[0]["aws"]["cloudwatch"]["log_group"] == cloudwatch_group_name - assert logstash_message[0]["aws"]["cloudwatch"]["log_stream"] == cloudwatch_stream_name - assert logstash_message[0]["aws"]["cloudwatch"]["event_id"] == event_ids_cloudwatch_logs[0] - assert logstash_message[0]["cloud"]["provider"] == "aws" - assert logstash_message[0]["cloud"]["region"] == "us-east-1" - assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) - - continued_events, _ = _sqs_get_messages( - self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn - ) - second_call = handler(continued_events, ctx) # type:ignore - - assert second_call == "completed" - - logstash_message = self.logstash.get_messages(expected=3) - assert len(logstash_message) == 3 - - assert logstash_message[1]["message"] == json_dumper(json_parser(second_expanded_event)) - assert logstash_message[1]["log"]["offset"] == 86 - assert 
logstash_message[1]["log"]["file"]["path"] == f"{cloudwatch_group_name}/{cloudwatch_stream_name}" - assert logstash_message[1]["aws"]["cloudwatch"]["log_group"] == cloudwatch_group_name - assert logstash_message[1]["aws"]["cloudwatch"]["log_stream"] == cloudwatch_stream_name - assert logstash_message[1]["aws"]["cloudwatch"]["event_id"] == event_ids_cloudwatch_logs[0] - assert logstash_message[1]["cloud"]["provider"] == "aws" - assert logstash_message[1]["cloud"]["region"] == "us-east-1" - assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - assert logstash_message[2]["message"] == json_dumper(json_parser(third_expanded_event)) - assert logstash_message[2]["log"]["offset"] == 172 - assert logstash_message[2]["log"]["file"]["path"] == f"{cloudwatch_group_name}/{cloudwatch_stream_name}" - assert logstash_message[2]["aws"]["cloudwatch"]["log_group"] == cloudwatch_group_name - assert logstash_message[2]["aws"]["cloudwatch"]["log_stream"] == cloudwatch_stream_name - assert logstash_message[2]["aws"]["cloudwatch"]["event_id"] == event_ids_cloudwatch_logs[0] - assert logstash_message[2]["cloud"]["provider"] == "aws" - assert logstash_message[2]["cloud"]["region"] == "us-east-1" - assert logstash_message[2]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[2]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - def test_kinesis_data_stream_last_ending_offset_reset(self) -> None: - assert isinstance(self.logstash, LogstashContainer) - assert isinstance(self.localstack, LocalStackContainer) - - fixtures = [ - _load_file_fixture("cloudwatch-log-1.json"), - _load_file_fixture("cloudwatch-log-2.json"), - _load_file_fixture("cloudwatch-log-3.json"), - ] - - kinesis_stream_name = _time_based_id(suffix="source-kinesis") - kinesis_stream = _kinesis_create_stream(self.kinesis_client, kinesis_stream_name) - kinesis_stream_arn = kinesis_stream["StreamDescription"]["StreamARN"] - - _kinesis_put_records(self.kinesis_client, kinesis_stream_name, fixtures) - - config_yaml: str = f""" - inputs: - - type: "kinesis-data-stream" - id: "{kinesis_stream_arn}" - tags: {self.default_tags} - outputs: - - type: "logstash" - args: - logstash_url: "{self.logstash.get_url()}" - ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} - username: "{self.logstash.logstash_user}" - password: "{self.logstash.logstash_password}" - """ - - config_file_path = "config.yaml" - config_bucket_name = _time_based_id(suffix="config-bucket") - _s3_upload_content_to_bucket( - client=self.s3_client, - content=config_yaml.encode("utf-8"), - content_type="text/plain", - bucket_name=config_bucket_name, - key=config_file_path, - ) - - os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" - - events_kinesis, _ = _kinesis_retrieve_event_from_kinesis_stream( - self.kinesis_client, kinesis_stream_name, kinesis_stream_arn - ) - - ctx = ContextMock() - first_call = handler(events_kinesis, ctx) # type:ignore - - assert first_call == "continuing" - - logstash_message = self.logstash.get_messages(expected=1) - assert len(logstash_message) == 1 - - assert logstash_message[0]["message"] == fixtures[0].rstrip("\n") - assert logstash_message[0]["log"]["offset"] == 0 - assert logstash_message[0]["log"]["file"]["path"] == kinesis_stream_arn - assert logstash_message[0]["aws"]["kinesis"]["type"] == "stream" - assert logstash_message[0]["aws"]["kinesis"]["partition_key"] == "PartitionKey" - assert 
logstash_message[0]["aws"]["kinesis"]["name"] == kinesis_stream_name - assert ( - logstash_message[0]["aws"]["kinesis"]["sequence_number"] - == events_kinesis["Records"][0]["kinesis"]["sequenceNumber"] - ) - assert logstash_message[0]["cloud"]["provider"] == "aws" - assert logstash_message[0]["cloud"]["region"] == "us-east-1" - assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) - - continued_events, _ = _sqs_get_messages( - self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn - ) - second_call = handler(continued_events, ctx) # type:ignore - - assert second_call == "completed" - - logstash_message = self.logstash.get_messages(expected=3) - assert len(logstash_message) == 3 - - assert logstash_message[1]["message"] == fixtures[1].rstrip("\n") - assert logstash_message[1]["log"]["offset"] == 0 - assert logstash_message[1]["log"]["file"]["path"] == kinesis_stream_arn - assert logstash_message[1]["aws"]["kinesis"]["type"] == "stream" - assert logstash_message[1]["aws"]["kinesis"]["partition_key"] == "PartitionKey" - assert logstash_message[1]["aws"]["kinesis"]["name"] == kinesis_stream_name - assert ( - logstash_message[1]["aws"]["kinesis"]["sequence_number"] - == events_kinesis["Records"][1]["kinesis"]["sequenceNumber"] - ) - assert logstash_message[1]["cloud"]["provider"] == "aws" - assert logstash_message[1]["cloud"]["region"] == "us-east-1" - assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - assert logstash_message[2]["message"] == fixtures[2].rstrip("\n") - assert logstash_message[2]["log"]["offset"] == 0 - assert logstash_message[2]["log"]["file"]["path"] == kinesis_stream_arn - assert logstash_message[2]["aws"]["kinesis"]["type"] == "stream" - assert logstash_message[2]["aws"]["kinesis"]["partition_key"] == "PartitionKey" - assert logstash_message[2]["aws"]["kinesis"]["name"] == kinesis_stream_name - assert ( - logstash_message[2]["aws"]["kinesis"]["sequence_number"] - == events_kinesis["Records"][2]["kinesis"]["sequenceNumber"] - ) - assert logstash_message[2]["cloud"]["provider"] == "aws" - assert logstash_message[2]["cloud"]["region"] == "us-east-1" - assert logstash_message[2]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[2]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - def test_kinesis_data_stream_last_event_expanded_offset_continue(self) -> None: - assert isinstance(self.logstash, LogstashContainer) - assert isinstance(self.localstack, LocalStackContainer) - - first_expanded_event: str = _load_file_fixture("cloudwatch-log-1.json") - second_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") - third_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") - - fixtures = [f"""{{"aField": [{first_expanded_event},{second_expanded_event},{third_expanded_event}]}}"""] - - kinesis_stream_name = _time_based_id(suffix="source-kinesis") - kinesis_stream = _kinesis_create_stream(self.kinesis_client, kinesis_stream_name) - kinesis_stream_arn = kinesis_stream["StreamDescription"]["StreamARN"] - - _kinesis_put_records(self.kinesis_client, kinesis_stream_name, fixtures) - - config_yaml: str = f""" - inputs: - - type: "kinesis-data-stream" - id: "{kinesis_stream_arn}" - expand_event_list_from_field: aField - tags: {self.default_tags} - 
outputs: - - type: "logstash" - args: - logstash_url: "{self.logstash.get_url()}" - ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} - username: "{self.logstash.logstash_user}" - password: "{self.logstash.logstash_password}" - """ - - config_file_path = "config.yaml" - config_bucket_name = _time_based_id(suffix="config-bucket") - _s3_upload_content_to_bucket( - client=self.s3_client, - content=config_yaml.encode("utf-8"), - content_type="text/plain", - bucket_name=config_bucket_name, - key=config_file_path, - ) - - os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" - - events_kinesis, _ = _kinesis_retrieve_event_from_kinesis_stream( - self.kinesis_client, kinesis_stream_name, kinesis_stream_arn - ) - - ctx = ContextMock() - first_call = handler(events_kinesis, ctx) # type:ignore - - assert first_call == "continuing" - - logstash_message = self.logstash.get_messages(expected=1) - assert len(logstash_message) == 1 - - assert logstash_message[0]["message"] == json_dumper(json_parser(first_expanded_event)) - assert logstash_message[0]["log"]["offset"] == 0 - assert logstash_message[0]["log"]["file"]["path"] == kinesis_stream_arn - assert logstash_message[0]["aws"]["kinesis"]["type"] == "stream" - assert logstash_message[0]["aws"]["kinesis"]["partition_key"] == "PartitionKey" - assert logstash_message[0]["aws"]["kinesis"]["name"] == kinesis_stream_name - assert ( - logstash_message[0]["aws"]["kinesis"]["sequence_number"] - == events_kinesis["Records"][0]["kinesis"]["sequenceNumber"] - ) - assert logstash_message[0]["cloud"]["provider"] == "aws" - assert logstash_message[0]["cloud"]["region"] == "us-east-1" - assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) - - continued_events, _ = _sqs_get_messages( - self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn - ) - second_call = handler(continued_events, ctx) # type:ignore - - assert second_call == "completed" - - logstash_message = self.logstash.get_messages(expected=3) - assert len(logstash_message) == 3 - - assert logstash_message[1]["message"] == json_dumper(json_parser(second_expanded_event)) - assert logstash_message[1]["log"]["offset"] == 86 - assert logstash_message[1]["log"]["file"]["path"] == kinesis_stream_arn - assert logstash_message[1]["aws"]["kinesis"]["type"] == "stream" - assert logstash_message[1]["aws"]["kinesis"]["partition_key"] == "PartitionKey" - assert logstash_message[1]["aws"]["kinesis"]["name"] == kinesis_stream_name - assert ( - logstash_message[1]["aws"]["kinesis"]["sequence_number"] - == events_kinesis["Records"][0]["kinesis"]["sequenceNumber"] - ) - assert logstash_message[1]["cloud"]["provider"] == "aws" - assert logstash_message[1]["cloud"]["region"] == "us-east-1" - assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - assert logstash_message[2]["message"] == json_dumper(json_parser(third_expanded_event)) - assert logstash_message[2]["log"]["offset"] == 172 - assert logstash_message[2]["log"]["file"]["path"] == kinesis_stream_arn - assert logstash_message[2]["aws"]["kinesis"]["type"] == "stream" - assert logstash_message[2]["aws"]["kinesis"]["partition_key"] == "PartitionKey" - assert logstash_message[2]["aws"]["kinesis"]["name"] == kinesis_stream_name - assert ( - 
logstash_message[2]["aws"]["kinesis"]["sequence_number"] - == events_kinesis["Records"][0]["kinesis"]["sequenceNumber"] - ) - assert logstash_message[2]["cloud"]["provider"] == "aws" - assert logstash_message[2]["cloud"]["region"] == "us-east-1" - assert logstash_message[2]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[2]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - def test_sqs_last_ending_offset_reset(self) -> None: - assert isinstance(self.logstash, LogstashContainer) - assert isinstance(self.localstack, LocalStackContainer) - - fixtures = [ - _load_file_fixture("cloudwatch-log-1.json"), - _load_file_fixture("cloudwatch-log-2.json"), - _load_file_fixture("cloudwatch-log-3.json"), - ] - - sqs_queue_name = _time_based_id(suffix="source-sqs") - - sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) - - sqs_queue_arn = sqs_queue["QueueArn"] - sqs_queue_url = sqs_queue["QueueUrl"] - sqs_queue_url_path = sqs_queue["QueueUrlPath"] - - _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) - - config_yaml: str = f""" - inputs: - - type: "sqs" - id: "{sqs_queue_arn}" - tags: {self.default_tags} - outputs: - - type: "logstash" - args: - logstash_url: "{self.logstash.get_url()}" - ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} - username: "{self.logstash.logstash_user}" - password: "{self.logstash.logstash_password}" - """ - - config_file_path = "config.yaml" - config_bucket_name = _time_based_id(suffix="config-bucket") - _s3_upload_content_to_bucket( - client=self.s3_client, - content=config_yaml.encode("utf-8"), - content_type="text/plain", - bucket_name=config_bucket_name, - key=config_file_path, - ) - - os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" - - events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) - - message_id = events_sqs["Records"][0]["messageId"] - - ctx = ContextMock() - first_call = handler(events_sqs, ctx) # type:ignore - - assert first_call == "continuing" - - logstash_message = self.logstash.get_messages(expected=1) - assert len(logstash_message) == 1 - - assert logstash_message[0]["message"] == fixtures[0].rstrip("\n") - assert logstash_message[0]["log"]["offset"] == 0 - assert logstash_message[0]["log"]["file"]["path"] == sqs_queue_url_path - assert logstash_message[0]["aws"]["sqs"]["name"] == sqs_queue_name - assert logstash_message[0]["aws"]["sqs"]["message_id"] == message_id - assert logstash_message[0]["cloud"]["provider"] == "aws" - assert logstash_message[0]["cloud"]["region"] == "us-east-1" - assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) - - continued_events, _ = _sqs_get_messages( - self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn - ) - second_call = handler(continued_events, ctx) # type:ignore - - assert second_call == "completed" - - logstash_message = self.logstash.get_messages(expected=3) - assert len(logstash_message) == 3 - - assert logstash_message[1]["message"] == fixtures[1].rstrip("\n") - assert logstash_message[1]["log"]["offset"] == 94 - assert logstash_message[1]["log"]["file"]["path"] == sqs_queue_url_path - assert logstash_message[1]["aws"]["sqs"]["name"] == sqs_queue_name - assert logstash_message[1]["aws"]["sqs"]["message_id"] == message_id - assert 
logstash_message[1]["cloud"]["provider"] == "aws" - assert logstash_message[1]["cloud"]["region"] == "us-east-1" - assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - assert logstash_message[2]["message"] == fixtures[2].rstrip("\n") - assert logstash_message[2]["log"]["offset"] == 332 - assert logstash_message[2]["log"]["file"]["path"] == sqs_queue_url_path - assert logstash_message[2]["aws"]["sqs"]["name"] == sqs_queue_name - assert logstash_message[2]["aws"]["sqs"]["message_id"] == message_id - assert logstash_message[2]["cloud"]["provider"] == "aws" - assert logstash_message[2]["cloud"]["region"] == "us-east-1" - assert logstash_message[2]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[2]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - def test_sqs_last_event_expanded_offset_continue(self) -> None: - assert isinstance(self.logstash, LogstashContainer) - assert isinstance(self.localstack, LocalStackContainer) - - first_expanded_event: str = _load_file_fixture("cloudwatch-log-1.json") - second_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") - third_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") - - fixtures = [f"""{{"aField": [{first_expanded_event},{second_expanded_event},{third_expanded_event}]}}"""] - - sqs_queue_name = _time_based_id(suffix="source-sqs") - - sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) - - sqs_queue_arn = sqs_queue["QueueArn"] - sqs_queue_url = sqs_queue["QueueUrl"] - sqs_queue_url_path = sqs_queue["QueueUrlPath"] - - _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) - - config_yaml: str = f""" - inputs: - - type: "sqs" - id: "{sqs_queue_arn}" - expand_event_list_from_field: aField - tags: {self.default_tags} - outputs: - - type: "logstash" - args: - logstash_url: "{self.logstash.get_url()}" - ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} - username: "{self.logstash.logstash_user}" - password: "{self.logstash.logstash_password}" - """ - - config_file_path = "config.yaml" - config_bucket_name = _time_based_id(suffix="config-bucket") - _s3_upload_content_to_bucket( - client=self.s3_client, - content=config_yaml.encode("utf-8"), - content_type="text/plain", - bucket_name=config_bucket_name, - key=config_file_path, - ) - - os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" - - events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) - - message_id = events_sqs["Records"][0]["messageId"] - - ctx = ContextMock() - first_call = handler(events_sqs, ctx) # type:ignore - - assert first_call == "continuing" - - logstash_message = self.logstash.get_messages(expected=1) - assert len(logstash_message) == 1 - - assert logstash_message[0]["message"] == json_dumper(json_parser(first_expanded_event)) - assert logstash_message[0]["log"]["offset"] == 0 - assert logstash_message[0]["log"]["file"]["path"] == sqs_queue_url_path - assert logstash_message[0]["aws"]["sqs"]["name"] == sqs_queue_name - assert logstash_message[0]["aws"]["sqs"]["message_id"] == message_id - assert logstash_message[0]["cloud"]["provider"] == "aws" - assert logstash_message[0]["cloud"]["region"] == "us-east-1" - assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - ctx = 
ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) - - continued_events, _ = _sqs_get_messages( - self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn - ) - second_call = handler(continued_events, ctx) # type:ignore - - assert second_call == "completed" - - logstash_message = self.logstash.get_messages(expected=3) - assert len(logstash_message) == 3 - - assert logstash_message[1]["message"] == json_dumper(json_parser(second_expanded_event)) - assert logstash_message[1]["log"]["offset"] == 86 - assert logstash_message[1]["log"]["file"]["path"] == sqs_queue_url_path - assert logstash_message[1]["aws"]["sqs"]["name"] == sqs_queue_name - assert logstash_message[1]["aws"]["sqs"]["message_id"] == message_id - assert logstash_message[1]["cloud"]["provider"] == "aws" - assert logstash_message[1]["cloud"]["region"] == "us-east-1" - assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - assert logstash_message[2]["message"] == json_dumper(json_parser(third_expanded_event)) - assert logstash_message[2]["log"]["offset"] == 172 - assert logstash_message[2]["log"]["file"]["path"] == sqs_queue_url_path - assert logstash_message[2]["aws"]["sqs"]["name"] == sqs_queue_name - assert logstash_message[2]["aws"]["sqs"]["message_id"] == message_id - assert logstash_message[2]["cloud"]["provider"] == "aws" - assert logstash_message[2]["cloud"]["region"] == "us-east-1" - assert logstash_message[2]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[2]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - def test_s3_sqs_last_ending_offset_reset(self) -> None: - assert isinstance(self.logstash, LogstashContainer) - assert isinstance(self.localstack, LocalStackContainer) - - fixtures = [ - _load_file_fixture("cloudwatch-log-1.json"), - _load_file_fixture("cloudwatch-log-2.json"), - _load_file_fixture("cloudwatch-log-3.json"), - ] - - s3_bucket_name = _time_based_id(suffix="test-bucket") - first_filename = "exportedlog/uuid/yyyy-mm-dd-[$LATEST]hash/000000.gz" - _s3_upload_content_to_bucket( - client=self.s3_client, - content=gzip.compress("".join(fixtures).encode("utf-8")), - content_type="application/x-gzip", - bucket_name=s3_bucket_name, - key=first_filename, - ) - - s3_sqs_queue_name = _time_based_id(suffix="source-s3-sqs") - - s3_sqs_queue = _sqs_create_queue(self.sqs_client, s3_sqs_queue_name, self.localstack.get_url()) - - s3_sqs_queue_arn = s3_sqs_queue["QueueArn"] - s3_sqs_queue_url = s3_sqs_queue["QueueUrl"] - - _sqs_send_s3_notifications(self.sqs_client, s3_sqs_queue_url, s3_bucket_name, [first_filename]) - - config_yaml: str = f""" - inputs: - - type: "s3-sqs" - id: "{s3_sqs_queue_arn}" - tags: {self.default_tags} - outputs: - - type: "logstash" - args: - logstash_url: "{self.logstash.get_url()}" - ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} - username: "{self.logstash.logstash_user}" - password: "{self.logstash.logstash_password}" - """ - - config_file_path = "config.yaml" - config_bucket_name = _time_based_id(suffix="config-bucket") - _s3_upload_content_to_bucket( - client=self.s3_client, - content=config_yaml.encode("utf-8"), - content_type="text/plain", - bucket_name=config_bucket_name, - key=config_file_path, - ) - - os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" - - events_s3, _ = _sqs_get_messages(self.sqs_client, s3_sqs_queue_url, s3_sqs_queue_arn) - - ctx = ContextMock() - first_call = 
handler(events_s3, ctx) # type:ignore - - assert first_call == "continuing" - - logstash_message = self.logstash.get_messages(expected=1) - assert len(logstash_message) == 1 - - assert logstash_message[0]["message"] == fixtures[0].rstrip("\n") - assert logstash_message[0]["log"]["offset"] == 0 - assert ( - logstash_message[0]["log"]["file"]["path"] - == f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{first_filename}" - ) - assert logstash_message[0]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name - assert logstash_message[0]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" - assert logstash_message[0]["aws"]["s3"]["object"]["key"] == first_filename - assert logstash_message[0]["cloud"]["provider"] == "aws" - assert logstash_message[0]["cloud"]["region"] == "eu-central-1" - assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) - - continued_events, _ = _sqs_get_messages( - self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn - ) - second_call = handler(continued_events, ctx) # type:ignore - - assert second_call == "completed" - - logstash_message = self.logstash.get_messages(expected=3) - assert len(logstash_message) == 3 - - assert logstash_message[1]["message"] == fixtures[1].rstrip("\n") - assert logstash_message[1]["log"]["offset"] == 94 - assert ( - logstash_message[1]["log"]["file"]["path"] - == f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{first_filename}" - ) - assert logstash_message[1]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name - assert logstash_message[1]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" - assert logstash_message[1]["aws"]["s3"]["object"]["key"] == first_filename - assert logstash_message[1]["cloud"]["provider"] == "aws" - assert logstash_message[1]["cloud"]["region"] == "eu-central-1" - assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - assert logstash_message[2]["message"] == fixtures[2].rstrip("\n") - assert logstash_message[2]["log"]["offset"] == 332 - assert ( - logstash_message[2]["log"]["file"]["path"] - == f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{first_filename}" - ) - assert logstash_message[2]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name - assert logstash_message[2]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" - assert logstash_message[2]["aws"]["s3"]["object"]["key"] == first_filename - assert logstash_message[2]["cloud"]["provider"] == "aws" - assert logstash_message[2]["cloud"]["region"] == "eu-central-1" - assert logstash_message[2]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[2]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - def test_s3_sqs_last_event_expanded_offset_continue(self) -> None: - assert isinstance(self.logstash, LogstashContainer) - assert isinstance(self.localstack, LocalStackContainer) - - first_expanded_event: str = _load_file_fixture("cloudwatch-log-1.json") - second_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") - third_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") - - fixtures = [ - f"""{{"aField": [{first_expanded_event},{second_expanded_event}]}}""", - f"""{{"aField": [{third_expanded_event}]}}""", - ] - - s3_bucket_name = _time_based_id(suffix="test-bucket") - 
first_filename = "exportedlog/uuid/yyyy-mm-dd-[$LATEST]hash/000000.gz" - _s3_upload_content_to_bucket( - client=self.s3_client, - content=gzip.compress(fixtures[0].encode("utf-8")), - content_type="application/x-gzip", - bucket_name=s3_bucket_name, - key=first_filename, - ) - - second_filename = "exportedlog/uuid/yyyy-mm-dd-[$LATEST]hash/000001.gz" - _s3_upload_content_to_bucket( - client=self.s3_client, - content=gzip.compress(fixtures[1].encode("utf-8")), - content_type="application/x-gzip", - bucket_name=s3_bucket_name, - key=second_filename, - ) - - s3_sqs_queue_name = _time_based_id(suffix="source-s3-sqs") - - s3_sqs_queue = _sqs_create_queue(self.sqs_client, s3_sqs_queue_name, self.localstack.get_url()) - - s3_sqs_queue_arn = s3_sqs_queue["QueueArn"] - s3_sqs_queue_url = s3_sqs_queue["QueueUrl"] - - _sqs_send_s3_notifications(self.sqs_client, s3_sqs_queue_url, s3_bucket_name, [first_filename]) - _sqs_send_s3_notifications(self.sqs_client, s3_sqs_queue_url, s3_bucket_name, [second_filename]) - - config_yaml: str = f""" - inputs: - - type: "s3-sqs" - id: "{s3_sqs_queue_arn}" - expand_event_list_from_field: aField - tags: {self.default_tags} - outputs: - - type: "logstash" - args: - logstash_url: "{self.logstash.get_url()}" - ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} - username: "{self.logstash.logstash_user}" - password: "{self.logstash.logstash_password}" - """ - - config_file_path = "config.yaml" - config_bucket_name = _time_based_id(suffix="config-bucket") - _s3_upload_content_to_bucket( - client=self.s3_client, - content=config_yaml.encode("utf-8"), - content_type="text/plain", - bucket_name=config_bucket_name, - key=config_file_path, - ) - - os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" - - events_s3, _ = _sqs_get_messages(self.sqs_client, s3_sqs_queue_url, s3_sqs_queue_arn) - - ctx = ContextMock() - first_call = handler(events_s3, ctx) # type:ignore - - assert first_call == "continuing" - - logstash_message = self.logstash.get_messages(expected=1) - assert len(logstash_message) == 1 - - assert logstash_message[0]["message"] == json_dumper(json_parser(first_expanded_event)) - assert logstash_message[0]["log"]["offset"] == 0 - assert ( - logstash_message[0]["log"]["file"]["path"] - == f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{first_filename}" - ) - assert logstash_message[0]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name - assert logstash_message[0]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" - assert logstash_message[0]["aws"]["s3"]["object"]["key"] == first_filename - assert logstash_message[0]["cloud"]["provider"] == "aws" - assert logstash_message[0]["cloud"]["region"] == "eu-central-1" - assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - continued_events, _ = _sqs_get_messages( - self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn - ) - second_call = handler(continued_events, ctx) # type:ignore - - assert second_call == "continuing" - - logstash_message = self.logstash.get_messages(expected=2) - assert len(logstash_message) == 2 - - assert logstash_message[1]["message"] == json_dumper(json_parser(second_expanded_event)) - assert logstash_message[1]["log"]["offset"] == 91 - assert ( - logstash_message[1]["log"]["file"]["path"] - == f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{first_filename}" - ) - assert 
logstash_message[1]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name - assert logstash_message[1]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" - assert logstash_message[1]["aws"]["s3"]["object"]["key"] == first_filename - assert logstash_message[1]["cloud"]["provider"] == "aws" - assert logstash_message[1]["cloud"]["region"] == "eu-central-1" - assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) - - continued_events, _ = _sqs_get_messages( - self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn - ) - third_call = handler(continued_events, ctx) # type:ignore - - assert third_call == "completed" - - logstash_message = self.logstash.get_messages(expected=3) - assert len(logstash_message) == 3 - assert logstash_message[2]["message"] == json_dumper(json_parser(third_expanded_event)) - assert logstash_message[2]["log"]["offset"] == 0 - assert ( - logstash_message[2]["log"]["file"]["path"] - == f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{second_filename}" - ) - assert logstash_message[2]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name - assert logstash_message[2]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" - assert logstash_message[2]["aws"]["s3"]["object"]["key"] == second_filename - assert logstash_message[2]["cloud"]["provider"] == "aws" - assert logstash_message[2]["cloud"]["region"] == "eu-central-1" - assert logstash_message[2]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[2]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - def test_cloud_trail_race(self) -> None: - assert isinstance(self.elasticsearch, ElasticsearchContainer) - assert isinstance(self.logstash, LogstashContainer) - assert isinstance(self.localstack, LocalStackContainer) - - s3_sqs_queue_name = _time_based_id(suffix="source-s3-sqs") - - s3_sqs_queue = _sqs_create_queue(self.sqs_client, s3_sqs_queue_name, self.localstack.get_url()) - - s3_sqs_queue_arn = s3_sqs_queue["QueueArn"] - s3_sqs_queue_url = s3_sqs_queue["QueueUrl"] - - config_yaml: str = f""" - inputs: - - type: s3-sqs - id: "{s3_sqs_queue_arn}" - tags: {self.default_tags} - outputs: {self.default_outputs} - """ - - config_file_path = "config.yaml" - config_bucket_name = _time_based_id(suffix="config-bucket") - _s3_upload_content_to_bucket( - client=self.s3_client, - content=config_yaml.encode("utf-8"), - content_type="text/plain", - bucket_name=config_bucket_name, - key=config_file_path, - ) - - os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" - fixtures = [ - _load_file_fixture("cloudwatch-log-1.json"), - '{"Records": [' + _load_file_fixture("cloudwatch-log-2.json") + "]}", - ] - - cloudtrail_filename_digest = ( - "AWSLogs/aws-account-id/CloudTrail-Digest/region/yyyy/mm/dd/" - "aws-account-id_CloudTrail-Digest_region_end-time_random-string.log.gz" - ) - cloudtrail_filename_non_digest = ( - "AWSLogs/aws-account-id/CloudTrail/region/yyyy/mm/dd/" - "aws-account-id_CloudTrail_region_end-time_random-string.log.gz" - ) - - s3_bucket_name = _time_based_id(suffix="test-bucket") - - _s3_upload_content_to_bucket( - client=self.s3_client, - content=gzip.compress(fixtures[0].encode("utf-8")), - content_type="application/x-gzip", - bucket_name=s3_bucket_name, - key=cloudtrail_filename_digest, - ) - - _s3_upload_content_to_bucket( - client=self.s3_client, - 
content=gzip.compress(fixtures[1].encode("utf-8")), - content_type="application/x-gzip", - bucket_name=s3_bucket_name, - key=cloudtrail_filename_non_digest, - ) - - _sqs_send_s3_notifications( - self.sqs_client, - s3_sqs_queue_url, - s3_bucket_name, - [cloudtrail_filename_digest, cloudtrail_filename_non_digest], - ) - - event, _ = _sqs_get_messages(self.sqs_client, s3_sqs_queue_url, s3_sqs_queue_arn) - - ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) - first_call = handler(event, ctx) # type:ignore - - assert first_call == "completed" - - self.elasticsearch.refresh(index="logs-aws.cloudtrail-default") - assert self.elasticsearch.count(index="logs-aws.cloudtrail-default")["count"] == 2 - - res = self.elasticsearch.search(index="logs-aws.cloudtrail-default", sort="_seq_no") - assert res["hits"]["total"] == {"value": 2, "relation": "eq"} - - assert res["hits"]["hits"][0]["_source"]["message"] == fixtures[0].rstrip("\n") - assert res["hits"]["hits"][0]["_source"]["log"]["offset"] == 0 - assert ( - res["hits"]["hits"][0]["_source"]["log"]["file"]["path"] - == f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{cloudtrail_filename_digest}" - ) - assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name - assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" - assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["object"]["key"] == cloudtrail_filename_digest - assert res["hits"]["hits"][0]["_source"]["cloud"]["provider"] == "aws" - assert res["hits"]["hits"][0]["_source"]["cloud"]["region"] == "eu-central-1" - assert res["hits"]["hits"][0]["_source"]["cloud"]["account"]["id"] == "000000000000" - assert res["hits"]["hits"][0]["_source"]["tags"] == ["forwarded", "aws-cloudtrail", "tag1", "tag2", "tag3"] - - assert res["hits"]["hits"][1]["_source"]["message"] == json_dumper( - json_parser(_load_file_fixture("cloudwatch-log-2.json").rstrip("\n")) - ) - assert res["hits"]["hits"][1]["_source"]["log"]["offset"] == 0 - assert ( - res["hits"]["hits"][1]["_source"]["log"]["file"]["path"] - == f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{cloudtrail_filename_non_digest}" - ) - assert res["hits"]["hits"][1]["_source"]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name - assert res["hits"]["hits"][1]["_source"]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" - assert res["hits"]["hits"][1]["_source"]["aws"]["s3"]["object"]["key"] == cloudtrail_filename_non_digest - assert res["hits"]["hits"][1]["_source"]["cloud"]["provider"] == "aws" - assert res["hits"]["hits"][1]["_source"]["cloud"]["region"] == "eu-central-1" - assert res["hits"]["hits"][1]["_source"]["cloud"]["account"]["id"] == "000000000000" - assert res["hits"]["hits"][1]["_source"]["tags"] == ["forwarded", "aws-cloudtrail", "tag1", "tag2", "tag3"] - - logstash_message = self.logstash.get_messages(expected=2) - assert len(logstash_message) == 2 - res["hits"]["hits"][0]["_source"]["tags"].remove("aws-cloudtrail") - res["hits"]["hits"][1]["_source"]["tags"].remove("aws-cloudtrail") - - assert res["hits"]["hits"][0]["_source"]["aws"] == logstash_message[0]["aws"] - assert res["hits"]["hits"][0]["_source"]["cloud"] == logstash_message[0]["cloud"] - assert res["hits"]["hits"][0]["_source"]["log"] == logstash_message[0]["log"] - assert res["hits"]["hits"][0]["_source"]["message"] == logstash_message[0]["message"] - assert res["hits"]["hits"][0]["_source"]["tags"] == logstash_message[0]["tags"] - - assert 
res["hits"]["hits"][1]["_source"]["aws"] == logstash_message[1]["aws"] - assert res["hits"]["hits"][1]["_source"]["cloud"] == logstash_message[1]["cloud"] - assert res["hits"]["hits"][1]["_source"]["log"] == logstash_message[1]["log"] - assert res["hits"]["hits"][1]["_source"]["message"] == logstash_message[1]["message"] - assert res["hits"]["hits"][1]["_source"]["tags"] == logstash_message[1]["tags"] - - def test_es_ssl_fingerprint_mismatch(self) -> None: - assert isinstance(self.elasticsearch, ElasticsearchContainer) - assert isinstance(self.localstack, LocalStackContainer) - - sqs_queue_name = _time_based_id(suffix="source-sqs") - sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) - - sqs_queue_arn = sqs_queue["QueueArn"] - sqs_queue_url = sqs_queue["QueueUrl"] - sqs_queue_url_path = sqs_queue["QueueUrlPath"] - - config_yaml: str = f""" - inputs: - - type: sqs - id: "{sqs_queue_arn}" - tags: {self.default_tags} - outputs: - - type: "elasticsearch" - args: - elasticsearch_url: "{self.elasticsearch.get_url()}" - ssl_assert_fingerprint: {self.elasticsearch.ssl_assert_fingerprint}:AA - username: "{self.secret_arn}:username" - password: "{self.secret_arn}:password" - """ - - config_file_path = "config.yaml" - config_bucket_name = _time_based_id(suffix="config-bucket") - _s3_upload_content_to_bucket( - client=self.s3_client, - content=config_yaml.encode("utf-8"), - content_type="text/plain", - bucket_name=config_bucket_name, - key=config_file_path, - ) - - os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" - ctx = ContextMock() - - fixtures = [ - _load_file_fixture("cloudwatch-log-1.json"), - _load_file_fixture("cloudwatch-log-2.json"), - ] - - _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) - - event, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) - message_id = event["Records"][0]["messageId"] - - first_call = handler(event, ctx) # type:ignore - - assert first_call == "continuing" - - assert self.elasticsearch.exists(index="logs-generic-default") is False - - event, _ = _sqs_get_messages(self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn) - second_call = handler(event, ctx) # type:ignore - - assert second_call == "continuing" - - assert self.elasticsearch.exists(index="logs-generic-default") is False - - event, _ = _sqs_get_messages(self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn) - third_call = handler(event, ctx) # type:ignore - - assert third_call == "completed" - - assert self.elasticsearch.exists(index="logs-generic-default") is False - - events, _ = _sqs_get_messages(self.sqs_client, os.environ["SQS_REPLAY_URL"], self.sqs_replay_queue_arn) - assert len(events["Records"]) == 2 - - first_body: dict[str, Any] = json_parser(events["Records"][0]["body"]) - second_body: dict[str, Any] = json_parser(events["Records"][1]["body"]) - - assert first_body["event_payload"]["message"] == fixtures[0].rstrip("\n") - assert first_body["event_payload"]["log"]["offset"] == 0 - assert first_body["event_payload"]["log"]["file"]["path"] == sqs_queue_url_path - assert first_body["event_payload"]["aws"]["sqs"]["name"] == sqs_queue_name - assert first_body["event_payload"]["aws"]["sqs"]["message_id"] == message_id - assert first_body["event_payload"]["cloud"]["provider"] == "aws" - assert first_body["event_payload"]["cloud"]["region"] == "us-east-1" - assert first_body["event_payload"]["cloud"]["account"]["id"] == "000000000000" - assert 
first_body["event_payload"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] - - assert second_body["event_payload"]["message"] == fixtures[1].rstrip("\n") - assert second_body["event_payload"]["log"]["offset"] == 94 - assert second_body["event_payload"]["log"]["file"]["path"] == sqs_queue_url_path - assert second_body["event_payload"]["aws"]["sqs"]["name"] == sqs_queue_name - assert second_body["event_payload"]["aws"]["sqs"]["message_id"] == message_id - assert second_body["event_payload"]["cloud"]["provider"] == "aws" - assert second_body["event_payload"]["cloud"]["region"] == "us-east-1" - assert second_body["event_payload"]["cloud"]["account"]["id"] == "000000000000" - assert second_body["event_payload"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] - - def test_es_no_matching_action_failed(self) -> None: - assert isinstance(self.elasticsearch, ElasticsearchContainer) - assert isinstance(self.localstack, LocalStackContainer) - - sqs_queue_name = _time_based_id(suffix="source-sqs") - sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) - - sqs_queue_arn = sqs_queue["QueueArn"] - sqs_queue_url = sqs_queue["QueueUrl"] - - message: str = "a message" - fingerprint: str = "DUEwoALOve1Y9MtPCfT7IJGU3IQ=" - - # Create an expected id so that es.send will fail - self.elasticsearch.index( - index="logs-generic-default", - op_type="create", - id=fingerprint, - document={"@timestamp": datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.%fZ")}, - ) - - processors = { - "processors": [ - { - "fingerprint": { - "fields": ["message"], - "target_field": "_id", - } - } - ] - } - - # Add a pipeline that will generate the same _id - self.elasticsearch.put_pipeline(id="id_fingerprint_pipeline", body=processors) - self.elasticsearch.put_settings( - index="logs-generic-default", body={"index.default_pipeline": "id_fingerprint_pipeline"} - ) - - self.elasticsearch.refresh(index="logs-generic-default") - - assert self.elasticsearch.count(index="logs-generic-default")["count"] == 1 - - _sqs_send_messages(self.sqs_client, sqs_queue_url, message) - - event, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) - - config_yaml: str = f""" - inputs: - - type: sqs - id: "{sqs_queue_arn}" - tags: {self.default_tags} - outputs: - - type: "elasticsearch" - args: - elasticsearch_url: "{self.elasticsearch.get_url()}" - ssl_assert_fingerprint: {self.elasticsearch.ssl_assert_fingerprint} - username: "{self.secret_arn}:username" - password: "{self.secret_arn}:password" - """ - - config_file_path = "config.yaml" - config_bucket_name = _time_based_id(suffix="config-bucket") - _s3_upload_content_to_bucket( - client=self.s3_client, - content=config_yaml.encode("utf-8"), - content_type="text/plain", - bucket_name=config_bucket_name, - key=config_file_path, - ) - - os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" - - ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) - - first_call = handler(event, ctx) # type:ignore - - assert first_call == "completed" - - self.elasticsearch.refresh(index="logs-generic-default") - - assert self.elasticsearch.count(index="logs-generic-default")["count"] == 1 - - res = self.elasticsearch.search(index="logs-generic-default") - assert "message" not in res["hits"]["hits"][0]["_source"] - - event, timestamp = _sqs_get_messages(self.sqs_client, os.environ["SQS_REPLAY_URL"], self.sqs_replay_queue_arn) - assert not event["Records"] - assert not timestamp - - def 
test_ls_ssl_fingerprint_mimsmatch(self) -> None: - assert isinstance(self.logstash, LogstashContainer) - assert isinstance(self.localstack, LocalStackContainer) - - sqs_queue_name = _time_based_id(suffix="source-sqs") - sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) - - sqs_queue_arn = sqs_queue["QueueArn"] - sqs_queue_url = sqs_queue["QueueUrl"] - sqs_queue_url_path = sqs_queue["QueueUrlPath"] - - config_yaml: str = f""" - inputs: - - type: sqs - id: "{sqs_queue_arn}" - tags: {self.default_tags} - outputs: - - type: "logstash" - args: - logstash_url: "{self.logstash.get_url()}" - ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint}:AA - username: "{self.logstash.logstash_user}" - password: "{self.logstash.logstash_password}" - """ - - config_file_path = "config.yaml" - config_bucket_name = _time_based_id(suffix="config-bucket") - _s3_upload_content_to_bucket( - client=self.s3_client, - content=config_yaml.encode("utf-8"), - content_type="text/plain", - bucket_name=config_bucket_name, - key=config_file_path, - ) - - os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" - ctx = ContextMock() - - fixtures = [ - _load_file_fixture("cloudwatch-log-1.json"), - _load_file_fixture("cloudwatch-log-2.json"), - ] - - _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) - - event, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) - message_id = event["Records"][0]["messageId"] - - first_call = handler(event, ctx) # type:ignore - - assert first_call == "continuing" - - event, _ = _sqs_get_messages(self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn) - second_call = handler(event, ctx) # type:ignore - - assert second_call == "continuing" - - event, _ = _sqs_get_messages(self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn) - third_call = handler(event, ctx) # type:ignore - - assert third_call == "completed" - - events, _ = _sqs_get_messages(self.sqs_client, os.environ["SQS_REPLAY_URL"], self.sqs_replay_queue_arn) - assert len(events["Records"]) == 2 - - first_body: dict[str, Any] = json_parser(events["Records"][0]["body"]) - second_body: dict[str, Any] = json_parser(events["Records"][1]["body"]) - - assert first_body["event_payload"]["message"] == fixtures[0].rstrip("\n") - assert first_body["event_payload"]["log"]["offset"] == 0 - assert first_body["event_payload"]["log"]["file"]["path"] == sqs_queue_url_path - assert first_body["event_payload"]["aws"]["sqs"]["name"] == sqs_queue_name - assert first_body["event_payload"]["aws"]["sqs"]["message_id"] == message_id - assert first_body["event_payload"]["cloud"]["provider"] == "aws" - assert first_body["event_payload"]["cloud"]["region"] == "us-east-1" - assert first_body["event_payload"]["cloud"]["account"]["id"] == "000000000000" - assert first_body["event_payload"]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - assert second_body["event_payload"]["message"] == fixtures[1].rstrip("\n") - assert second_body["event_payload"]["log"]["offset"] == 94 - assert second_body["event_payload"]["log"]["file"]["path"] == sqs_queue_url_path - assert second_body["event_payload"]["aws"]["sqs"]["name"] == sqs_queue_name - assert second_body["event_payload"]["aws"]["sqs"]["message_id"] == message_id - assert second_body["event_payload"]["cloud"]["provider"] == "aws" - assert second_body["event_payload"]["cloud"]["region"] == "us-east-1" - assert second_body["event_payload"]["cloud"]["account"]["id"] == 
"000000000000" - assert second_body["event_payload"]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - def test_ls_wrong_auth_creds(self) -> None: - assert isinstance(self.logstash, LogstashContainer) - assert isinstance(self.localstack, LocalStackContainer) - - sqs_queue_name = _time_based_id(suffix="source-sqs") - sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) - - sqs_queue_arn = sqs_queue["QueueArn"] - sqs_queue_url = sqs_queue["QueueUrl"] - sqs_queue_url_path = sqs_queue["QueueUrlPath"] - - config_yaml: str = f""" - inputs: - - type: sqs - id: "{sqs_queue_arn}" - tags: {self.default_tags} - outputs: - - type: "logstash" - args: - logstash_url: "{self.logstash.get_url()}" - ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} - username: "wrong_username" - password: "wrong_password" - """ - - config_file_path = "config.yaml" - config_bucket_name = _time_based_id(suffix="config-bucket") - _s3_upload_content_to_bucket( - client=self.s3_client, - content=config_yaml.encode("utf-8"), - content_type="text/plain", - bucket_name=config_bucket_name, - key=config_file_path, - ) - - os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" - ctx = ContextMock() - - fixtures = [ - _load_file_fixture("cloudwatch-log-1.json"), - _load_file_fixture("cloudwatch-log-2.json"), - ] - - _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) - - event, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) - message_id = event["Records"][0]["messageId"] - - first_call = handler(event, ctx) # type:ignore - - assert first_call == "continuing" - - event, _ = _sqs_get_messages(self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn) - second_call = handler(event, ctx) # type:ignore - - assert second_call == "continuing" - - event, _ = _sqs_get_messages(self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn) - third_call = handler(event, ctx) # type:ignore - - assert third_call == "completed" - - events, _ = _sqs_get_messages(self.sqs_client, os.environ["SQS_REPLAY_URL"], self.sqs_replay_queue_arn) - assert len(events["Records"]) == 2 - - first_body: dict[str, Any] = json_parser(events["Records"][0]["body"]) - second_body: dict[str, Any] = json_parser(events["Records"][1]["body"]) - - assert first_body["event_payload"]["message"] == fixtures[0].rstrip("\n") - assert first_body["event_payload"]["log"]["offset"] == 0 - assert first_body["event_payload"]["log"]["file"]["path"] == sqs_queue_url_path - assert first_body["event_payload"]["aws"]["sqs"]["name"] == sqs_queue_name - assert first_body["event_payload"]["aws"]["sqs"]["message_id"] == message_id - assert first_body["event_payload"]["cloud"]["provider"] == "aws" - assert first_body["event_payload"]["cloud"]["region"] == "us-east-1" - assert first_body["event_payload"]["cloud"]["account"]["id"] == "000000000000" - assert first_body["event_payload"]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - assert second_body["event_payload"]["message"] == fixtures[1].rstrip("\n") - assert second_body["event_payload"]["log"]["offset"] == 94 - assert second_body["event_payload"]["log"]["file"]["path"] == sqs_queue_url_path - assert second_body["event_payload"]["aws"]["sqs"]["name"] == sqs_queue_name - assert second_body["event_payload"]["aws"]["sqs"]["message_id"] == message_id - assert second_body["event_payload"]["cloud"]["provider"] == "aws" - assert second_body["event_payload"]["cloud"]["region"] == "us-east-1" - 
assert second_body["event_payload"]["cloud"]["account"]["id"] == "000000000000" - assert second_body["event_payload"]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] From 3e12c4913d4dbdd0e7f66edc784b6a136282e0ae Mon Sep 17 00:00:00 2001 From: constanca Date: Wed, 17 Apr 2024 11:46:45 +0200 Subject: [PATCH 03/26] . --- tests/handlers/aws/test_integrations.py | 2387 +++++++++++++++++++++++ 1 file changed, 2387 insertions(+) diff --git a/tests/handlers/aws/test_integrations.py b/tests/handlers/aws/test_integrations.py index e69de29b..3390a568 100644 --- a/tests/handlers/aws/test_integrations.py +++ b/tests/handlers/aws/test_integrations.py @@ -0,0 +1,2387 @@ +# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +# or more contributor license agreements. Licensed under the Elastic License 2.0; +# you may not use this file except in compliance with the Elastic License 2.0. + +import datetime +import gzip +import os +import time +from typing import Any, Optional +from unittest import TestCase + +import boto3 +import mock +import pytest +from botocore.client import BaseClient as BotoBaseClient +from testcontainers.localstack import LocalStackContainer + +from handlers.aws.exceptions import ReplayHandlerException +from main_aws import handler +from share import get_hex_prefix, json_dumper, json_parser +from tests.testcontainers.es import ElasticsearchContainer +from tests.testcontainers.logstash import LogstashContainer + +from .utils import ( + _AWS_REGION, + _S3_NOTIFICATION_EVENT_TIME, + ContextMock, + _create_secrets, + _kinesis_create_stream, + _kinesis_put_records, + _kinesis_retrieve_event_from_kinesis_stream, + _load_file_fixture, + _logs_create_cloudwatch_logs_group, + _logs_create_cloudwatch_logs_stream, + _logs_retrieve_event_from_cloudwatch_logs, + _logs_upload_event_to_cloudwatch_logs, + _REMAINING_TIME_FORCE_CONTINUE_0ms, + _s3_upload_content_to_bucket, + _sqs_create_queue, + _sqs_get_messages, + _sqs_send_messages, + _sqs_send_s3_notifications, + _time_based_id, +) + +_OVER_COMPLETION_GRACE_PERIOD_2m = 1 + (1000 * 60 * 2) + + +@pytest.mark.integration +class TestLambdaHandlerIntegration(TestCase): + elasticsearch: Optional[ElasticsearchContainer] = None + logstash: Optional[LogstashContainer] = None + localstack: Optional[LocalStackContainer] = None + + aws_session: Optional[boto3.Session] = None + s3_client: Optional[BotoBaseClient] = None + logs_client: Optional[BotoBaseClient] = None + sqs_client: Optional[BotoBaseClient] = None + kinesis_client: Optional[BotoBaseClient] = None + sm_client: Optional[BotoBaseClient] = None + ec2_client: Optional[BotoBaseClient] = None + + secret_arn: Optional[Any] = None + + mocks: dict[str, Any] = {} + + @classmethod + def setUpClass(cls) -> None: + esc = ElasticsearchContainer() + cls.elasticsearch = esc.start() + + lgc = LogstashContainer(es_container=esc) + cls.logstash = lgc.start() + + lsc = LocalStackContainer(image="localstack/localstack:3.0.1") + lsc.with_env("EAGER_SERVICE_LOADING", "1") + lsc.with_env("SQS_DISABLE_CLOUDWATCH_METRICS", "1") + lsc.with_services("ec2", "kinesis", "logs", "s3", "sqs", "secretsmanager") + + cls.localstack = lsc.start() + + session = boto3.Session(region_name=_AWS_REGION) + cls.aws_session = session + cls.s3_client = session.client("s3", endpoint_url=cls.localstack.get_url()) + cls.logs_client = session.client("logs", endpoint_url=cls.localstack.get_url()) + cls.sqs_client = session.client("sqs", endpoint_url=cls.localstack.get_url()) + cls.kinesis_client = session.client("kinesis", 
endpoint_url=cls.localstack.get_url()) + cls.sm_client = session.client("secretsmanager", endpoint_url=cls.localstack.get_url()) + cls.ec2_client = session.client("ec2", endpoint_url=cls.localstack.get_url()) + + cls.secret_arn = _create_secrets( + cls.sm_client, + "es_secrets", + {"username": cls.elasticsearch.elastic_user, "password": cls.elasticsearch.elastic_password}, + ) + + cls.mocks = { + "storage.S3Storage._s3_client": mock.patch("storage.S3Storage._s3_client", new=cls.s3_client), + "share.secretsmanager._get_aws_sm_client": mock.patch( + "share.secretsmanager._get_aws_sm_client", lambda region_name: cls.sm_client + ), + "handlers.aws.utils.get_sqs_client": mock.patch( + "handlers.aws.utils.get_sqs_client", lambda: cls.sqs_client + ), + "handlers.aws.utils.get_ec2_client": mock.patch( + "handlers.aws.utils.get_ec2_client", lambda: cls.ec2_client + ), + "handlers.aws.handler.get_sqs_client": mock.patch( + "handlers.aws.handler.get_sqs_client", lambda: cls.sqs_client + ), + } + + for k, m in cls.mocks.items(): + m.start() + + @classmethod + def tearDownClass(cls) -> None: + assert cls.elasticsearch is not None + assert cls.logstash is not None + assert cls.localstack is not None + + cls.elasticsearch.stop() + cls.logstash.stop() + cls.localstack.stop() + + for k, m in cls.mocks.items(): + m.stop() + + def setUp(self) -> None: + assert isinstance(self.elasticsearch, ElasticsearchContainer) + assert isinstance(self.logstash, LogstashContainer) + assert isinstance(self.localstack, LocalStackContainer) + + os.environ["S3_CONFIG_FILE"] = "" + + sqs_continue_queue = _sqs_create_queue(self.sqs_client, _time_based_id(suffix="continuing")) + sqs_replay_queue = _sqs_create_queue(self.sqs_client, _time_based_id(suffix="replay")) + os.environ["SQS_CONTINUE_URL"] = sqs_continue_queue["QueueUrl"] + os.environ["SQS_REPLAY_URL"] = sqs_replay_queue["QueueUrl"] + + self.sqs_continue_queue_arn = sqs_continue_queue["QueueArn"] + self.sqs_replay_queue_arn = sqs_replay_queue["QueueArn"] + + self.default_tags: str = """ + - "tag1" + - "tag2" + - "tag3" + """ + + self.default_outputs: str = f""" + - type: "elasticsearch" + args: + elasticsearch_url: "{self.elasticsearch.get_url()}" + ssl_assert_fingerprint: {self.elasticsearch.ssl_assert_fingerprint} + username: "{self.secret_arn}:username" + password: "{self.secret_arn}:password" + - type: "logstash" + args: + logstash_url: "{self.logstash.get_url()}" + ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} + username: "{self.logstash.logstash_user}" + password: "{self.logstash.logstash_password}" + """ + + def tearDown(self) -> None: + assert isinstance(self.elasticsearch, ElasticsearchContainer) + assert isinstance(self.logstash, LogstashContainer) + + self.logstash.reset() + self.elasticsearch.reset() + + os.environ["S3_CONFIG_FILE"] = "" + os.environ["SQS_CONTINUE_URL"] = "" + os.environ["SQS_REPLAY_URL"] = "" + + def test_ls_es_output(self) -> None: + assert isinstance(self.elasticsearch, ElasticsearchContainer) + assert isinstance(self.logstash, LogstashContainer) + assert isinstance(self.localstack, LocalStackContainer) + + s3_sqs_queue_name = _time_based_id(suffix="source-s3-sqs") + + s3_sqs_queue = _sqs_create_queue(self.sqs_client, s3_sqs_queue_name, self.localstack.get_url()) + + s3_sqs_queue_arn = s3_sqs_queue["QueueArn"] + s3_sqs_queue_url = s3_sqs_queue["QueueUrl"] + + config_yaml: str = f""" + inputs: + - type: s3-sqs + id: "{s3_sqs_queue_arn}" + tags: {self.default_tags} + outputs: {self.default_outputs} + """ + + config_file_path 
= "config.yaml" + config_bucket_name = _time_based_id(suffix="config-bucket") + _s3_upload_content_to_bucket( + client=self.s3_client, + content=config_yaml.encode("utf-8"), + content_type="text/plain", + bucket_name=config_bucket_name, + key=config_file_path, + ) + + os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" + fixtures = [ + _load_file_fixture("cloudwatch-log-1.json"), + _load_file_fixture("cloudwatch-log-2.json"), + ] + + cloudtrail_filename_digest = ( + "AWSLogs/aws-account-id/CloudTrail-Digest/region/yyyy/mm/dd/" + "aws-account-id_CloudTrail-Digest_region_end-time_random-string.log.gz" + ) + cloudtrail_filename_non_digest = ( + "AWSLogs/aws-account-id/CloudTrail/region/yyyy/mm/dd/" + "aws-account-id_CloudTrail_region_end-time_random-string.log.gz" + ) + + s3_bucket_name = _time_based_id(suffix="test-bucket") + + _s3_upload_content_to_bucket( + client=self.s3_client, + content=gzip.compress(fixtures[0].encode("utf-8")), + content_type="application/x-gzip", + bucket_name=s3_bucket_name, + key=cloudtrail_filename_digest, + ) + + _s3_upload_content_to_bucket( + client=self.s3_client, + content=gzip.compress(fixtures[1].encode("utf-8")), + content_type="application/x-gzip", + bucket_name=s3_bucket_name, + key=cloudtrail_filename_non_digest, + ) + + _sqs_send_s3_notifications( + self.sqs_client, + s3_sqs_queue_url, + s3_bucket_name, + [cloudtrail_filename_digest, cloudtrail_filename_non_digest], + ) + + event, _ = _sqs_get_messages(self.sqs_client, s3_sqs_queue_url, s3_sqs_queue_arn) + + ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) + first_call = handler(event, ctx) # type:ignore + + assert first_call == "completed" + + self.elasticsearch.refresh(index="logs-aws.cloudtrail-default") + assert self.elasticsearch.count(index="logs-aws.cloudtrail-default")["count"] == 2 + + res = self.elasticsearch.search(index="logs-aws.cloudtrail-default", sort="_seq_no") + assert res["hits"]["total"] == {"value": 2, "relation": "eq"} + + assert res["hits"]["hits"][0]["_source"]["message"] == fixtures[0].rstrip("\n") + assert res["hits"]["hits"][0]["_source"]["log"]["offset"] == 0 + assert ( + res["hits"]["hits"][0]["_source"]["log"]["file"]["path"] + == f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{cloudtrail_filename_digest}" + ) + assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name + assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" + assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["object"]["key"] == cloudtrail_filename_digest + assert res["hits"]["hits"][0]["_source"]["cloud"]["provider"] == "aws" + assert res["hits"]["hits"][0]["_source"]["cloud"]["region"] == "eu-central-1" + assert res["hits"]["hits"][0]["_source"]["cloud"]["account"]["id"] == "000000000000" + assert res["hits"]["hits"][0]["_source"]["tags"] == ["forwarded", "aws-cloudtrail", "tag1", "tag2", "tag3"] + + assert res["hits"]["hits"][1]["_source"]["message"] == fixtures[1].rstrip("\n") + assert res["hits"]["hits"][1]["_source"]["log"]["offset"] == 0 + assert ( + res["hits"]["hits"][1]["_source"]["log"]["file"]["path"] + == f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{cloudtrail_filename_non_digest}" + ) + assert res["hits"]["hits"][1]["_source"]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name + assert res["hits"]["hits"][1]["_source"]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" + assert 
res["hits"]["hits"][1]["_source"]["aws"]["s3"]["object"]["key"] == cloudtrail_filename_non_digest + assert res["hits"]["hits"][1]["_source"]["cloud"]["provider"] == "aws" + assert res["hits"]["hits"][1]["_source"]["cloud"]["region"] == "eu-central-1" + assert res["hits"]["hits"][1]["_source"]["cloud"]["account"]["id"] == "000000000000" + assert res["hits"]["hits"][1]["_source"]["tags"] == ["forwarded", "aws-cloudtrail", "tag1", "tag2", "tag3"] + + logstash_message = self.logstash.get_messages(expected=2) + assert len(logstash_message) == 2 + res["hits"]["hits"][0]["_source"]["tags"].remove("aws-cloudtrail") + res["hits"]["hits"][1]["_source"]["tags"].remove("aws-cloudtrail") + + assert res["hits"]["hits"][0]["_source"]["aws"] == logstash_message[0]["aws"] + assert res["hits"]["hits"][0]["_source"]["cloud"] == logstash_message[0]["cloud"] + assert res["hits"]["hits"][0]["_source"]["log"] == logstash_message[0]["log"] + assert res["hits"]["hits"][0]["_source"]["message"] == logstash_message[0]["message"] + assert res["hits"]["hits"][0]["_source"]["tags"] == logstash_message[0]["tags"] + + assert res["hits"]["hits"][1]["_source"]["aws"] == logstash_message[1]["aws"] + assert res["hits"]["hits"][1]["_source"]["cloud"] == logstash_message[1]["cloud"] + assert res["hits"]["hits"][1]["_source"]["log"] == logstash_message[1]["log"] + assert res["hits"]["hits"][1]["_source"]["message"] == logstash_message[1]["message"] + assert res["hits"]["hits"][1]["_source"]["tags"] == logstash_message[1]["tags"] + + self.elasticsearch.refresh(index="logs-stash.elasticsearch-output") + assert self.elasticsearch.count(index="logs-stash.elasticsearch-output")["count"] == 2 + + res = self.elasticsearch.search(index="logs-stash.elasticsearch-output", sort="_seq_no") + assert res["hits"]["total"] == {"value": 2, "relation": "eq"} + + assert res["hits"]["hits"][0]["_source"]["aws"] == logstash_message[0]["aws"] + assert res["hits"]["hits"][0]["_source"]["cloud"] == logstash_message[0]["cloud"] + assert res["hits"]["hits"][0]["_source"]["log"] == logstash_message[0]["log"] + assert res["hits"]["hits"][0]["_source"]["message"] == logstash_message[0]["message"] + assert res["hits"]["hits"][0]["_source"]["tags"] == logstash_message[0]["tags"] + + assert res["hits"]["hits"][1]["_source"]["aws"] == logstash_message[1]["aws"] + assert res["hits"]["hits"][1]["_source"]["cloud"] == logstash_message[1]["cloud"] + assert res["hits"]["hits"][1]["_source"]["log"] == logstash_message[1]["log"] + assert res["hits"]["hits"][1]["_source"]["message"] == logstash_message[1]["message"] + assert res["hits"]["hits"][1]["_source"]["tags"] == logstash_message[1]["tags"] + + def test_continuing(self) -> None: + assert isinstance(self.elasticsearch, ElasticsearchContainer) + assert isinstance(self.logstash, LogstashContainer) + assert isinstance(self.localstack, LocalStackContainer) + + fixtures = [ + _load_file_fixture("cloudwatch-log-1.json"), + _load_file_fixture("cloudwatch-log-2.json"), + ] + + s3_bucket_name = _time_based_id(suffix="test-bucket") + first_filename = "exportedlog/uuid/yyyy-mm-dd-[$LATEST]hash/000000.gz" + _s3_upload_content_to_bucket( + client=self.s3_client, + content=gzip.compress("".join(fixtures).encode("utf-8")), + content_type="application/x-gzip", + bucket_name=s3_bucket_name, + key=first_filename, + ) + + cloudwatch_group_name = _time_based_id(suffix="source-group") + cloudwatch_group = _logs_create_cloudwatch_logs_group(self.logs_client, group_name=cloudwatch_group_name) + + cloudwatch_stream_name = 
_time_based_id(suffix="source-stream") + _logs_create_cloudwatch_logs_stream( + self.logs_client, group_name=cloudwatch_group_name, stream_name=cloudwatch_stream_name + ) + + _logs_upload_event_to_cloudwatch_logs( + self.logs_client, + group_name=cloudwatch_group_name, + stream_name=cloudwatch_stream_name, + messages_body=["".join(fixtures)], + ) + + cloudwatch_group_arn = cloudwatch_group["arn"] + + cloudwatch_group_name = cloudwatch_group_name + cloudwatch_stream_name = cloudwatch_stream_name + + sqs_queue_name = _time_based_id(suffix="source-sqs") + s3_sqs_queue_name = _time_based_id(suffix="source-s3-sqs") + + sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) + s3_sqs_queue = _sqs_create_queue(self.sqs_client, s3_sqs_queue_name, self.localstack.get_url()) + + sqs_queue_arn = sqs_queue["QueueArn"] + sqs_queue_url = sqs_queue["QueueUrl"] + sqs_queue_url_path = sqs_queue["QueueUrlPath"] + + s3_sqs_queue_arn = s3_sqs_queue["QueueArn"] + s3_sqs_queue_url = s3_sqs_queue["QueueUrl"] + + _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) + _sqs_send_s3_notifications(self.sqs_client, s3_sqs_queue_url, s3_bucket_name, [first_filename]) + + kinesis_stream_name = _time_based_id(suffix="source-kinesis") + kinesis_stream = _kinesis_create_stream(self.kinesis_client, kinesis_stream_name) + kinesis_stream_arn = kinesis_stream["StreamDescription"]["StreamARN"] + + _kinesis_put_records(self.kinesis_client, kinesis_stream_name, ["".join(fixtures)]) + + config_yaml: str = f""" + inputs: + - type: "kinesis-data-stream" + id: "{kinesis_stream_arn}" + tags: {self.default_tags} + outputs: {self.default_outputs} + - type: "cloudwatch-logs" + id: "{cloudwatch_group_arn}" + tags: {self.default_tags} + outputs: {self.default_outputs} + - type: sqs + id: "{sqs_queue_arn}" + tags: {self.default_tags} + outputs: {self.default_outputs} + - type: s3-sqs + id: "{s3_sqs_queue_arn}" + tags: {self.default_tags} + outputs: {self.default_outputs} + """ + + config_file_path = "config.yaml" + config_bucket_name = _time_based_id(suffix="config-bucket") + _s3_upload_content_to_bucket( + client=self.s3_client, + content=config_yaml.encode("utf-8"), + content_type="text/plain", + bucket_name=config_bucket_name, + key=config_file_path, + ) + + os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" + + events_s3, _ = _sqs_get_messages(self.sqs_client, s3_sqs_queue_url, s3_sqs_queue_arn) + + events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) + + message_id = events_sqs["Records"][0]["messageId"] + + events_cloudwatch_logs, event_ids_cloudwatch_logs, _ = _logs_retrieve_event_from_cloudwatch_logs( + self.logs_client, cloudwatch_group_name, cloudwatch_stream_name + ) + + events_kinesis, _ = _kinesis_retrieve_event_from_kinesis_stream( + self.kinesis_client, kinesis_stream_name, kinesis_stream_arn + ) + + ctx = ContextMock() + first_call = handler(events_s3, ctx) # type:ignore + + assert first_call == "continuing" + + self.elasticsearch.refresh(index="logs-generic-default") + assert self.elasticsearch.count(index="logs-generic-default")["count"] == 1 + + res = self.elasticsearch.search(index="logs-generic-default", sort="_seq_no") + assert res["hits"]["total"] == {"value": 1, "relation": "eq"} + + assert res["hits"]["hits"][0]["_source"]["message"] == fixtures[0].rstrip("\n") + assert res["hits"]["hits"][0]["_source"]["log"]["offset"] == 0 + assert ( + res["hits"]["hits"][0]["_source"]["log"]["file"]["path"] + == 
f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{first_filename}" + ) + assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name + assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" + assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["object"]["key"] == first_filename + assert res["hits"]["hits"][0]["_source"]["cloud"]["provider"] == "aws" + assert res["hits"]["hits"][0]["_source"]["cloud"]["region"] == "eu-central-1" + assert res["hits"]["hits"][0]["_source"]["cloud"]["account"]["id"] == "000000000000" + assert res["hits"]["hits"][0]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] + + logstash_message = self.logstash.get_messages(expected=1) + assert len(logstash_message) == 1 + res["hits"]["hits"][0]["_source"]["tags"].remove("generic") + assert res["hits"]["hits"][0]["_source"]["aws"] == logstash_message[0]["aws"] + assert res["hits"]["hits"][0]["_source"]["cloud"] == logstash_message[0]["cloud"] + assert res["hits"]["hits"][0]["_source"]["log"] == logstash_message[0]["log"] + assert res["hits"]["hits"][0]["_source"]["message"] == logstash_message[0]["message"] + assert res["hits"]["hits"][0]["_source"]["tags"] == logstash_message[0]["tags"] + + second_call = handler(events_sqs, ctx) # type:ignore + + assert second_call == "continuing" + + self.elasticsearch.refresh(index="logs-generic-default") + assert self.elasticsearch.count(index="logs-generic-default")["count"] == 2 + + res = self.elasticsearch.search(index="logs-generic-default", sort="_seq_no") + assert res["hits"]["total"] == {"value": 2, "relation": "eq"} + + assert res["hits"]["hits"][1]["_source"]["message"] == fixtures[0].rstrip("\n") + assert res["hits"]["hits"][1]["_source"]["log"]["offset"] == 0 + assert res["hits"]["hits"][1]["_source"]["log"]["file"]["path"] == sqs_queue_url_path + assert res["hits"]["hits"][1]["_source"]["aws"]["sqs"]["name"] == sqs_queue_name + assert res["hits"]["hits"][1]["_source"]["aws"]["sqs"]["message_id"] == message_id + assert res["hits"]["hits"][1]["_source"]["cloud"]["provider"] == "aws" + assert res["hits"]["hits"][1]["_source"]["cloud"]["region"] == "us-east-1" + assert res["hits"]["hits"][1]["_source"]["cloud"]["account"]["id"] == "000000000000" + assert res["hits"]["hits"][1]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] + + logstash_message = self.logstash.get_messages(expected=2) + assert len(logstash_message) == 2 + res["hits"]["hits"][1]["_source"]["tags"].remove("generic") + assert res["hits"]["hits"][1]["_source"]["aws"] == logstash_message[1]["aws"] + assert res["hits"]["hits"][1]["_source"]["cloud"] == logstash_message[1]["cloud"] + assert res["hits"]["hits"][1]["_source"]["log"] == logstash_message[1]["log"] + assert res["hits"]["hits"][1]["_source"]["message"] == logstash_message[1]["message"] + assert res["hits"]["hits"][1]["_source"]["tags"] == logstash_message[1]["tags"] + + third_call = handler(events_cloudwatch_logs, ctx) # type:ignore + + assert third_call == "continuing" + + self.elasticsearch.refresh(index="logs-generic-default") + assert self.elasticsearch.count(index="logs-generic-default")["count"] == 3 + + res = self.elasticsearch.search(index="logs-generic-default", sort="_seq_no") + assert res["hits"]["total"] == {"value": 3, "relation": "eq"} + + assert res["hits"]["hits"][2]["_source"]["message"] == fixtures[0].rstrip("\n") + assert res["hits"]["hits"][2]["_source"]["log"]["offset"] == 0 + assert ( + 
res["hits"]["hits"][2]["_source"]["log"]["file"]["path"] + == f"{cloudwatch_group_name}/{cloudwatch_stream_name}" + ) + assert res["hits"]["hits"][2]["_source"]["aws"]["cloudwatch"]["log_group"] == cloudwatch_group_name + assert res["hits"]["hits"][2]["_source"]["aws"]["cloudwatch"]["log_stream"] == cloudwatch_stream_name + assert res["hits"]["hits"][2]["_source"]["aws"]["cloudwatch"]["event_id"] == event_ids_cloudwatch_logs[0] + assert res["hits"]["hits"][2]["_source"]["cloud"]["provider"] == "aws" + assert res["hits"]["hits"][2]["_source"]["cloud"]["region"] == "us-east-1" + assert res["hits"]["hits"][2]["_source"]["cloud"]["account"]["id"] == "000000000000" + assert res["hits"]["hits"][2]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] + + logstash_message = self.logstash.get_messages(expected=3) + assert len(logstash_message) == 3 + res["hits"]["hits"][2]["_source"]["tags"].remove("generic") + assert res["hits"]["hits"][2]["_source"]["aws"] == logstash_message[2]["aws"] + assert res["hits"]["hits"][2]["_source"]["cloud"] == logstash_message[2]["cloud"] + assert res["hits"]["hits"][2]["_source"]["log"] == logstash_message[2]["log"] + assert res["hits"]["hits"][2]["_source"]["message"] == logstash_message[2]["message"] + assert res["hits"]["hits"][2]["_source"]["tags"] == logstash_message[2]["tags"] + + fourth_call = handler(events_kinesis, ctx) # type:ignore + + assert fourth_call == "continuing" + + self.elasticsearch.refresh(index="logs-generic-default") + assert self.elasticsearch.count(index="logs-generic-default")["count"] == 4 + + res = self.elasticsearch.search(index="logs-generic-default", sort="_seq_no") + assert res["hits"]["total"] == {"value": 4, "relation": "eq"} + + assert res["hits"]["hits"][3]["_source"]["message"] == fixtures[0].rstrip("\n") + assert res["hits"]["hits"][3]["_source"]["log"]["offset"] == 0 + assert res["hits"]["hits"][3]["_source"]["log"]["file"]["path"] == kinesis_stream_arn + assert res["hits"]["hits"][3]["_source"]["aws"]["kinesis"]["type"] == "stream" + assert res["hits"]["hits"][3]["_source"]["aws"]["kinesis"]["partition_key"] == "PartitionKey" + assert res["hits"]["hits"][3]["_source"]["aws"]["kinesis"]["name"] == kinesis_stream_name + assert ( + res["hits"]["hits"][3]["_source"]["aws"]["kinesis"]["sequence_number"] + == events_kinesis["Records"][0]["kinesis"]["sequenceNumber"] + ) + assert res["hits"]["hits"][3]["_source"]["cloud"]["provider"] == "aws" + assert res["hits"]["hits"][3]["_source"]["cloud"]["region"] == "us-east-1" + assert res["hits"]["hits"][3]["_source"]["cloud"]["account"]["id"] == "000000000000" + assert res["hits"]["hits"][3]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] + + logstash_message = self.logstash.get_messages(expected=4) + assert len(logstash_message) == 4 + res["hits"]["hits"][3]["_source"]["tags"].remove("generic") + assert res["hits"]["hits"][3]["_source"]["aws"] == logstash_message[3]["aws"] + assert res["hits"]["hits"][3]["_source"]["cloud"] == logstash_message[3]["cloud"] + assert res["hits"]["hits"][3]["_source"]["log"] == logstash_message[3]["log"] + assert res["hits"]["hits"][3]["_source"]["message"] == logstash_message[3]["message"] + assert res["hits"]["hits"][3]["_source"]["tags"] == logstash_message[3]["tags"] + + continued_events, _ = _sqs_get_messages( + self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn + ) + + fifth_call = handler(continued_events, ctx) # type:ignore + + assert fifth_call == "continuing" + + 
self.elasticsearch.refresh(index="logs-generic-default") + assert self.elasticsearch.count(index="logs-generic-default")["count"] == 5 + + res = self.elasticsearch.search(index="logs-generic-default", sort="_seq_no") + assert res["hits"]["total"] == {"value": 5, "relation": "eq"} + + assert res["hits"]["hits"][4]["_source"]["message"] == fixtures[1].rstrip("\n") + assert res["hits"]["hits"][4]["_source"]["log"]["offset"] == 94 + assert ( + res["hits"]["hits"][4]["_source"]["log"]["file"]["path"] + == f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{first_filename}" + ) + assert res["hits"]["hits"][4]["_source"]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name + assert res["hits"]["hits"][4]["_source"]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" + assert res["hits"]["hits"][4]["_source"]["aws"]["s3"]["object"]["key"] == first_filename + assert res["hits"]["hits"][4]["_source"]["cloud"]["provider"] == "aws" + assert res["hits"]["hits"][4]["_source"]["cloud"]["region"] == "eu-central-1" + assert res["hits"]["hits"][4]["_source"]["cloud"]["account"]["id"] == "000000000000" + assert res["hits"]["hits"][4]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] + + logstash_message = self.logstash.get_messages(expected=5) + assert len(logstash_message) == 5 + res["hits"]["hits"][4]["_source"]["tags"].remove("generic") + assert res["hits"]["hits"][4]["_source"]["aws"] == logstash_message[4]["aws"] + assert res["hits"]["hits"][4]["_source"]["cloud"] == logstash_message[4]["cloud"] + assert res["hits"]["hits"][4]["_source"]["log"] == logstash_message[4]["log"] + assert res["hits"]["hits"][4]["_source"]["message"] == logstash_message[4]["message"] + assert res["hits"]["hits"][4]["_source"]["tags"] == logstash_message[4]["tags"] + + ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) + + continued_events, _ = _sqs_get_messages( + self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn + ) + sixth_call = handler(continued_events, ctx) # type:ignore + + assert sixth_call == "completed" + + self.elasticsearch.refresh(index="logs-generic-default") + assert self.elasticsearch.count(index="logs-generic-default")["count"] == 8 + + res = self.elasticsearch.search(index="logs-generic-default", sort="_seq_no") + assert res["hits"]["total"] == {"value": 8, "relation": "eq"} + + assert res["hits"]["hits"][5]["_source"]["message"] == fixtures[1].rstrip("\n") + assert res["hits"]["hits"][5]["_source"]["log"]["offset"] == 94 + assert res["hits"]["hits"][5]["_source"]["log"]["file"]["path"] == sqs_queue_url_path + assert res["hits"]["hits"][5]["_source"]["aws"]["sqs"]["name"] == sqs_queue_name + assert res["hits"]["hits"][5]["_source"]["aws"]["sqs"]["message_id"] == message_id + assert res["hits"]["hits"][5]["_source"]["cloud"]["provider"] == "aws" + assert res["hits"]["hits"][5]["_source"]["cloud"]["region"] == "us-east-1" + assert res["hits"]["hits"][5]["_source"]["cloud"]["account"]["id"] == "000000000000" + assert res["hits"]["hits"][5]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] + + assert res["hits"]["hits"][6]["_source"]["message"] == fixtures[1].rstrip("\n") + assert res["hits"]["hits"][6]["_source"]["log"]["offset"] == 94 + assert ( + res["hits"]["hits"][6]["_source"]["log"]["file"]["path"] + == f"{cloudwatch_group_name}/{cloudwatch_stream_name}" + ) + assert res["hits"]["hits"][6]["_source"]["aws"]["cloudwatch"]["log_group"] == cloudwatch_group_name + assert 
res["hits"]["hits"][6]["_source"]["aws"]["cloudwatch"]["log_stream"] == cloudwatch_stream_name + assert res["hits"]["hits"][6]["_source"]["aws"]["cloudwatch"]["event_id"] == event_ids_cloudwatch_logs[0] + assert res["hits"]["hits"][6]["_source"]["cloud"]["provider"] == "aws" + assert res["hits"]["hits"][6]["_source"]["cloud"]["region"] == "us-east-1" + assert res["hits"]["hits"][6]["_source"]["cloud"]["account"]["id"] == "000000000000" + assert res["hits"]["hits"][6]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] + + assert res["hits"]["hits"][7]["_source"]["message"] == fixtures[1].rstrip("\n") + assert res["hits"]["hits"][7]["_source"]["log"]["offset"] == 94 + assert res["hits"]["hits"][7]["_source"]["log"]["file"]["path"] == kinesis_stream_arn + assert res["hits"]["hits"][7]["_source"]["aws"]["kinesis"]["type"] == "stream" + assert res["hits"]["hits"][7]["_source"]["aws"]["kinesis"]["partition_key"] == "PartitionKey" + assert res["hits"]["hits"][7]["_source"]["aws"]["kinesis"]["name"] == kinesis_stream_name + assert ( + res["hits"]["hits"][7]["_source"]["aws"]["kinesis"]["sequence_number"] + == events_kinesis["Records"][0]["kinesis"]["sequenceNumber"] + ) + assert res["hits"]["hits"][7]["_source"]["cloud"]["provider"] == "aws" + assert res["hits"]["hits"][7]["_source"]["cloud"]["region"] == "us-east-1" + assert res["hits"]["hits"][7]["_source"]["cloud"]["account"]["id"] == "000000000000" + assert res["hits"]["hits"][7]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] + + logstash_message = self.logstash.get_messages(expected=8) + assert len(logstash_message) == 8 + res["hits"]["hits"][5]["_source"]["tags"].remove("generic") + res["hits"]["hits"][6]["_source"]["tags"].remove("generic") + res["hits"]["hits"][7]["_source"]["tags"].remove("generic") + + assert res["hits"]["hits"][5]["_source"]["aws"] == logstash_message[5]["aws"] + assert res["hits"]["hits"][5]["_source"]["cloud"] == logstash_message[5]["cloud"] + assert res["hits"]["hits"][5]["_source"]["log"] == logstash_message[5]["log"] + assert res["hits"]["hits"][5]["_source"]["message"] == logstash_message[5]["message"] + assert res["hits"]["hits"][5]["_source"]["tags"] == logstash_message[5]["tags"] + + assert res["hits"]["hits"][6]["_source"]["aws"] == logstash_message[6]["aws"] + assert res["hits"]["hits"][6]["_source"]["cloud"] == logstash_message[6]["cloud"] + assert res["hits"]["hits"][6]["_source"]["log"] == logstash_message[6]["log"] + assert res["hits"]["hits"][6]["_source"]["message"] == logstash_message[6]["message"] + assert res["hits"]["hits"][6]["_source"]["tags"] == logstash_message[6]["tags"] + + assert res["hits"]["hits"][7]["_source"]["aws"] == logstash_message[7]["aws"] + assert res["hits"]["hits"][7]["_source"]["cloud"] == logstash_message[7]["cloud"] + assert res["hits"]["hits"][7]["_source"]["log"] == logstash_message[7]["log"] + assert res["hits"]["hits"][7]["_source"]["message"] == logstash_message[7]["message"] + assert res["hits"]["hits"][7]["_source"]["tags"] == logstash_message[7]["tags"] + + def test_continuing_no_timeout_input_from_originalEventSourceARN_message_attribute(self) -> None: + assert isinstance(self.logstash, LogstashContainer) + assert isinstance(self.localstack, LocalStackContainer) + + fixtures = [ + _load_file_fixture("cloudwatch-log-1.json"), + _load_file_fixture("cloudwatch-log-2.json"), + _load_file_fixture("cloudwatch-log-3.json"), + ] + + sqs_queue_name = _time_based_id(suffix="source-sqs") + + sqs_queue = 
_sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) + + sqs_queue_arn = sqs_queue["QueueArn"] + sqs_queue_url = sqs_queue["QueueUrl"] + sqs_queue_url_path = sqs_queue["QueueUrlPath"] + + _sqs_send_messages(self.sqs_client, sqs_queue_url, fixtures[0]) + _sqs_send_messages(self.sqs_client, sqs_queue_url, fixtures[1]) + _sqs_send_messages(self.sqs_client, sqs_queue_url, fixtures[2]) + + config_yaml: str = f""" + inputs: + - type: sqs + id: "{sqs_queue_arn}" + tags: {self.default_tags} + outputs: + - type: "logstash" + args: + logstash_url: "{self.logstash.get_url()}" + ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} + username: "{self.logstash.logstash_user}" + password: "{self.logstash.logstash_password}" + """ + + config_file_path = "config.yaml" + config_bucket_name = _time_based_id(suffix="config-bucket") + _s3_upload_content_to_bucket( + client=self.s3_client, + content=config_yaml.encode("utf-8"), + content_type="text/plain", + bucket_name=config_bucket_name, + key=config_file_path, + ) + + os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" + + events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) + + first_message_id = events_sqs["Records"][0]["messageId"] + second_message_id = events_sqs["Records"][1]["messageId"] + + ctx = ContextMock() + first_call = handler(events_sqs, ctx) # type:ignore + + assert first_call == "continuing" + + logstash_message = self.logstash.get_messages(expected=1) + assert len(logstash_message) == 1 + + assert logstash_message[0]["message"] == fixtures[0].rstrip("\n") + assert logstash_message[0]["log"]["offset"] == 0 + assert logstash_message[0]["log"]["file"]["path"] == sqs_queue_url_path + assert logstash_message[0]["aws"]["sqs"]["name"] == sqs_queue_name + assert logstash_message[0]["aws"]["sqs"]["message_id"] == first_message_id + assert logstash_message[0]["cloud"]["provider"] == "aws" + assert logstash_message[0]["cloud"]["region"] == "us-east-1" + assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + continued_events, _ = _sqs_get_messages( + self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn + ) + + continued_events["Records"][2]["messageAttributes"]["originalEventSourceARN"][ + "stringValue" + ] += "-not-configured-arn" + second_call = handler(continued_events, ctx) # type:ignore + + assert second_call == "continuing" + + logstash_message = self.logstash.get_messages(expected=2) + assert len(logstash_message) == 2 + + assert logstash_message[1]["message"] == fixtures[1].rstrip("\n") + assert logstash_message[1]["log"]["offset"] == 0 + assert logstash_message[1]["log"]["file"]["path"] == sqs_queue_url_path + assert logstash_message[1]["aws"]["sqs"]["name"] == sqs_queue_name + assert logstash_message[1]["aws"]["sqs"]["message_id"] == second_message_id + assert logstash_message[1]["cloud"]["provider"] == "aws" + assert logstash_message[1]["cloud"]["region"] == "us-east-1" + assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) + continued_events, _ = _sqs_get_messages( + self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn + ) + + third_call = handler(continued_events, ctx) # type:ignore + + assert third_call == "completed" + + 
logstash_message = self.logstash.get_messages(expected=2) + assert len(logstash_message) == 2 + + def test_replay(self) -> None: + assert isinstance(self.elasticsearch, ElasticsearchContainer) + assert isinstance(self.logstash, LogstashContainer) + assert isinstance(self.localstack, LocalStackContainer) + + fixtures = [ + _load_file_fixture("cloudwatch-log-1.json"), + _load_file_fixture("cloudwatch-log-2.json"), + ] + + s3_bucket_name = _time_based_id(suffix="test-bucket") + first_filename = "exportedlog/uuid/yyyy-mm-dd-[$LATEST]hash/000000.gz" + _s3_upload_content_to_bucket( + client=self.s3_client, + content=gzip.compress("".join(fixtures).encode("utf-8")), + content_type="application/x-gzip", + bucket_name=s3_bucket_name, + key=first_filename, + ) + + cloudwatch_group_name = _time_based_id(suffix="source-group") + cloudwatch_group = _logs_create_cloudwatch_logs_group(self.logs_client, group_name=cloudwatch_group_name) + + cloudwatch_stream_name = _time_based_id(suffix="source-stream") + _logs_create_cloudwatch_logs_stream( + self.logs_client, group_name=cloudwatch_group_name, stream_name=cloudwatch_stream_name + ) + + _logs_upload_event_to_cloudwatch_logs( + self.logs_client, + group_name=cloudwatch_group_name, + stream_name=cloudwatch_stream_name, + messages_body=["".join(fixtures)], + ) + + cloudwatch_group_arn = cloudwatch_group["arn"] + + cloudwatch_group_name = cloudwatch_group_name + cloudwatch_stream_name = cloudwatch_stream_name + + sqs_queue_name = _time_based_id(suffix="source-sqs") + s3_sqs_queue_name = _time_based_id(suffix="source-s3-sqs") + + sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) + s3_sqs_queue = _sqs_create_queue(self.sqs_client, s3_sqs_queue_name, self.localstack.get_url()) + + sqs_queue_arn = sqs_queue["QueueArn"] + sqs_queue_url = sqs_queue["QueueUrl"] + sqs_queue_url_path = sqs_queue["QueueUrlPath"] + + s3_sqs_queue_arn = s3_sqs_queue["QueueArn"] + s3_sqs_queue_url = s3_sqs_queue["QueueUrl"] + + _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) + _sqs_send_s3_notifications(self.sqs_client, s3_sqs_queue_url, s3_bucket_name, [first_filename]) + + kinesis_stream_name = _time_based_id(suffix="source-kinesis") + kinesis_stream = _kinesis_create_stream(self.kinesis_client, kinesis_stream_name) + kinesis_stream_arn = kinesis_stream["StreamDescription"]["StreamARN"] + + _kinesis_put_records(self.kinesis_client, kinesis_stream_name, ["".join(fixtures)]) + + # the way to let logstash fail is to give wrong credentials + config_yaml: str = f""" + inputs: + - type: "kinesis-data-stream" + id: "{kinesis_stream_arn}" + tags: {self.default_tags} + outputs: + - type: "elasticsearch" + args: + elasticsearch_url: "{self.elasticsearch.get_url()}" + ssl_assert_fingerprint: {self.elasticsearch.ssl_assert_fingerprint} + username: "{self.secret_arn}:username" + password: "{self.secret_arn}:password" + - type: "logstash" + args: + logstash_url: "{self.logstash.get_url()}" + ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} + username: "wrong_username" + password: "wrong_username" + - type: "cloudwatch-logs" + id: "{cloudwatch_group_arn}" + tags: {self.default_tags} + outputs: + - type: "elasticsearch" + args: + elasticsearch_url: "{self.elasticsearch.get_url()}" + ssl_assert_fingerprint: {self.elasticsearch.ssl_assert_fingerprint} + username: "{self.secret_arn}:username" + password: "{self.secret_arn}:password" + - type: "logstash" + args: + logstash_url: "{self.logstash.get_url()}" + ssl_assert_fingerprint: 
{self.logstash.ssl_assert_fingerprint} + username: "wrong_username" + password: "wrong_username" + - type: sqs + id: "{sqs_queue_arn}" + tags: {self.default_tags} + outputs: + - type: "elasticsearch" + args: + elasticsearch_url: "{self.elasticsearch.get_url()}" + ssl_assert_fingerprint: {self.elasticsearch.ssl_assert_fingerprint} + username: "{self.secret_arn}:username" + password: "{self.secret_arn}:password" + - type: "logstash" + args: + logstash_url: "{self.logstash.get_url()}" + ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} + username: "wrong_username" + password: "wrong_username" + - type: s3-sqs + id: "{s3_sqs_queue_arn}" + tags: {self.default_tags} + outputs: + - type: "elasticsearch" + args: + elasticsearch_url: "{self.elasticsearch.get_url()}" + ssl_assert_fingerprint: {self.elasticsearch.ssl_assert_fingerprint} + username: "{self.secret_arn}:username" + password: "{self.secret_arn}:password" + - type: "logstash" + args: + logstash_url: "{self.logstash.get_url()}" + ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} + username: "wrong_username" + password: "wrong_username" + """ + + config_file_path = "config.yaml" + config_bucket_name = _time_based_id(suffix="config-bucket") + _s3_upload_content_to_bucket( + client=self.s3_client, + content=config_yaml.encode("utf-8"), + content_type="text/plain", + bucket_name=config_bucket_name, + key=config_file_path, + ) + + os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" + + events_s3, _ = _sqs_get_messages(self.sqs_client, s3_sqs_queue_url, s3_sqs_queue_arn) + + bucket_arn: str = f"arn:aws:s3:::{s3_bucket_name}" + event_time = int( + datetime.datetime.strptime(_S3_NOTIFICATION_EVENT_TIME, "%Y-%m-%dT%H:%M:%S.%fZ").timestamp() * 1000 + ) + + hash_first = get_hex_prefix(f"{bucket_arn}-{first_filename}") + prefix_s3_first = f"{event_time}-{hash_first}" + + events_sqs, events_sent_timestamps_sqs = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) + + message_id = events_sqs["Records"][0]["messageId"] + hash_sqs = get_hex_prefix(f"{sqs_queue_name}-{message_id}") + prefix_sqs: str = f"{events_sent_timestamps_sqs[0]}-{hash_sqs}" + + ( + events_cloudwatch_logs, + event_ids_cloudwatch_logs, + event_timestamps_cloudwatch_logs, + ) = _logs_retrieve_event_from_cloudwatch_logs(self.logs_client, cloudwatch_group_name, cloudwatch_stream_name) + + hash_cw_logs = get_hex_prefix( + f"{cloudwatch_group_name}-{cloudwatch_stream_name}-{event_ids_cloudwatch_logs[0]}" + ) + prefix_cloudwatch_logs = f"{event_timestamps_cloudwatch_logs[0]}-{hash_cw_logs}" + + events_kinesis, event_timestamps_kinesis_records = _kinesis_retrieve_event_from_kinesis_stream( + self.kinesis_client, kinesis_stream_name, kinesis_stream_arn + ) + sequence_number = events_kinesis["Records"][0]["kinesis"]["sequenceNumber"] + hash_kinesis_record = get_hex_prefix(f"stream-{kinesis_stream_name}-PartitionKey-{sequence_number}") + prefix_kinesis = f"{int(float(event_timestamps_kinesis_records[0]) * 1000)}-{hash_kinesis_record}" + + # Create an expected id for s3-sqs so that es.send will fail + self.elasticsearch.index( + index="logs-generic-default", + op_type="create", + id=f"{prefix_s3_first}-000000000000", + document={"@timestamp": datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.%fZ")}, + ) + + # Create an expected id for sqs so that es.send will fail + self.elasticsearch.index( + index="logs-generic-default", + op_type="create", + id=f"{prefix_sqs}-000000000000", + document={"@timestamp": 
datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.%fZ")}, + ) + + # Create an expected id for cloudwatch-logs so that es.send will fail + self.elasticsearch.index( + index="logs-generic-default", + op_type="create", + id=f"{prefix_cloudwatch_logs}-000000000000", + document={"@timestamp": datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.%fZ")}, + ) + + # Create an expected id for kinesis-data-stream so that es.send will fail + self.elasticsearch.index( + index="logs-generic-default", + op_type="create", + id=f"{prefix_kinesis}-000000000000", + document={"@timestamp": datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.%fZ")}, + ) + + self.elasticsearch.refresh(index="logs-generic-default") + + res = self.elasticsearch.search(index="logs-generic-default") + assert res["hits"]["total"] == {"value": 4, "relation": "eq"} + + ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) + + first_call = handler(events_s3, ctx) # type:ignore + + assert first_call == "completed" + + self.elasticsearch.refresh(index="logs-generic-default") + res = self.elasticsearch.search( + index="logs-generic-default", + query={ + "bool": { + "must_not": { + "ids": { + "values": [ + f"{prefix_s3_first}-000000000000", + f"{prefix_sqs}-000000000000", + f"{prefix_cloudwatch_logs}-000000000000", + f"{prefix_kinesis}-000000000000", + ] + } + } + } + }, + sort="_seq_no", + ) + + assert res["hits"]["total"] == {"value": 1, "relation": "eq"} + + assert res["hits"]["hits"][0]["_source"]["message"] == fixtures[1].rstrip("\n") + assert res["hits"]["hits"][0]["_source"]["log"]["offset"] == 94 + assert ( + res["hits"]["hits"][0]["_source"]["log"]["file"]["path"] + == f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{first_filename}" + ) + assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name + assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" + assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["object"]["key"] == first_filename + assert res["hits"]["hits"][0]["_source"]["cloud"]["provider"] == "aws" + assert res["hits"]["hits"][0]["_source"]["cloud"]["region"] == "eu-central-1" + assert res["hits"]["hits"][0]["_source"]["cloud"]["account"]["id"] == "000000000000" + assert res["hits"]["hits"][0]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] + + logstash_message = self.logstash.get_messages(expected=0) + assert len(logstash_message) == 0 + + second_call = handler(events_sqs, ctx) # type:ignore + + assert second_call == "completed" + + self.elasticsearch.refresh(index="logs-generic-default") + res = self.elasticsearch.search( + index="logs-generic-default", + query={ + "bool": { + "must_not": { + "ids": { + "values": [ + f"{prefix_s3_first}-000000000000", + f"{prefix_sqs}-000000000000", + f"{prefix_cloudwatch_logs}-000000000000", + f"{prefix_kinesis}-000000000000", + ] + } + } + } + }, + sort="_seq_no", + ) + + assert res["hits"]["total"] == {"value": 2, "relation": "eq"} + + assert res["hits"]["hits"][1]["_source"]["message"] == fixtures[1].rstrip("\n") + assert res["hits"]["hits"][1]["_source"]["log"]["offset"] == 94 + assert res["hits"]["hits"][1]["_source"]["log"]["file"]["path"] == sqs_queue_url_path + assert res["hits"]["hits"][1]["_source"]["aws"]["sqs"]["name"] == sqs_queue_name + assert res["hits"]["hits"][1]["_source"]["aws"]["sqs"]["message_id"] == message_id + assert res["hits"]["hits"][1]["_source"]["cloud"]["provider"] == "aws" + assert 
res["hits"]["hits"][1]["_source"]["cloud"]["region"] == "us-east-1" + assert res["hits"]["hits"][1]["_source"]["cloud"]["account"]["id"] == "000000000000" + assert res["hits"]["hits"][1]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] + + logstash_message = self.logstash.get_messages(expected=0) + assert len(logstash_message) == 0 + + third_call = handler(events_cloudwatch_logs, ctx) # type:ignore + + assert third_call == "completed" + + self.elasticsearch.refresh(index="logs-generic-default") + res = self.elasticsearch.search( + index="logs-generic-default", + query={ + "bool": { + "must_not": { + "ids": { + "values": [ + f"{prefix_s3_first}-000000000000", + f"{prefix_sqs}-000000000000", + f"{prefix_cloudwatch_logs}-000000000000", + f"{prefix_kinesis}-000000000000", + ] + } + } + } + }, + sort="_seq_no", + ) + + assert res["hits"]["total"] == {"value": 3, "relation": "eq"} + + assert res["hits"]["hits"][2]["_source"]["message"] == fixtures[1].rstrip("\n") + assert res["hits"]["hits"][2]["_source"]["log"]["offset"] == 94 + assert ( + res["hits"]["hits"][2]["_source"]["log"]["file"]["path"] + == f"{cloudwatch_group_name}/{cloudwatch_stream_name}" + ) + assert res["hits"]["hits"][2]["_source"]["aws"]["cloudwatch"]["log_group"] == cloudwatch_group_name + assert res["hits"]["hits"][2]["_source"]["aws"]["cloudwatch"]["log_stream"] == cloudwatch_stream_name + assert res["hits"]["hits"][2]["_source"]["aws"]["cloudwatch"]["event_id"] == event_ids_cloudwatch_logs[0] + assert res["hits"]["hits"][2]["_source"]["cloud"]["provider"] == "aws" + assert res["hits"]["hits"][2]["_source"]["cloud"]["region"] == "us-east-1" + assert res["hits"]["hits"][2]["_source"]["cloud"]["account"]["id"] == "000000000000" + assert res["hits"]["hits"][2]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] + + logstash_message = self.logstash.get_messages(expected=0) + assert len(logstash_message) == 0 + + fourth_call = handler(events_kinesis, ctx) # type:ignore + + assert fourth_call == "completed" + + self.elasticsearch.refresh(index="logs-generic-default") + res = self.elasticsearch.search( + index="logs-generic-default", + query={ + "bool": { + "must_not": { + "ids": { + "values": [ + f"{prefix_s3_first}-000000000000", + f"{prefix_sqs}-000000000000", + f"{prefix_cloudwatch_logs}-000000000000", + f"{prefix_kinesis}-000000000000", + ] + } + } + } + }, + sort="_seq_no", + ) + + assert res["hits"]["total"] == {"value": 4, "relation": "eq"} + + assert res["hits"]["hits"][3]["_source"]["message"] == fixtures[1].rstrip("\n") + assert res["hits"]["hits"][3]["_source"]["log"]["offset"] == 94 + assert res["hits"]["hits"][3]["_source"]["log"]["file"]["path"] == kinesis_stream_arn + assert res["hits"]["hits"][3]["_source"]["aws"]["kinesis"]["type"] == "stream" + assert res["hits"]["hits"][3]["_source"]["aws"]["kinesis"]["partition_key"] == "PartitionKey" + assert res["hits"]["hits"][3]["_source"]["aws"]["kinesis"]["name"] == kinesis_stream_name + assert ( + res["hits"]["hits"][3]["_source"]["aws"]["kinesis"]["sequence_number"] + == events_kinesis["Records"][0]["kinesis"]["sequenceNumber"] + ) + assert res["hits"]["hits"][3]["_source"]["cloud"]["provider"] == "aws" + assert res["hits"]["hits"][3]["_source"]["cloud"]["region"] == "us-east-1" + assert res["hits"]["hits"][3]["_source"]["cloud"]["account"]["id"] == "000000000000" + assert res["hits"]["hits"][3]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] + + logstash_message = self.logstash.get_messages(expected=0) + 
assert len(logstash_message) == 0 + + replayed_events, _ = _sqs_get_messages(self.sqs_client, os.environ["SQS_REPLAY_URL"], self.sqs_replay_queue_arn) + with self.assertRaises(ReplayHandlerException): + handler(replayed_events, ctx) # type:ignore + + self.elasticsearch.refresh(index="logs-generic-default") + + # Remove the expected id for s3-sqs so that it can be replayed + self.elasticsearch.delete_by_query( + index="logs-generic-default", body={"query": {"ids": {"values": [f"{prefix_s3_first}-000000000000"]}}} + ) + + # Remove the expected id for sqs so that it can be replayed + self.elasticsearch.delete_by_query( + index="logs-generic-default", body={"query": {"ids": {"values": [f"{prefix_sqs}-000000000000"]}}} + ) + + # Remove the expected id for cloudwatch logs so that it can be replayed + self.elasticsearch.delete_by_query( + index="logs-generic-default", + body={"query": {"ids": {"values": [f"{prefix_cloudwatch_logs}-000000000000"]}}}, + ) + + # Remove the expected id for kinesis data stream so that it can be replayed + self.elasticsearch.delete_by_query( + index="logs-generic-default", + body={"query": {"ids": {"values": [f"{prefix_kinesis}-000000000000"]}}}, + ) + + self.elasticsearch.refresh(index="logs-generic-default") + + # let's update the config file so that logstash won't fail anymore + config_yaml = f""" + inputs: + - type: "kinesis-data-stream" + id: "{kinesis_stream_arn}" + tags: {self.default_tags} + outputs: {self.default_outputs} + - type: "cloudwatch-logs" + id: "{cloudwatch_group_arn}" + tags: {self.default_tags} + outputs: {self.default_outputs} + - type: sqs + id: "{sqs_queue_arn}" + tags: {self.default_tags} + outputs: {self.default_outputs} + - type: s3-sqs + id: "{s3_sqs_queue_arn}" + tags: {self.default_tags} + outputs: {self.default_outputs} + """ + + _s3_upload_content_to_bucket( + client=self.s3_client, + content=config_yaml.encode("utf-8"), + content_type="text/plain", + bucket_name=config_bucket_name, + key=config_file_path, + create_bucket=False, + ) + + ctx = ContextMock(remaining_time_in_millis=_REMAINING_TIME_FORCE_CONTINUE_0ms) + + # implicit wait for the message to be back on the queue + time.sleep(35) + replayed_events, _ = _sqs_get_messages(self.sqs_client, os.environ["SQS_REPLAY_URL"], self.sqs_replay_queue_arn) + fifth_call = handler(replayed_events, ctx) # type:ignore + + assert fifth_call == "replayed" + + self.elasticsearch.refresh(index="logs-generic-default") + assert self.elasticsearch.count(index="logs-generic-default")["count"] == 5 + + self.elasticsearch.refresh(index="logs-generic-default") + res = self.elasticsearch.search(index="logs-generic-default", sort="_seq_no") + + assert res["hits"]["total"] == {"value": 5, "relation": "eq"} + + assert res["hits"]["hits"][4]["_source"]["message"] == fixtures[0].rstrip("\n") + assert res["hits"]["hits"][4]["_source"]["log"]["offset"] == 0 + assert ( + res["hits"]["hits"][4]["_source"]["log"]["file"]["path"] + == f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{first_filename}" + ) + assert res["hits"]["hits"][4]["_source"]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name + assert res["hits"]["hits"][4]["_source"]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" + assert res["hits"]["hits"][4]["_source"]["aws"]["s3"]["object"]["key"] == first_filename + assert res["hits"]["hits"][4]["_source"]["cloud"]["provider"] == "aws" + assert res["hits"]["hits"][4]["_source"]["cloud"]["region"] == "eu-central-1" + assert res["hits"]["hits"][4]["_source"]["cloud"]["account"]["id"] 
== "000000000000" + assert res["hits"]["hits"][4]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] + + logstash_message = self.logstash.get_messages(expected=0) + assert len(logstash_message) == 0 + + # implicit wait for the message to be back on the queue + time.sleep(35) + replayed_events, _ = _sqs_get_messages(self.sqs_client, os.environ["SQS_REPLAY_URL"], self.sqs_replay_queue_arn) + sixth_call = handler(replayed_events, ctx) # type:ignore + + assert sixth_call == "replayed" + + self.elasticsearch.refresh(index="logs-generic-default") + assert self.elasticsearch.count(index="logs-generic-default")["count"] == 5 + + res = self.elasticsearch.search(index="logs-generic-default", sort="_seq_no") + assert res["hits"]["total"] == {"value": 5, "relation": "eq"} + + logstash_message = self.logstash.get_messages(expected=1) + assert len(logstash_message) == 1 + # positions on res["hits"]["hits"] are skewed compared to logstash_message + # in elasticsearch we inserted the second event of each input before the first one + res["hits"]["hits"][4]["_source"]["tags"].remove("generic") + assert res["hits"]["hits"][4]["_source"]["aws"] == logstash_message[0]["aws"] + assert res["hits"]["hits"][4]["_source"]["cloud"] == logstash_message[0]["cloud"] + assert res["hits"]["hits"][4]["_source"]["log"] == logstash_message[0]["log"] + assert res["hits"]["hits"][4]["_source"]["message"] == logstash_message[0]["message"] + assert res["hits"]["hits"][4]["_source"]["tags"] == logstash_message[0]["tags"] + + ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) + + # implicit wait for the message to be back on the queue + time.sleep(35) + replayed_events, _ = _sqs_get_messages(self.sqs_client, os.environ["SQS_REPLAY_URL"], self.sqs_replay_queue_arn) + seventh_call = handler(replayed_events, ctx) # type:ignore + + assert seventh_call == "replayed" + + self.elasticsearch.refresh(index="logs-generic-default") + assert self.elasticsearch.count(index="logs-generic-default")["count"] == 8 + + self.elasticsearch.refresh(index="logs-generic-default") + res = self.elasticsearch.search(index="logs-generic-default", sort="_seq_no") + + assert res["hits"]["total"] == {"value": 8, "relation": "eq"} + + assert res["hits"]["hits"][5]["_source"]["message"] == fixtures[0].rstrip("\n") + assert res["hits"]["hits"][5]["_source"]["log"]["offset"] == 0 + assert res["hits"]["hits"][5]["_source"]["log"]["file"]["path"] == sqs_queue_url_path + assert res["hits"]["hits"][5]["_source"]["aws"]["sqs"]["name"] == sqs_queue_name + assert res["hits"]["hits"][5]["_source"]["aws"]["sqs"]["message_id"] == message_id + assert res["hits"]["hits"][5]["_source"]["cloud"]["provider"] == "aws" + assert res["hits"]["hits"][5]["_source"]["cloud"]["region"] == "us-east-1" + assert res["hits"]["hits"][5]["_source"]["cloud"]["account"]["id"] == "000000000000" + assert res["hits"]["hits"][5]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] + + assert res["hits"]["hits"][6]["_source"]["message"] == fixtures[0].rstrip("\n") + assert res["hits"]["hits"][6]["_source"]["log"]["offset"] == 0 + assert ( + res["hits"]["hits"][6]["_source"]["log"]["file"]["path"] + == f"{cloudwatch_group_name}/{cloudwatch_stream_name}" + ) + assert res["hits"]["hits"][6]["_source"]["aws"]["cloudwatch"]["log_group"] == cloudwatch_group_name + assert res["hits"]["hits"][6]["_source"]["aws"]["cloudwatch"]["log_stream"] == cloudwatch_stream_name + assert res["hits"]["hits"][6]["_source"]["aws"]["cloudwatch"]["event_id"] == 
event_ids_cloudwatch_logs[0] + assert res["hits"]["hits"][6]["_source"]["cloud"]["provider"] == "aws" + assert res["hits"]["hits"][6]["_source"]["cloud"]["region"] == "us-east-1" + assert res["hits"]["hits"][6]["_source"]["cloud"]["account"]["id"] == "000000000000" + assert res["hits"]["hits"][6]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] + + assert res["hits"]["hits"][7]["_source"]["message"] == fixtures[0].rstrip("\n") + assert res["hits"]["hits"][7]["_source"]["log"]["offset"] == 0 + assert res["hits"]["hits"][7]["_source"]["log"]["file"]["path"] == kinesis_stream_arn + assert res["hits"]["hits"][7]["_source"]["aws"]["kinesis"]["type"] == "stream" + assert res["hits"]["hits"][7]["_source"]["aws"]["kinesis"]["partition_key"] == "PartitionKey" + assert res["hits"]["hits"][7]["_source"]["aws"]["kinesis"]["name"] == kinesis_stream_name + assert ( + res["hits"]["hits"][7]["_source"]["aws"]["kinesis"]["sequence_number"] + == events_kinesis["Records"][0]["kinesis"]["sequenceNumber"] + ) + assert res["hits"]["hits"][7]["_source"]["cloud"]["provider"] == "aws" + assert res["hits"]["hits"][7]["_source"]["cloud"]["region"] == "us-east-1" + assert res["hits"]["hits"][7]["_source"]["cloud"]["account"]["id"] == "000000000000" + assert res["hits"]["hits"][7]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] + + logstash_message = self.logstash.get_messages(expected=8) + assert len(logstash_message) == 8 + res["hits"]["hits"][0]["_source"]["tags"].remove("generic") + res["hits"]["hits"][1]["_source"]["tags"].remove("generic") + res["hits"]["hits"][2]["_source"]["tags"].remove("generic") + res["hits"]["hits"][3]["_source"]["tags"].remove("generic") + res["hits"]["hits"][5]["_source"]["tags"].remove("generic") + res["hits"]["hits"][6]["_source"]["tags"].remove("generic") + res["hits"]["hits"][7]["_source"]["tags"].remove("generic") + + # positions on res["hits"]["hits"] are skewed compared to logstash_message + # in elasticsearch we inserted the second event of each input before the first one + assert res["hits"]["hits"][0]["_source"]["aws"] == logstash_message[1]["aws"] + assert res["hits"]["hits"][0]["_source"]["cloud"] == logstash_message[1]["cloud"] + assert res["hits"]["hits"][0]["_source"]["log"] == logstash_message[1]["log"] + assert res["hits"]["hits"][0]["_source"]["message"] == logstash_message[1]["message"] + assert res["hits"]["hits"][0]["_source"]["tags"] == logstash_message[1]["tags"] + + assert res["hits"]["hits"][5]["_source"]["aws"] == logstash_message[2]["aws"] + assert res["hits"]["hits"][5]["_source"]["cloud"] == logstash_message[2]["cloud"] + assert res["hits"]["hits"][5]["_source"]["log"] == logstash_message[2]["log"] + assert res["hits"]["hits"][5]["_source"]["message"] == logstash_message[2]["message"] + assert res["hits"]["hits"][5]["_source"]["tags"] == logstash_message[2]["tags"] + + assert res["hits"]["hits"][1]["_source"]["aws"] == logstash_message[3]["aws"] + assert res["hits"]["hits"][1]["_source"]["cloud"] == logstash_message[3]["cloud"] + assert res["hits"]["hits"][1]["_source"]["log"] == logstash_message[3]["log"] + assert res["hits"]["hits"][1]["_source"]["message"] == logstash_message[3]["message"] + assert res["hits"]["hits"][1]["_source"]["tags"] == logstash_message[3]["tags"] + + assert res["hits"]["hits"][6]["_source"]["aws"] == logstash_message[4]["aws"] + assert res["hits"]["hits"][6]["_source"]["cloud"] == logstash_message[4]["cloud"] + assert res["hits"]["hits"][6]["_source"]["log"] == logstash_message[4]["log"] 
+ assert res["hits"]["hits"][6]["_source"]["message"] == logstash_message[4]["message"] + assert res["hits"]["hits"][6]["_source"]["tags"] == logstash_message[4]["tags"] + + assert res["hits"]["hits"][2]["_source"]["aws"] == logstash_message[5]["aws"] + assert res["hits"]["hits"][2]["_source"]["cloud"] == logstash_message[5]["cloud"] + assert res["hits"]["hits"][2]["_source"]["log"] == logstash_message[5]["log"] + assert res["hits"]["hits"][2]["_source"]["message"] == logstash_message[5]["message"] + assert res["hits"]["hits"][2]["_source"]["tags"] == logstash_message[5]["tags"] + + assert res["hits"]["hits"][7]["_source"]["aws"] == logstash_message[6]["aws"] + assert res["hits"]["hits"][7]["_source"]["cloud"] == logstash_message[6]["cloud"] + assert res["hits"]["hits"][7]["_source"]["log"] == logstash_message[6]["log"] + assert res["hits"]["hits"][7]["_source"]["message"] == logstash_message[6]["message"] + assert res["hits"]["hits"][7]["_source"]["tags"] == logstash_message[6]["tags"] + + assert res["hits"]["hits"][3]["_source"]["aws"] == logstash_message[7]["aws"] + assert res["hits"]["hits"][3]["_source"]["cloud"] == logstash_message[7]["cloud"] + assert res["hits"]["hits"][3]["_source"]["log"] == logstash_message[7]["log"] + assert res["hits"]["hits"][3]["_source"]["message"] == logstash_message[7]["message"] + assert res["hits"]["hits"][3]["_source"]["tags"] == logstash_message[7]["tags"] + + def test_empty(self) -> None: + assert isinstance(self.elasticsearch, ElasticsearchContainer) + assert isinstance(self.logstash, LogstashContainer) + assert isinstance(self.localstack, LocalStackContainer) + + fixtures = [" \n"] # once stripped it is an empty event + + s3_bucket_name = _time_based_id(suffix="test-bucket") + first_filename = "exportedlog/uuid/yyyy-mm-dd-[$LATEST]hash/000000.gz" + _s3_upload_content_to_bucket( + client=self.s3_client, + content=gzip.compress("".join(fixtures).encode("utf-8")), + content_type="application/x-gzip", + bucket_name=s3_bucket_name, + key=first_filename, + ) + + cloudwatch_group_name = _time_based_id(suffix="source-group") + cloudwatch_group = _logs_create_cloudwatch_logs_group(self.logs_client, group_name=cloudwatch_group_name) + + cloudwatch_stream_name = _time_based_id(suffix="source-stream") + _logs_create_cloudwatch_logs_stream( + self.logs_client, group_name=cloudwatch_group_name, stream_name=cloudwatch_stream_name + ) + + _logs_upload_event_to_cloudwatch_logs( + self.logs_client, + group_name=cloudwatch_group_name, + stream_name=cloudwatch_stream_name, + messages_body=["".join(fixtures)], + ) + + cloudwatch_group_arn = cloudwatch_group["arn"] + + cloudwatch_group_name = cloudwatch_group_name + cloudwatch_stream_name = cloudwatch_stream_name + + sqs_queue_name = _time_based_id(suffix="source-sqs") + s3_sqs_queue_name = _time_based_id(suffix="source-s3-sqs") + + sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) + s3_sqs_queue = _sqs_create_queue(self.sqs_client, s3_sqs_queue_name, self.localstack.get_url()) + + sqs_queue_arn = sqs_queue["QueueArn"] + sqs_queue_url = sqs_queue["QueueUrl"] + + s3_sqs_queue_arn = s3_sqs_queue["QueueArn"] + s3_sqs_queue_url = s3_sqs_queue["QueueUrl"] + + _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) + _sqs_send_s3_notifications(self.sqs_client, s3_sqs_queue_url, s3_bucket_name, [first_filename]) + + kinesis_stream_name = _time_based_id(suffix="source-kinesis") + kinesis_stream = _kinesis_create_stream(self.kinesis_client, kinesis_stream_name) + 
kinesis_stream_arn = kinesis_stream["StreamDescription"]["StreamARN"] + + _kinesis_put_records(self.kinesis_client, kinesis_stream_name, ["".join(fixtures)]) + + config_yaml: str = f""" + inputs: + - type: "kinesis-data-stream" + id: "{kinesis_stream_arn}" + outputs: {self.default_outputs} + - type: "cloudwatch-logs" + id: "{cloudwatch_group_arn}" + outputs: {self.default_outputs} + - type: sqs + id: "{sqs_queue_arn}" + outputs: {self.default_outputs} + - type: s3-sqs + id: "{s3_sqs_queue_arn}" + outputs: {self.default_outputs} + """ + + config_file_path = "config.yaml" + config_bucket_name = _time_based_id(suffix="config-bucket") + _s3_upload_content_to_bucket( + client=self.s3_client, + content=config_yaml.encode("utf-8"), + content_type="text/plain", + bucket_name=config_bucket_name, + key=config_file_path, + ) + + os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" + + events_s3, _ = _sqs_get_messages(self.sqs_client, s3_sqs_queue_url, s3_sqs_queue_arn) + + events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) + + events_cloudwatch_logs, _, _ = _logs_retrieve_event_from_cloudwatch_logs( + self.logs_client, cloudwatch_group_name, cloudwatch_stream_name + ) + + events_kinesis, _ = _kinesis_retrieve_event_from_kinesis_stream( + self.kinesis_client, kinesis_stream_name, kinesis_stream_arn + ) + + ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) + first_call = handler(events_s3, ctx) # type:ignore + + assert first_call == "completed" + + self.elasticsearch.refresh(index="logs-generic-default", ignore_unavailable=True) + assert self.elasticsearch.count(index="logs-generic-default", ignore_unavailable=True)["count"] == 0 + + logstash_message = self.logstash.get_messages(expected=0) + assert len(logstash_message) == 0 + + second_call = handler(events_sqs, ctx) # type:ignore + + assert second_call == "completed" + + self.elasticsearch.refresh(index="logs-generic-default", ignore_unavailable=True) + assert self.elasticsearch.count(index="logs-generic-default", ignore_unavailable=True)["count"] == 0 + + logstash_message = self.logstash.get_messages(expected=0) + assert len(logstash_message) == 0 + + third_call = handler(events_cloudwatch_logs, ctx) # type:ignore + + assert third_call == "completed" + + self.elasticsearch.refresh(index="logs-generic-default", ignore_unavailable=True) + assert self.elasticsearch.count(index="logs-generic-default", ignore_unavailable=True)["count"] == 0 + + logstash_message = self.logstash.get_messages(expected=0) + assert len(logstash_message) == 0 + + fourth_call = handler(events_kinesis, ctx) # type:ignore + + assert fourth_call == "completed" + + self.elasticsearch.refresh(index="logs-generic-default", ignore_unavailable=True) + assert self.elasticsearch.count(index="logs-generic-default", ignore_unavailable=True)["count"] == 0 + + logstash_message = self.logstash.get_messages(expected=0) + assert len(logstash_message) == 0 + + def test_filtered(self) -> None: + assert isinstance(self.elasticsearch, ElasticsearchContainer) + assert isinstance(self.logstash, LogstashContainer) + assert isinstance(self.localstack, LocalStackContainer) + + fixtures = ["excluded"] + + s3_bucket_name = _time_based_id(suffix="test-bucket") + first_filename = "exportedlog/uuid/yyyy-mm-dd-[$LATEST]hash/000000.gz" + _s3_upload_content_to_bucket( + client=self.s3_client, + content=gzip.compress("".join(fixtures).encode("utf-8")), + content_type="application/x-gzip", + bucket_name=s3_bucket_name, + 
key=first_filename, + ) + + cloudwatch_group_name = _time_based_id(suffix="source-group") + cloudwatch_group = _logs_create_cloudwatch_logs_group(self.logs_client, group_name=cloudwatch_group_name) + + cloudwatch_stream_name = _time_based_id(suffix="source-stream") + _logs_create_cloudwatch_logs_stream( + self.logs_client, group_name=cloudwatch_group_name, stream_name=cloudwatch_stream_name + ) + + _logs_upload_event_to_cloudwatch_logs( + self.logs_client, + group_name=cloudwatch_group_name, + stream_name=cloudwatch_stream_name, + messages_body=["".join(fixtures)], + ) + + cloudwatch_group_arn = cloudwatch_group["arn"] + + cloudwatch_group_name = cloudwatch_group_name + cloudwatch_stream_name = cloudwatch_stream_name + + sqs_queue_name = _time_based_id(suffix="source-sqs") + s3_sqs_queue_name = _time_based_id(suffix="source-s3-sqs") + + sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) + s3_sqs_queue = _sqs_create_queue(self.sqs_client, s3_sqs_queue_name, self.localstack.get_url()) + + sqs_queue_arn = sqs_queue["QueueArn"] + sqs_queue_url = sqs_queue["QueueUrl"] + + s3_sqs_queue_arn = s3_sqs_queue["QueueArn"] + s3_sqs_queue_url = s3_sqs_queue["QueueUrl"] + + _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) + _sqs_send_s3_notifications(self.sqs_client, s3_sqs_queue_url, s3_bucket_name, [first_filename]) + + kinesis_stream_name = _time_based_id(suffix="source-kinesis") + kinesis_stream = _kinesis_create_stream(self.kinesis_client, kinesis_stream_name) + kinesis_stream_arn = kinesis_stream["StreamDescription"]["StreamARN"] + + _kinesis_put_records(self.kinesis_client, kinesis_stream_name, ["".join(fixtures)]) + + config_yaml: str = f""" + inputs: + - type: "kinesis-data-stream" + id: "{kinesis_stream_arn}" + exclude: + - "excluded" + outputs: {self.default_outputs} + - type: "cloudwatch-logs" + id: "{cloudwatch_group_arn}" + exclude: + - "excluded" + outputs: {self.default_outputs} + - type: sqs + id: "{sqs_queue_arn}" + exclude: + - "excluded" + outputs: {self.default_outputs} + - type: s3-sqs + id: "{s3_sqs_queue_arn}" + exclude: + - "excluded" + outputs: {self.default_outputs} + """ + + config_file_path = "config.yaml" + config_bucket_name = _time_based_id(suffix="config-bucket") + _s3_upload_content_to_bucket( + client=self.s3_client, + content=config_yaml.encode("utf-8"), + content_type="text/plain", + bucket_name=config_bucket_name, + key=config_file_path, + ) + + os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" + + events_s3, _ = _sqs_get_messages(self.sqs_client, s3_sqs_queue_url, s3_sqs_queue_arn) + + events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) + + events_cloudwatch_logs, _, _ = _logs_retrieve_event_from_cloudwatch_logs( + self.logs_client, cloudwatch_group_name, cloudwatch_stream_name + ) + + events_kinesis, _ = _kinesis_retrieve_event_from_kinesis_stream( + self.kinesis_client, kinesis_stream_name, kinesis_stream_arn + ) + + ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) + first_call = handler(events_s3, ctx) # type:ignore + + assert first_call == "completed" + + self.elasticsearch.refresh(index="logs-generic-default", ignore_unavailable=True) + assert self.elasticsearch.count(index="logs-generic-default", ignore_unavailable=True)["count"] == 0 + + logstash_message = self.logstash.get_messages(expected=0) + assert len(logstash_message) == 0 + + second_call = handler(events_sqs, ctx) # type:ignore + + assert second_call == 
"completed" + + self.elasticsearch.refresh(index="logs-generic-default", ignore_unavailable=True) + assert self.elasticsearch.count(index="logs-generic-default", ignore_unavailable=True)["count"] == 0 + + logstash_message = self.logstash.get_messages(expected=0) + assert len(logstash_message) == 0 + + third_call = handler(events_cloudwatch_logs, ctx) # type:ignore + + assert third_call == "completed" + + self.elasticsearch.refresh(index="logs-generic-default", ignore_unavailable=True) + assert self.elasticsearch.count(index="logs-generic-default", ignore_unavailable=True)["count"] == 0 + + logstash_message = self.logstash.get_messages(expected=0) + assert len(logstash_message) == 0 + + fourth_call = handler(events_kinesis, ctx) # type:ignore + + assert fourth_call == "completed" + + self.elasticsearch.refresh(index="logs-generic-default", ignore_unavailable=True) + assert self.elasticsearch.count(index="logs-generic-default", ignore_unavailable=True)["count"] == 0 + + logstash_message = self.logstash.get_messages(expected=0) + assert len(logstash_message) == 0 + + def test_expand_event_from_list_empty_line(self) -> None: + assert isinstance(self.logstash, LogstashContainer) + assert isinstance(self.localstack, LocalStackContainer) + + first_expanded_event: str = _load_file_fixture("cloudwatch-log-1.json") + second_expanded_event: str = _load_file_fixture("cloudwatch-log-2.json") + third_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") + + fixtures = [ + f"""{{"aField": [{first_expanded_event},{second_expanded_event}]}}\n""" + f"""\n{{"aField": [{third_expanded_event}]}}""" + ] + + sqs_queue_name = _time_based_id(suffix="source-sqs") + + sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) + + sqs_queue_arn = sqs_queue["QueueArn"] + sqs_queue_url = sqs_queue["QueueUrl"] + sqs_queue_url_path = sqs_queue["QueueUrlPath"] + + _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) + + config_yaml: str = f""" + inputs: + - type: "sqs" + id: "{sqs_queue_arn}" + expand_event_list_from_field: aField + tags: {self.default_tags} + outputs: + - type: "logstash" + args: + logstash_url: "{self.logstash.get_url()}" + ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} + username: "{self.logstash.logstash_user}" + password: "{self.logstash.logstash_password}" + """ + + config_file_path = "config.yaml" + config_bucket_name = _time_based_id(suffix="config-bucket") + _s3_upload_content_to_bucket( + client=self.s3_client, + content=config_yaml.encode("utf-8"), + content_type="text/plain", + bucket_name=config_bucket_name, + key=config_file_path, + ) + + os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" + + events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) + + message_id = events_sqs["Records"][0]["messageId"] + + ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) + + first_call = handler(events_sqs, ctx) # type:ignore + + assert first_call == "completed" + + logstash_message = self.logstash.get_messages(expected=3) + assert len(logstash_message) == 3 + + assert logstash_message[0]["message"] == json_dumper(json_parser(first_expanded_event)) + assert logstash_message[0]["log"]["offset"] == 0 + assert logstash_message[0]["log"]["file"]["path"] == sqs_queue_url_path + assert logstash_message[0]["aws"]["sqs"]["name"] == sqs_queue_name + assert logstash_message[0]["aws"]["sqs"]["message_id"] == message_id + assert 
logstash_message[0]["cloud"]["provider"] == "aws" + assert logstash_message[0]["cloud"]["region"] == "us-east-1" + assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + assert logstash_message[1]["message"] == json_dumper(json_parser(second_expanded_event)) + assert logstash_message[1]["log"]["offset"] == 174 + assert logstash_message[1]["log"]["file"]["path"] == sqs_queue_url_path + assert logstash_message[1]["aws"]["sqs"]["name"] == sqs_queue_name + assert logstash_message[1]["aws"]["sqs"]["message_id"] == message_id + assert logstash_message[1]["cloud"]["provider"] == "aws" + assert logstash_message[1]["cloud"]["region"] == "us-east-1" + assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + assert logstash_message[2]["message"] == json_dumper(json_parser(third_expanded_event)) + assert logstash_message[2]["log"]["offset"] == 349 + assert logstash_message[2]["log"]["file"]["path"] == sqs_queue_url_path + assert logstash_message[2]["aws"]["sqs"]["name"] == sqs_queue_name + assert logstash_message[2]["aws"]["sqs"]["message_id"] == message_id + assert logstash_message[2]["cloud"]["provider"] == "aws" + assert logstash_message[2]["cloud"]["region"] == "us-east-1" + assert logstash_message[2]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[2]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + def test_expand_event_from_list_empty_event_not_expanded(self) -> None: + assert isinstance(self.logstash, LogstashContainer) + assert isinstance(self.localstack, LocalStackContainer) + + first_expanded_event: str = _load_file_fixture("cloudwatch-log-1.json") + second_expanded_event: str = _load_file_fixture("cloudwatch-log-2.json") + + fixtures = [f"""{{"aField": [{first_expanded_event},"",{second_expanded_event}]}}"""] + + sqs_queue_name = _time_based_id(suffix="source-sqs") + + sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) + + sqs_queue_arn = sqs_queue["QueueArn"] + sqs_queue_url = sqs_queue["QueueUrl"] + sqs_queue_url_path = sqs_queue["QueueUrlPath"] + + _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) + + config_yaml: str = f""" + inputs: + - type: "sqs" + id: "{sqs_queue_arn}" + expand_event_list_from_field: aField + tags: {self.default_tags} + outputs: + - type: "logstash" + args: + logstash_url: "{self.logstash.get_url()}" + ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} + username: "{self.logstash.logstash_user}" + password: "{self.logstash.logstash_password}" + """ + + config_file_path = "config.yaml" + config_bucket_name = _time_based_id(suffix="config-bucket") + _s3_upload_content_to_bucket( + client=self.s3_client, + content=config_yaml.encode("utf-8"), + content_type="text/plain", + bucket_name=config_bucket_name, + key=config_file_path, + ) + + os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" + + events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) + + message_id = events_sqs["Records"][0]["messageId"] + + ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) + + first_call = handler(events_sqs, ctx) # type:ignore + + assert first_call == "completed" + + logstash_message = self.logstash.get_messages(expected=2) + assert len(logstash_message) == 2 + + assert logstash_message[0]["message"] == 
json_dumper(json_parser(first_expanded_event)) + assert logstash_message[0]["log"]["offset"] == 0 + assert logstash_message[0]["log"]["file"]["path"] == sqs_queue_url_path + assert logstash_message[0]["aws"]["sqs"]["name"] == sqs_queue_name + assert logstash_message[0]["aws"]["sqs"]["message_id"] == message_id + assert logstash_message[0]["cloud"]["provider"] == "aws" + assert logstash_message[0]["cloud"]["region"] == "us-east-1" + assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + assert logstash_message[1]["message"] == json_dumper(json_parser(second_expanded_event)) + assert logstash_message[1]["log"]["offset"] == 233 + assert logstash_message[1]["log"]["file"]["path"] == sqs_queue_url_path + assert logstash_message[1]["aws"]["sqs"]["name"] == sqs_queue_name + assert logstash_message[1]["aws"]["sqs"]["message_id"] == message_id + assert logstash_message[1]["cloud"]["provider"] == "aws" + assert logstash_message[1]["cloud"]["region"] == "us-east-1" + assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + def test_root_fields_to_add_to_expanded_event_no_dict_event(self) -> None: + assert isinstance(self.logstash, LogstashContainer) + assert isinstance(self.localstack, LocalStackContainer) + + first_expanded_event: str = '"first_expanded_event"' + second_expanded_event: str = '"second_expanded_event"' + third_expanded_event: str = '"third_expanded_event"' + + fixtures = [ + f"""{{"firstRootField": "firstRootField", "secondRootField":"secondRootField", + "aField": [{first_expanded_event},{second_expanded_event},{third_expanded_event}]}}""" + ] + + sqs_queue_name = _time_based_id(suffix="source-sqs") + + sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) + + sqs_queue_arn = sqs_queue["QueueArn"] + sqs_queue_url = sqs_queue["QueueUrl"] + sqs_queue_url_path = sqs_queue["QueueUrlPath"] + + _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) + + config_yaml: str = f""" + inputs: + - type: "sqs" + id: "{sqs_queue_arn}" + expand_event_list_from_field: aField + root_fields_to_add_to_expanded_event: ["secondRootField"] + tags: {self.default_tags} + outputs: + - type: "logstash" + args: + logstash_url: "{self.logstash.get_url()}" + ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} + username: "{self.logstash.logstash_user}" + password: "{self.logstash.logstash_password}" + """ + + config_file_path = "config.yaml" + config_bucket_name = _time_based_id(suffix="config-bucket") + _s3_upload_content_to_bucket( + client=self.s3_client, + content=config_yaml.encode("utf-8"), + content_type="text/plain", + bucket_name=config_bucket_name, + key=config_file_path, + ) + + os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" + + events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) + + message_id = events_sqs["Records"][0]["messageId"] + + ctx = ContextMock() + first_call = handler(events_sqs, ctx) # type:ignore + + assert first_call == "continuing" + + logstash_message = self.logstash.get_messages(expected=1) + assert len(logstash_message) == 1 + + assert logstash_message[0]["message"] == first_expanded_event + assert logstash_message[0]["log"]["offset"] == 0 + assert logstash_message[0]["log"]["file"]["path"] == sqs_queue_url_path + assert logstash_message[0]["aws"]["sqs"]["name"] == 
sqs_queue_name + assert logstash_message[0]["aws"]["sqs"]["message_id"] == message_id + assert logstash_message[0]["cloud"]["provider"] == "aws" + assert logstash_message[0]["cloud"]["region"] == "us-east-1" + assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) + + continued_events, _ = _sqs_get_messages( + self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn + ) + second_call = handler(continued_events, ctx) # type:ignore + + assert second_call == "completed" + + logstash_message = self.logstash.get_messages(expected=3) + assert len(logstash_message) == 3 + + assert logstash_message[1]["message"] == second_expanded_event + assert logstash_message[1]["log"]["offset"] == 56 + assert logstash_message[1]["log"]["file"]["path"] == sqs_queue_url_path + assert logstash_message[1]["aws"]["sqs"]["name"] == sqs_queue_name + assert logstash_message[1]["aws"]["sqs"]["message_id"] == message_id + assert logstash_message[1]["cloud"]["provider"] == "aws" + assert logstash_message[1]["cloud"]["region"] == "us-east-1" + assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + assert logstash_message[2]["message"] == third_expanded_event + assert logstash_message[2]["log"]["offset"] == 112 + assert logstash_message[2]["log"]["file"]["path"] == sqs_queue_url_path + assert logstash_message[2]["aws"]["sqs"]["name"] == sqs_queue_name + assert logstash_message[2]["aws"]["sqs"]["message_id"] == message_id + assert logstash_message[2]["cloud"]["provider"] == "aws" + assert logstash_message[2]["cloud"]["region"] == "us-east-1" + assert logstash_message[2]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[2]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + def test_root_fields_to_add_to_expanded_event_event_not_expanded(self) -> None: + assert isinstance(self.logstash, LogstashContainer) + assert isinstance(self.localstack, LocalStackContainer) + + first_expanded_event: str = _load_file_fixture("cloudwatch-log-1.json") + first_expanded_with_root_fields: dict[str, Any] = json_parser(first_expanded_event) + first_expanded_with_root_fields["secondRootField"] = "secondRootField" + + second_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") + second_expanded_with_root_fields: dict[str, Any] = json_parser(second_expanded_event) + second_expanded_with_root_fields["secondRootField"] = "secondRootField" + + fixtures = [ + f"""{{"firstRootField": "firstRootField", "secondRootField":"secondRootField", + "aField": [{first_expanded_event},{{}},{second_expanded_event}]}}""" + ] + + sqs_queue_name = _time_based_id(suffix="source-sqs") + + sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) + + sqs_queue_arn = sqs_queue["QueueArn"] + sqs_queue_url = sqs_queue["QueueUrl"] + sqs_queue_url_path = sqs_queue["QueueUrlPath"] + + _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) + + config_yaml: str = f""" + inputs: + - type: "sqs" + id: "{sqs_queue_arn}" + expand_event_list_from_field: aField + root_fields_to_add_to_expanded_event: ["secondRootField"] + tags: {self.default_tags} + outputs: + - type: "logstash" + args: + logstash_url: "{self.logstash.get_url()}" + ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} + username: 
"{self.logstash.logstash_user}" + password: "{self.logstash.logstash_password}" + """ + + config_file_path = "config.yaml" + config_bucket_name = _time_based_id(suffix="config-bucket") + _s3_upload_content_to_bucket( + client=self.s3_client, + content=config_yaml.encode("utf-8"), + content_type="text/plain", + bucket_name=config_bucket_name, + key=config_file_path, + ) + + os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" + + events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) + + message_id = events_sqs["Records"][0]["messageId"] + + ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) + + first_call = handler(events_sqs, ctx) # type:ignore + + assert first_call == "completed" + + logstash_message = self.logstash.get_messages(expected=2) + assert len(logstash_message) == 2 + + assert logstash_message[0]["message"] == json_dumper(first_expanded_with_root_fields) + assert logstash_message[0]["log"]["offset"] == 0 + assert logstash_message[0]["log"]["file"]["path"] == sqs_queue_url_path + assert logstash_message[0]["aws"]["sqs"]["name"] == sqs_queue_name + assert logstash_message[0]["aws"]["sqs"]["message_id"] == message_id + assert logstash_message[0]["cloud"]["provider"] == "aws" + assert logstash_message[0]["cloud"]["region"] == "us-east-1" + assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + assert logstash_message[1]["message"] == json_dumper(second_expanded_with_root_fields) + assert logstash_message[1]["log"]["offset"] == 180 + assert logstash_message[1]["log"]["file"]["path"] == sqs_queue_url_path + assert logstash_message[1]["aws"]["sqs"]["name"] == sqs_queue_name + assert logstash_message[1]["aws"]["sqs"]["message_id"] == message_id + assert logstash_message[1]["cloud"]["provider"] == "aws" + assert logstash_message[1]["cloud"]["region"] == "us-east-1" + assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + def test_root_fields_to_add_to_expanded_event_list(self) -> None: + assert isinstance(self.logstash, LogstashContainer) + assert isinstance(self.localstack, LocalStackContainer) + + first_expanded_event: str = _load_file_fixture("cloudwatch-log-1.json") + first_expanded_with_root_fields: dict[str, Any] = json_parser(first_expanded_event) + first_expanded_with_root_fields["secondRootField"] = "secondRootField" + + second_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") + second_expanded_with_root_fields: dict[str, Any] = json_parser(second_expanded_event) + second_expanded_with_root_fields["secondRootField"] = "secondRootField" + + third_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") + third_expanded_event_with_root_fields: dict[str, Any] = json_parser(third_expanded_event) + third_expanded_event_with_root_fields["secondRootField"] = "secondRootField" + + fixtures = [ + f"""{{"firstRootField": "firstRootField", "secondRootField":"secondRootField", + "aField": [{first_expanded_event},{second_expanded_event},{third_expanded_event}]}}""" + ] + + sqs_queue_name = _time_based_id(suffix="source-sqs") + + sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) + + sqs_queue_arn = sqs_queue["QueueArn"] + sqs_queue_url = sqs_queue["QueueUrl"] + sqs_queue_url_path = sqs_queue["QueueUrlPath"] + + _sqs_send_messages(self.sqs_client, 
sqs_queue_url, "".join(fixtures)) + + config_yaml: str = f""" + inputs: + - type: "sqs" + id: "{sqs_queue_arn}" + expand_event_list_from_field: aField + root_fields_to_add_to_expanded_event: ["secondRootField"] + tags: {self.default_tags} + outputs: + - type: "logstash" + args: + logstash_url: "{self.logstash.get_url()}" + ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} + username: "{self.logstash.logstash_user}" + password: "{self.logstash.logstash_password}" + """ + + config_file_path = "config.yaml" + config_bucket_name = _time_based_id(suffix="config-bucket") + _s3_upload_content_to_bucket( + client=self.s3_client, + content=config_yaml.encode("utf-8"), + content_type="text/plain", + bucket_name=config_bucket_name, + key=config_file_path, + ) + + os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" + + events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) + + message_id = events_sqs["Records"][0]["messageId"] + + ctx = ContextMock() + first_call = handler(events_sqs, ctx) # type:ignore + + assert first_call == "continuing" + + logstash_message = self.logstash.get_messages(expected=1) + assert len(logstash_message) == 1 + + assert logstash_message[0]["message"] == json_dumper(first_expanded_with_root_fields) + assert logstash_message[0]["log"]["offset"] == 0 + assert logstash_message[0]["log"]["file"]["path"] == sqs_queue_url_path + assert logstash_message[0]["aws"]["sqs"]["name"] == sqs_queue_name + assert logstash_message[0]["aws"]["sqs"]["message_id"] == message_id + assert logstash_message[0]["cloud"]["provider"] == "aws" + assert logstash_message[0]["cloud"]["region"] == "us-east-1" + assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) + + continued_events, _ = _sqs_get_messages( + self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn + ) + second_call = handler(continued_events, ctx) # type:ignore + + assert second_call == "completed" + + logstash_message = self.logstash.get_messages(expected=3) + assert len(logstash_message) == 3 + + assert logstash_message[1]["message"] == json_dumper(second_expanded_with_root_fields) + assert logstash_message[1]["log"]["offset"] == 114 + assert logstash_message[1]["log"]["file"]["path"] == sqs_queue_url_path + assert logstash_message[1]["aws"]["sqs"]["name"] == sqs_queue_name + assert logstash_message[1]["aws"]["sqs"]["message_id"] == message_id + assert logstash_message[1]["cloud"]["provider"] == "aws" + assert logstash_message[1]["cloud"]["region"] == "us-east-1" + assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + assert logstash_message[2]["message"] == json_dumper(third_expanded_event_with_root_fields) + assert logstash_message[2]["log"]["offset"] == 228 + assert logstash_message[2]["log"]["file"]["path"] == sqs_queue_url_path + assert logstash_message[2]["aws"]["sqs"]["name"] == sqs_queue_name + assert logstash_message[2]["aws"]["sqs"]["message_id"] == message_id + assert logstash_message[2]["cloud"]["provider"] == "aws" + assert logstash_message[2]["cloud"]["region"] == "us-east-1" + assert logstash_message[2]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[2]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + def 
test_root_fields_to_add_to_expanded_event_list_no_fields_in_root(self) -> None: + assert isinstance(self.logstash, LogstashContainer) + assert isinstance(self.localstack, LocalStackContainer) + + first_expanded_event: str = _load_file_fixture("cloudwatch-log-1.json") + first_expanded_with_root_fields: dict[str, Any] = json_parser(first_expanded_event) + first_expanded_with_root_fields["secondRootField"] = "secondRootField" + + second_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") + second_expanded_with_root_fields: dict[str, Any] = json_parser(second_expanded_event) + second_expanded_with_root_fields["secondRootField"] = "secondRootField" + + third_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") + third_expanded_event_with_root_fields: dict[str, Any] = json_parser(third_expanded_event) + third_expanded_event_with_root_fields["secondRootField"] = "secondRootField" + + fixtures = [ + f"""{{"firstRootField": "firstRootField", "secondRootField":"secondRootField", + "aField": [{first_expanded_event},{second_expanded_event},{third_expanded_event}]}}""" + ] + + sqs_queue_name = _time_based_id(suffix="source-sqs") + + sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) + + sqs_queue_arn = sqs_queue["QueueArn"] + sqs_queue_url = sqs_queue["QueueUrl"] + sqs_queue_url_path = sqs_queue["QueueUrlPath"] + + _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) + + config_yaml: str = f""" + inputs: + - type: "sqs" + id: "{sqs_queue_arn}" + expand_event_list_from_field: aField + root_fields_to_add_to_expanded_event: ["secondRootField", "thirdRootField"] + tags: {self.default_tags} + outputs: + - type: "logstash" + args: + logstash_url: "{self.logstash.get_url()}" + ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} + username: "{self.logstash.logstash_user}" + password: "{self.logstash.logstash_password}" + """ + + config_file_path = "config.yaml" + config_bucket_name = _time_based_id(suffix="config-bucket") + _s3_upload_content_to_bucket( + client=self.s3_client, + content=config_yaml.encode("utf-8"), + content_type="text/plain", + bucket_name=config_bucket_name, + key=config_file_path, + ) + + os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" + + events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) + + message_id = events_sqs["Records"][0]["messageId"] + + ctx = ContextMock() + first_call = handler(events_sqs, ctx) # type:ignore + + assert first_call == "continuing" + + logstash_message = self.logstash.get_messages(expected=1) + assert len(logstash_message) == 1 + + assert logstash_message[0]["message"] == json_dumper(first_expanded_with_root_fields) + assert logstash_message[0]["log"]["offset"] == 0 + assert logstash_message[0]["log"]["file"]["path"] == sqs_queue_url_path + assert logstash_message[0]["aws"]["sqs"]["name"] == sqs_queue_name + assert logstash_message[0]["aws"]["sqs"]["message_id"] == message_id + assert logstash_message[0]["cloud"]["provider"] == "aws" + assert logstash_message[0]["cloud"]["region"] == "us-east-1" + assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) + + continued_events, _ = _sqs_get_messages( + self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn + ) + second_call = handler(continued_events, ctx) # type:ignore + + 
assert second_call == "completed" + + logstash_message = self.logstash.get_messages(expected=3) + assert len(logstash_message) == 3 + + assert logstash_message[1]["message"] == json_dumper(second_expanded_with_root_fields) + assert logstash_message[1]["log"]["offset"] == 114 + assert logstash_message[1]["log"]["file"]["path"] == sqs_queue_url_path + assert logstash_message[1]["aws"]["sqs"]["name"] == sqs_queue_name + assert logstash_message[1]["aws"]["sqs"]["message_id"] == message_id + assert logstash_message[1]["cloud"]["provider"] == "aws" + assert logstash_message[1]["cloud"]["region"] == "us-east-1" + assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + assert logstash_message[2]["message"] == json_dumper(third_expanded_event_with_root_fields) + assert logstash_message[2]["log"]["offset"] == 228 + assert logstash_message[2]["log"]["file"]["path"] == sqs_queue_url_path + assert logstash_message[2]["aws"]["sqs"]["name"] == sqs_queue_name + assert logstash_message[2]["aws"]["sqs"]["message_id"] == message_id + assert logstash_message[2]["cloud"]["provider"] == "aws" + assert logstash_message[2]["cloud"]["region"] == "us-east-1" + assert logstash_message[2]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[2]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + def test_root_fields_to_add_to_expanded_event_all(self) -> None: + assert isinstance(self.logstash, LogstashContainer) + assert isinstance(self.localstack, LocalStackContainer) + + first_expanded_event: str = _load_file_fixture("cloudwatch-log-1.json") + first_expanded_with_root_fields: dict[str, Any] = json_parser(first_expanded_event) + first_expanded_with_root_fields["firstRootField"] = "firstRootField" + first_expanded_with_root_fields["secondRootField"] = "secondRootField" + + second_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") + second_expanded_with_root_fields: dict[str, Any] = json_parser(second_expanded_event) + second_expanded_with_root_fields["firstRootField"] = "firstRootField" + second_expanded_with_root_fields["secondRootField"] = "secondRootField" + + third_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") + third_expanded_event_with_root_fields: dict[str, Any] = json_parser(third_expanded_event) + third_expanded_event_with_root_fields["firstRootField"] = "firstRootField" + third_expanded_event_with_root_fields["secondRootField"] = "secondRootField" + + fixtures = [ + f"""{{"firstRootField": "firstRootField", "secondRootField":"secondRootField", + "aField": [{first_expanded_event},{second_expanded_event},{third_expanded_event}]}}""" + ] + + sqs_queue_name = _time_based_id(suffix="source-sqs") + + sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) + + sqs_queue_arn = sqs_queue["QueueArn"] + sqs_queue_url = sqs_queue["QueueUrl"] + sqs_queue_url_path = sqs_queue["QueueUrlPath"] + + _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) + + config_yaml: str = f""" + inputs: + - type: "sqs" + id: "{sqs_queue_arn}" + expand_event_list_from_field: aField + root_fields_to_add_to_expanded_event: all + tags: {self.default_tags} + outputs: + - type: "logstash" + args: + logstash_url: "{self.logstash.get_url()}" + ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} + username: "{self.logstash.logstash_user}" + password: "{self.logstash.logstash_password}" + """ + + config_file_path = "config.yaml" + 
config_bucket_name = _time_based_id(suffix="config-bucket") + _s3_upload_content_to_bucket( + client=self.s3_client, + content=config_yaml.encode("utf-8"), + content_type="text/plain", + bucket_name=config_bucket_name, + key=config_file_path, + ) + + os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" + + events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) + + message_id = events_sqs["Records"][0]["messageId"] + + ctx = ContextMock() + first_call = handler(events_sqs, ctx) # type:ignore + + assert first_call == "continuing" + + logstash_message = self.logstash.get_messages(expected=1) + assert len(logstash_message) == 1 + + assert logstash_message[0]["message"] == json_dumper(first_expanded_with_root_fields) + assert logstash_message[0]["log"]["offset"] == 0 + assert logstash_message[0]["log"]["file"]["path"] == sqs_queue_url_path + assert logstash_message[0]["aws"]["sqs"]["name"] == sqs_queue_name + assert logstash_message[0]["aws"]["sqs"]["message_id"] == message_id + assert logstash_message[0]["cloud"]["provider"] == "aws" + assert logstash_message[0]["cloud"]["region"] == "us-east-1" + assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) + + continued_events, _ = _sqs_get_messages( + self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn + ) + second_call = handler(continued_events, ctx) # type:ignore + + assert second_call == "completed" + + logstash_message = self.logstash.get_messages(expected=3) + assert len(logstash_message) == 3 + + assert logstash_message[1]["message"] == json_dumper(second_expanded_with_root_fields) + assert logstash_message[1]["log"]["offset"] == 114 + assert logstash_message[1]["log"]["file"]["path"] == sqs_queue_url_path + assert logstash_message[1]["aws"]["sqs"]["name"] == sqs_queue_name + assert logstash_message[1]["aws"]["sqs"]["message_id"] == message_id + assert logstash_message[1]["cloud"]["provider"] == "aws" + assert logstash_message[1]["cloud"]["region"] == "us-east-1" + assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + assert logstash_message[2]["message"] == json_dumper(third_expanded_event_with_root_fields) + assert logstash_message[2]["log"]["offset"] == 228 + assert logstash_message[2]["log"]["file"]["path"] == sqs_queue_url_path + assert logstash_message[2]["aws"]["sqs"]["name"] == sqs_queue_name + assert logstash_message[2]["aws"]["sqs"]["message_id"] == message_id + assert logstash_message[2]["cloud"]["provider"] == "aws" + assert logstash_message[2]["cloud"]["region"] == "us-east-1" + assert logstash_message[2]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[2]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + From 710fee651195a3bf5e4da0c7299a7c7a431efc13 Mon Sep 17 00:00:00 2001 From: constanca Date: Wed, 17 Apr 2024 11:52:34 +0200 Subject: [PATCH 04/26] . 
--- tests/handlers/aws/test_integrations.py | 1713 ----------------------- 1 file changed, 1713 deletions(-) diff --git a/tests/handlers/aws/test_integrations.py b/tests/handlers/aws/test_integrations.py index 3390a568..7efffb58 100644 --- a/tests/handlers/aws/test_integrations.py +++ b/tests/handlers/aws/test_integrations.py @@ -672,1716 +672,3 @@ def test_continuing(self) -> None: assert res["hits"]["hits"][7]["_source"]["message"] == logstash_message[7]["message"] assert res["hits"]["hits"][7]["_source"]["tags"] == logstash_message[7]["tags"] - def test_continuing_no_timeout_input_from_originalEventSourceARN_message_attribute(self) -> None: - assert isinstance(self.logstash, LogstashContainer) - assert isinstance(self.localstack, LocalStackContainer) - - fixtures = [ - _load_file_fixture("cloudwatch-log-1.json"), - _load_file_fixture("cloudwatch-log-2.json"), - _load_file_fixture("cloudwatch-log-3.json"), - ] - - sqs_queue_name = _time_based_id(suffix="source-sqs") - - sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) - - sqs_queue_arn = sqs_queue["QueueArn"] - sqs_queue_url = sqs_queue["QueueUrl"] - sqs_queue_url_path = sqs_queue["QueueUrlPath"] - - _sqs_send_messages(self.sqs_client, sqs_queue_url, fixtures[0]) - _sqs_send_messages(self.sqs_client, sqs_queue_url, fixtures[1]) - _sqs_send_messages(self.sqs_client, sqs_queue_url, fixtures[2]) - - config_yaml: str = f""" - inputs: - - type: sqs - id: "{sqs_queue_arn}" - tags: {self.default_tags} - outputs: - - type: "logstash" - args: - logstash_url: "{self.logstash.get_url()}" - ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} - username: "{self.logstash.logstash_user}" - password: "{self.logstash.logstash_password}" - """ - - config_file_path = "config.yaml" - config_bucket_name = _time_based_id(suffix="config-bucket") - _s3_upload_content_to_bucket( - client=self.s3_client, - content=config_yaml.encode("utf-8"), - content_type="text/plain", - bucket_name=config_bucket_name, - key=config_file_path, - ) - - os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" - - events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) - - first_message_id = events_sqs["Records"][0]["messageId"] - second_message_id = events_sqs["Records"][1]["messageId"] - - ctx = ContextMock() - first_call = handler(events_sqs, ctx) # type:ignore - - assert first_call == "continuing" - - logstash_message = self.logstash.get_messages(expected=1) - assert len(logstash_message) == 1 - - assert logstash_message[0]["message"] == fixtures[0].rstrip("\n") - assert logstash_message[0]["log"]["offset"] == 0 - assert logstash_message[0]["log"]["file"]["path"] == sqs_queue_url_path - assert logstash_message[0]["aws"]["sqs"]["name"] == sqs_queue_name - assert logstash_message[0]["aws"]["sqs"]["message_id"] == first_message_id - assert logstash_message[0]["cloud"]["provider"] == "aws" - assert logstash_message[0]["cloud"]["region"] == "us-east-1" - assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - continued_events, _ = _sqs_get_messages( - self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn - ) - - continued_events["Records"][2]["messageAttributes"]["originalEventSourceARN"][ - "stringValue" - ] += "-not-configured-arn" - second_call = handler(continued_events, ctx) # type:ignore - - assert second_call == "continuing" - - logstash_message = 
self.logstash.get_messages(expected=2) - assert len(logstash_message) == 2 - - assert logstash_message[1]["message"] == fixtures[1].rstrip("\n") - assert logstash_message[1]["log"]["offset"] == 0 - assert logstash_message[1]["log"]["file"]["path"] == sqs_queue_url_path - assert logstash_message[1]["aws"]["sqs"]["name"] == sqs_queue_name - assert logstash_message[1]["aws"]["sqs"]["message_id"] == second_message_id - assert logstash_message[1]["cloud"]["provider"] == "aws" - assert logstash_message[1]["cloud"]["region"] == "us-east-1" - assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) - continued_events, _ = _sqs_get_messages( - self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn - ) - - third_call = handler(continued_events, ctx) # type:ignore - - assert third_call == "completed" - - logstash_message = self.logstash.get_messages(expected=2) - assert len(logstash_message) == 2 - - def test_replay(self) -> None: - assert isinstance(self.elasticsearch, ElasticsearchContainer) - assert isinstance(self.logstash, LogstashContainer) - assert isinstance(self.localstack, LocalStackContainer) - - fixtures = [ - _load_file_fixture("cloudwatch-log-1.json"), - _load_file_fixture("cloudwatch-log-2.json"), - ] - - s3_bucket_name = _time_based_id(suffix="test-bucket") - first_filename = "exportedlog/uuid/yyyy-mm-dd-[$LATEST]hash/000000.gz" - _s3_upload_content_to_bucket( - client=self.s3_client, - content=gzip.compress("".join(fixtures).encode("utf-8")), - content_type="application/x-gzip", - bucket_name=s3_bucket_name, - key=first_filename, - ) - - cloudwatch_group_name = _time_based_id(suffix="source-group") - cloudwatch_group = _logs_create_cloudwatch_logs_group(self.logs_client, group_name=cloudwatch_group_name) - - cloudwatch_stream_name = _time_based_id(suffix="source-stream") - _logs_create_cloudwatch_logs_stream( - self.logs_client, group_name=cloudwatch_group_name, stream_name=cloudwatch_stream_name - ) - - _logs_upload_event_to_cloudwatch_logs( - self.logs_client, - group_name=cloudwatch_group_name, - stream_name=cloudwatch_stream_name, - messages_body=["".join(fixtures)], - ) - - cloudwatch_group_arn = cloudwatch_group["arn"] - - cloudwatch_group_name = cloudwatch_group_name - cloudwatch_stream_name = cloudwatch_stream_name - - sqs_queue_name = _time_based_id(suffix="source-sqs") - s3_sqs_queue_name = _time_based_id(suffix="source-s3-sqs") - - sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) - s3_sqs_queue = _sqs_create_queue(self.sqs_client, s3_sqs_queue_name, self.localstack.get_url()) - - sqs_queue_arn = sqs_queue["QueueArn"] - sqs_queue_url = sqs_queue["QueueUrl"] - sqs_queue_url_path = sqs_queue["QueueUrlPath"] - - s3_sqs_queue_arn = s3_sqs_queue["QueueArn"] - s3_sqs_queue_url = s3_sqs_queue["QueueUrl"] - - _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) - _sqs_send_s3_notifications(self.sqs_client, s3_sqs_queue_url, s3_bucket_name, [first_filename]) - - kinesis_stream_name = _time_based_id(suffix="source-kinesis") - kinesis_stream = _kinesis_create_stream(self.kinesis_client, kinesis_stream_name) - kinesis_stream_arn = kinesis_stream["StreamDescription"]["StreamARN"] - - _kinesis_put_records(self.kinesis_client, kinesis_stream_name, ["".join(fixtures)]) - - # the way to let logstash fail is to give wrong credentials - 
config_yaml: str = f""" - inputs: - - type: "kinesis-data-stream" - id: "{kinesis_stream_arn}" - tags: {self.default_tags} - outputs: - - type: "elasticsearch" - args: - elasticsearch_url: "{self.elasticsearch.get_url()}" - ssl_assert_fingerprint: {self.elasticsearch.ssl_assert_fingerprint} - username: "{self.secret_arn}:username" - password: "{self.secret_arn}:password" - - type: "logstash" - args: - logstash_url: "{self.logstash.get_url()}" - ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} - username: "wrong_username" - password: "wrong_username" - - type: "cloudwatch-logs" - id: "{cloudwatch_group_arn}" - tags: {self.default_tags} - outputs: - - type: "elasticsearch" - args: - elasticsearch_url: "{self.elasticsearch.get_url()}" - ssl_assert_fingerprint: {self.elasticsearch.ssl_assert_fingerprint} - username: "{self.secret_arn}:username" - password: "{self.secret_arn}:password" - - type: "logstash" - args: - logstash_url: "{self.logstash.get_url()}" - ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} - username: "wrong_username" - password: "wrong_username" - - type: sqs - id: "{sqs_queue_arn}" - tags: {self.default_tags} - outputs: - - type: "elasticsearch" - args: - elasticsearch_url: "{self.elasticsearch.get_url()}" - ssl_assert_fingerprint: {self.elasticsearch.ssl_assert_fingerprint} - username: "{self.secret_arn}:username" - password: "{self.secret_arn}:password" - - type: "logstash" - args: - logstash_url: "{self.logstash.get_url()}" - ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} - username: "wrong_username" - password: "wrong_username" - - type: s3-sqs - id: "{s3_sqs_queue_arn}" - tags: {self.default_tags} - outputs: - - type: "elasticsearch" - args: - elasticsearch_url: "{self.elasticsearch.get_url()}" - ssl_assert_fingerprint: {self.elasticsearch.ssl_assert_fingerprint} - username: "{self.secret_arn}:username" - password: "{self.secret_arn}:password" - - type: "logstash" - args: - logstash_url: "{self.logstash.get_url()}" - ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} - username: "wrong_username" - password: "wrong_username" - """ - - config_file_path = "config.yaml" - config_bucket_name = _time_based_id(suffix="config-bucket") - _s3_upload_content_to_bucket( - client=self.s3_client, - content=config_yaml.encode("utf-8"), - content_type="text/plain", - bucket_name=config_bucket_name, - key=config_file_path, - ) - - os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" - - events_s3, _ = _sqs_get_messages(self.sqs_client, s3_sqs_queue_url, s3_sqs_queue_arn) - - bucket_arn: str = f"arn:aws:s3:::{s3_bucket_name}" - event_time = int( - datetime.datetime.strptime(_S3_NOTIFICATION_EVENT_TIME, "%Y-%m-%dT%H:%M:%S.%fZ").timestamp() * 1000 - ) - - hash_first = get_hex_prefix(f"{bucket_arn}-{first_filename}") - prefix_s3_first = f"{event_time}-{hash_first}" - - events_sqs, events_sent_timestamps_sqs = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) - - message_id = events_sqs["Records"][0]["messageId"] - hash_sqs = get_hex_prefix(f"{sqs_queue_name}-{message_id}") - prefix_sqs: str = f"{events_sent_timestamps_sqs[0]}-{hash_sqs}" - - ( - events_cloudwatch_logs, - event_ids_cloudwatch_logs, - event_timestamps_cloudwatch_logs, - ) = _logs_retrieve_event_from_cloudwatch_logs(self.logs_client, cloudwatch_group_name, cloudwatch_stream_name) - - hash_cw_logs = get_hex_prefix( - f"{cloudwatch_group_name}-{cloudwatch_stream_name}-{event_ids_cloudwatch_logs[0]}" - ) - prefix_cloudwatch_logs = 
f"{event_timestamps_cloudwatch_logs[0]}-{hash_cw_logs}" - - events_kinesis, event_timestamps_kinesis_records = _kinesis_retrieve_event_from_kinesis_stream( - self.kinesis_client, kinesis_stream_name, kinesis_stream_arn - ) - sequence_number = events_kinesis["Records"][0]["kinesis"]["sequenceNumber"] - hash_kinesis_record = get_hex_prefix(f"stream-{kinesis_stream_name}-PartitionKey-{sequence_number}") - prefix_kinesis = f"{int(float(event_timestamps_kinesis_records[0]) * 1000)}-{hash_kinesis_record}" - - # Create an expected id for s3-sqs so that es.send will fail - self.elasticsearch.index( - index="logs-generic-default", - op_type="create", - id=f"{prefix_s3_first}-000000000000", - document={"@timestamp": datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.%fZ")}, - ) - - # Create an expected id for sqs so that es.send will fail - self.elasticsearch.index( - index="logs-generic-default", - op_type="create", - id=f"{prefix_sqs}-000000000000", - document={"@timestamp": datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.%fZ")}, - ) - - # Create an expected id for cloudwatch-logs so that es.send will fail - self.elasticsearch.index( - index="logs-generic-default", - op_type="create", - id=f"{prefix_cloudwatch_logs}-000000000000", - document={"@timestamp": datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.%fZ")}, - ) - - # Create an expected id for kinesis-data-stream so that es.send will fail - self.elasticsearch.index( - index="logs-generic-default", - op_type="create", - id=f"{prefix_kinesis}-000000000000", - document={"@timestamp": datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.%fZ")}, - ) - - self.elasticsearch.refresh(index="logs-generic-default") - - res = self.elasticsearch.search(index="logs-generic-default") - assert res["hits"]["total"] == {"value": 4, "relation": "eq"} - - ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) - - first_call = handler(events_s3, ctx) # type:ignore - - assert first_call == "completed" - - self.elasticsearch.refresh(index="logs-generic-default") - res = self.elasticsearch.search( - index="logs-generic-default", - query={ - "bool": { - "must_not": { - "ids": { - "values": [ - f"{prefix_s3_first}-000000000000", - f"{prefix_sqs}-000000000000", - f"{prefix_cloudwatch_logs}-000000000000", - f"{prefix_kinesis}-000000000000", - ] - } - } - } - }, - sort="_seq_no", - ) - - assert res["hits"]["total"] == {"value": 1, "relation": "eq"} - - assert res["hits"]["hits"][0]["_source"]["message"] == fixtures[1].rstrip("\n") - assert res["hits"]["hits"][0]["_source"]["log"]["offset"] == 94 - assert ( - res["hits"]["hits"][0]["_source"]["log"]["file"]["path"] - == f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{first_filename}" - ) - assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name - assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" - assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["object"]["key"] == first_filename - assert res["hits"]["hits"][0]["_source"]["cloud"]["provider"] == "aws" - assert res["hits"]["hits"][0]["_source"]["cloud"]["region"] == "eu-central-1" - assert res["hits"]["hits"][0]["_source"]["cloud"]["account"]["id"] == "000000000000" - assert res["hits"]["hits"][0]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] - - logstash_message = self.logstash.get_messages(expected=0) - assert len(logstash_message) == 0 - - second_call = handler(events_sqs, ctx) # type:ignore - - assert 
second_call == "completed" - - self.elasticsearch.refresh(index="logs-generic-default") - res = self.elasticsearch.search( - index="logs-generic-default", - query={ - "bool": { - "must_not": { - "ids": { - "values": [ - f"{prefix_s3_first}-000000000000", - f"{prefix_sqs}-000000000000", - f"{prefix_cloudwatch_logs}-000000000000", - f"{prefix_kinesis}-000000000000", - ] - } - } - } - }, - sort="_seq_no", - ) - - assert res["hits"]["total"] == {"value": 2, "relation": "eq"} - - assert res["hits"]["hits"][1]["_source"]["message"] == fixtures[1].rstrip("\n") - assert res["hits"]["hits"][1]["_source"]["log"]["offset"] == 94 - assert res["hits"]["hits"][1]["_source"]["log"]["file"]["path"] == sqs_queue_url_path - assert res["hits"]["hits"][1]["_source"]["aws"]["sqs"]["name"] == sqs_queue_name - assert res["hits"]["hits"][1]["_source"]["aws"]["sqs"]["message_id"] == message_id - assert res["hits"]["hits"][1]["_source"]["cloud"]["provider"] == "aws" - assert res["hits"]["hits"][1]["_source"]["cloud"]["region"] == "us-east-1" - assert res["hits"]["hits"][1]["_source"]["cloud"]["account"]["id"] == "000000000000" - assert res["hits"]["hits"][1]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] - - logstash_message = self.logstash.get_messages(expected=0) - assert len(logstash_message) == 0 - - third_call = handler(events_cloudwatch_logs, ctx) # type:ignore - - assert third_call == "completed" - - self.elasticsearch.refresh(index="logs-generic-default") - res = self.elasticsearch.search( - index="logs-generic-default", - query={ - "bool": { - "must_not": { - "ids": { - "values": [ - f"{prefix_s3_first}-000000000000", - f"{prefix_sqs}-000000000000", - f"{prefix_cloudwatch_logs}-000000000000", - f"{prefix_kinesis}-000000000000", - ] - } - } - } - }, - sort="_seq_no", - ) - - assert res["hits"]["total"] == {"value": 3, "relation": "eq"} - - assert res["hits"]["hits"][2]["_source"]["message"] == fixtures[1].rstrip("\n") - assert res["hits"]["hits"][2]["_source"]["log"]["offset"] == 94 - assert ( - res["hits"]["hits"][2]["_source"]["log"]["file"]["path"] - == f"{cloudwatch_group_name}/{cloudwatch_stream_name}" - ) - assert res["hits"]["hits"][2]["_source"]["aws"]["cloudwatch"]["log_group"] == cloudwatch_group_name - assert res["hits"]["hits"][2]["_source"]["aws"]["cloudwatch"]["log_stream"] == cloudwatch_stream_name - assert res["hits"]["hits"][2]["_source"]["aws"]["cloudwatch"]["event_id"] == event_ids_cloudwatch_logs[0] - assert res["hits"]["hits"][2]["_source"]["cloud"]["provider"] == "aws" - assert res["hits"]["hits"][2]["_source"]["cloud"]["region"] == "us-east-1" - assert res["hits"]["hits"][2]["_source"]["cloud"]["account"]["id"] == "000000000000" - assert res["hits"]["hits"][2]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] - - logstash_message = self.logstash.get_messages(expected=0) - assert len(logstash_message) == 0 - - fourth_call = handler(events_kinesis, ctx) # type:ignore - - assert fourth_call == "completed" - - self.elasticsearch.refresh(index="logs-generic-default") - res = self.elasticsearch.search( - index="logs-generic-default", - query={ - "bool": { - "must_not": { - "ids": { - "values": [ - f"{prefix_s3_first}-000000000000", - f"{prefix_sqs}-000000000000", - f"{prefix_cloudwatch_logs}-000000000000", - f"{prefix_kinesis}-000000000000", - ] - } - } - } - }, - sort="_seq_no", - ) - - assert res["hits"]["total"] == {"value": 4, "relation": "eq"} - - assert res["hits"]["hits"][3]["_source"]["message"] == fixtures[1].rstrip("\n") - assert 
res["hits"]["hits"][3]["_source"]["log"]["offset"] == 94 - assert res["hits"]["hits"][3]["_source"]["log"]["file"]["path"] == kinesis_stream_arn - assert res["hits"]["hits"][3]["_source"]["aws"]["kinesis"]["type"] == "stream" - assert res["hits"]["hits"][3]["_source"]["aws"]["kinesis"]["partition_key"] == "PartitionKey" - assert res["hits"]["hits"][3]["_source"]["aws"]["kinesis"]["name"] == kinesis_stream_name - assert ( - res["hits"]["hits"][3]["_source"]["aws"]["kinesis"]["sequence_number"] - == events_kinesis["Records"][0]["kinesis"]["sequenceNumber"] - ) - assert res["hits"]["hits"][3]["_source"]["cloud"]["provider"] == "aws" - assert res["hits"]["hits"][3]["_source"]["cloud"]["region"] == "us-east-1" - assert res["hits"]["hits"][3]["_source"]["cloud"]["account"]["id"] == "000000000000" - assert res["hits"]["hits"][3]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] - - logstash_message = self.logstash.get_messages(expected=0) - assert len(logstash_message) == 0 - - replayed_events, _ = _sqs_get_messages(self.sqs_client, os.environ["SQS_REPLAY_URL"], self.sqs_replay_queue_arn) - with self.assertRaises(ReplayHandlerException): - handler(replayed_events, ctx) # type:ignore - - self.elasticsearch.refresh(index="logs-generic-default") - - # Remove the expected id for s3-sqs so that it can be replayed - self.elasticsearch.delete_by_query( - index="logs-generic-default", body={"query": {"ids": {"values": [f"{prefix_s3_first}-000000000000"]}}} - ) - - # Remove the expected id for sqs so that it can be replayed - self.elasticsearch.delete_by_query( - index="logs-generic-default", body={"query": {"ids": {"values": [f"{prefix_sqs}-000000000000"]}}} - ) - - # Remove the expected id for cloudwatch logs so that it can be replayed - self.elasticsearch.delete_by_query( - index="logs-generic-default", - body={"query": {"ids": {"values": [f"{prefix_cloudwatch_logs}-000000000000"]}}}, - ) - - # Remove the expected id for kinesis data stream so that it can be replayed - self.elasticsearch.delete_by_query( - index="logs-generic-default", - body={"query": {"ids": {"values": [f"{prefix_kinesis}-000000000000"]}}}, - ) - - self.elasticsearch.refresh(index="logs-generic-default") - - # let's update the config file so that logstash won't fail anymore - config_yaml = f""" - inputs: - - type: "kinesis-data-stream" - id: "{kinesis_stream_arn}" - tags: {self.default_tags} - outputs: {self.default_outputs} - - type: "cloudwatch-logs" - id: "{cloudwatch_group_arn}" - tags: {self.default_tags} - outputs: {self.default_outputs} - - type: sqs - id: "{sqs_queue_arn}" - tags: {self.default_tags} - outputs: {self.default_outputs} - - type: s3-sqs - id: "{s3_sqs_queue_arn}" - tags: {self.default_tags} - outputs: {self.default_outputs} - """ - - _s3_upload_content_to_bucket( - client=self.s3_client, - content=config_yaml.encode("utf-8"), - content_type="text/plain", - bucket_name=config_bucket_name, - key=config_file_path, - create_bucket=False, - ) - - ctx = ContextMock(remaining_time_in_millis=_REMAINING_TIME_FORCE_CONTINUE_0ms) - - # implicit wait for the message to be back on the queue - time.sleep(35) - replayed_events, _ = _sqs_get_messages(self.sqs_client, os.environ["SQS_REPLAY_URL"], self.sqs_replay_queue_arn) - fifth_call = handler(replayed_events, ctx) # type:ignore - - assert fifth_call == "replayed" - - self.elasticsearch.refresh(index="logs-generic-default") - assert self.elasticsearch.count(index="logs-generic-default")["count"] == 5 - - 
self.elasticsearch.refresh(index="logs-generic-default") - res = self.elasticsearch.search(index="logs-generic-default", sort="_seq_no") - - assert res["hits"]["total"] == {"value": 5, "relation": "eq"} - - assert res["hits"]["hits"][4]["_source"]["message"] == fixtures[0].rstrip("\n") - assert res["hits"]["hits"][4]["_source"]["log"]["offset"] == 0 - assert ( - res["hits"]["hits"][4]["_source"]["log"]["file"]["path"] - == f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{first_filename}" - ) - assert res["hits"]["hits"][4]["_source"]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name - assert res["hits"]["hits"][4]["_source"]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" - assert res["hits"]["hits"][4]["_source"]["aws"]["s3"]["object"]["key"] == first_filename - assert res["hits"]["hits"][4]["_source"]["cloud"]["provider"] == "aws" - assert res["hits"]["hits"][4]["_source"]["cloud"]["region"] == "eu-central-1" - assert res["hits"]["hits"][4]["_source"]["cloud"]["account"]["id"] == "000000000000" - assert res["hits"]["hits"][4]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] - - logstash_message = self.logstash.get_messages(expected=0) - assert len(logstash_message) == 0 - - # implicit wait for the message to be back on the queue - time.sleep(35) - replayed_events, _ = _sqs_get_messages(self.sqs_client, os.environ["SQS_REPLAY_URL"], self.sqs_replay_queue_arn) - sixth_call = handler(replayed_events, ctx) # type:ignore - - assert sixth_call == "replayed" - - self.elasticsearch.refresh(index="logs-generic-default") - assert self.elasticsearch.count(index="logs-generic-default")["count"] == 5 - - res = self.elasticsearch.search(index="logs-generic-default", sort="_seq_no") - assert res["hits"]["total"] == {"value": 5, "relation": "eq"} - - logstash_message = self.logstash.get_messages(expected=1) - assert len(logstash_message) == 1 - # positions on res["hits"]["hits"] are skewed compared to logstash_message - # in elasticsearch we inserted the second event of each input before the first one - res["hits"]["hits"][4]["_source"]["tags"].remove("generic") - assert res["hits"]["hits"][4]["_source"]["aws"] == logstash_message[0]["aws"] - assert res["hits"]["hits"][4]["_source"]["cloud"] == logstash_message[0]["cloud"] - assert res["hits"]["hits"][4]["_source"]["log"] == logstash_message[0]["log"] - assert res["hits"]["hits"][4]["_source"]["message"] == logstash_message[0]["message"] - assert res["hits"]["hits"][4]["_source"]["tags"] == logstash_message[0]["tags"] - - ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) - - # implicit wait for the message to be back on the queue - time.sleep(35) - replayed_events, _ = _sqs_get_messages(self.sqs_client, os.environ["SQS_REPLAY_URL"], self.sqs_replay_queue_arn) - seventh_call = handler(replayed_events, ctx) # type:ignore - - assert seventh_call == "replayed" - - self.elasticsearch.refresh(index="logs-generic-default") - assert self.elasticsearch.count(index="logs-generic-default")["count"] == 8 - - self.elasticsearch.refresh(index="logs-generic-default") - res = self.elasticsearch.search(index="logs-generic-default", sort="_seq_no") - - assert res["hits"]["total"] == {"value": 8, "relation": "eq"} - - assert res["hits"]["hits"][5]["_source"]["message"] == fixtures[0].rstrip("\n") - assert res["hits"]["hits"][5]["_source"]["log"]["offset"] == 0 - assert res["hits"]["hits"][5]["_source"]["log"]["file"]["path"] == sqs_queue_url_path - assert 
res["hits"]["hits"][5]["_source"]["aws"]["sqs"]["name"] == sqs_queue_name - assert res["hits"]["hits"][5]["_source"]["aws"]["sqs"]["message_id"] == message_id - assert res["hits"]["hits"][5]["_source"]["cloud"]["provider"] == "aws" - assert res["hits"]["hits"][5]["_source"]["cloud"]["region"] == "us-east-1" - assert res["hits"]["hits"][5]["_source"]["cloud"]["account"]["id"] == "000000000000" - assert res["hits"]["hits"][5]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] - - assert res["hits"]["hits"][6]["_source"]["message"] == fixtures[0].rstrip("\n") - assert res["hits"]["hits"][6]["_source"]["log"]["offset"] == 0 - assert ( - res["hits"]["hits"][6]["_source"]["log"]["file"]["path"] - == f"{cloudwatch_group_name}/{cloudwatch_stream_name}" - ) - assert res["hits"]["hits"][6]["_source"]["aws"]["cloudwatch"]["log_group"] == cloudwatch_group_name - assert res["hits"]["hits"][6]["_source"]["aws"]["cloudwatch"]["log_stream"] == cloudwatch_stream_name - assert res["hits"]["hits"][6]["_source"]["aws"]["cloudwatch"]["event_id"] == event_ids_cloudwatch_logs[0] - assert res["hits"]["hits"][6]["_source"]["cloud"]["provider"] == "aws" - assert res["hits"]["hits"][6]["_source"]["cloud"]["region"] == "us-east-1" - assert res["hits"]["hits"][6]["_source"]["cloud"]["account"]["id"] == "000000000000" - assert res["hits"]["hits"][6]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] - - assert res["hits"]["hits"][7]["_source"]["message"] == fixtures[0].rstrip("\n") - assert res["hits"]["hits"][7]["_source"]["log"]["offset"] == 0 - assert res["hits"]["hits"][7]["_source"]["log"]["file"]["path"] == kinesis_stream_arn - assert res["hits"]["hits"][7]["_source"]["aws"]["kinesis"]["type"] == "stream" - assert res["hits"]["hits"][7]["_source"]["aws"]["kinesis"]["partition_key"] == "PartitionKey" - assert res["hits"]["hits"][7]["_source"]["aws"]["kinesis"]["name"] == kinesis_stream_name - assert ( - res["hits"]["hits"][7]["_source"]["aws"]["kinesis"]["sequence_number"] - == events_kinesis["Records"][0]["kinesis"]["sequenceNumber"] - ) - assert res["hits"]["hits"][7]["_source"]["cloud"]["provider"] == "aws" - assert res["hits"]["hits"][7]["_source"]["cloud"]["region"] == "us-east-1" - assert res["hits"]["hits"][7]["_source"]["cloud"]["account"]["id"] == "000000000000" - assert res["hits"]["hits"][7]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] - - logstash_message = self.logstash.get_messages(expected=8) - assert len(logstash_message) == 8 - res["hits"]["hits"][0]["_source"]["tags"].remove("generic") - res["hits"]["hits"][1]["_source"]["tags"].remove("generic") - res["hits"]["hits"][2]["_source"]["tags"].remove("generic") - res["hits"]["hits"][3]["_source"]["tags"].remove("generic") - res["hits"]["hits"][5]["_source"]["tags"].remove("generic") - res["hits"]["hits"][6]["_source"]["tags"].remove("generic") - res["hits"]["hits"][7]["_source"]["tags"].remove("generic") - - # positions on res["hits"]["hits"] are skewed compared to logstash_message - # in elasticsearch we inserted the second event of each input before the first one - assert res["hits"]["hits"][0]["_source"]["aws"] == logstash_message[1]["aws"] - assert res["hits"]["hits"][0]["_source"]["cloud"] == logstash_message[1]["cloud"] - assert res["hits"]["hits"][0]["_source"]["log"] == logstash_message[1]["log"] - assert res["hits"]["hits"][0]["_source"]["message"] == logstash_message[1]["message"] - assert res["hits"]["hits"][0]["_source"]["tags"] == logstash_message[1]["tags"] - - assert 
res["hits"]["hits"][5]["_source"]["aws"] == logstash_message[2]["aws"] - assert res["hits"]["hits"][5]["_source"]["cloud"] == logstash_message[2]["cloud"] - assert res["hits"]["hits"][5]["_source"]["log"] == logstash_message[2]["log"] - assert res["hits"]["hits"][5]["_source"]["message"] == logstash_message[2]["message"] - assert res["hits"]["hits"][5]["_source"]["tags"] == logstash_message[2]["tags"] - - assert res["hits"]["hits"][1]["_source"]["aws"] == logstash_message[3]["aws"] - assert res["hits"]["hits"][1]["_source"]["cloud"] == logstash_message[3]["cloud"] - assert res["hits"]["hits"][1]["_source"]["log"] == logstash_message[3]["log"] - assert res["hits"]["hits"][1]["_source"]["message"] == logstash_message[3]["message"] - assert res["hits"]["hits"][1]["_source"]["tags"] == logstash_message[3]["tags"] - - assert res["hits"]["hits"][6]["_source"]["aws"] == logstash_message[4]["aws"] - assert res["hits"]["hits"][6]["_source"]["cloud"] == logstash_message[4]["cloud"] - assert res["hits"]["hits"][6]["_source"]["log"] == logstash_message[4]["log"] - assert res["hits"]["hits"][6]["_source"]["message"] == logstash_message[4]["message"] - assert res["hits"]["hits"][6]["_source"]["tags"] == logstash_message[4]["tags"] - - assert res["hits"]["hits"][2]["_source"]["aws"] == logstash_message[5]["aws"] - assert res["hits"]["hits"][2]["_source"]["cloud"] == logstash_message[5]["cloud"] - assert res["hits"]["hits"][2]["_source"]["log"] == logstash_message[5]["log"] - assert res["hits"]["hits"][2]["_source"]["message"] == logstash_message[5]["message"] - assert res["hits"]["hits"][2]["_source"]["tags"] == logstash_message[5]["tags"] - - assert res["hits"]["hits"][7]["_source"]["aws"] == logstash_message[6]["aws"] - assert res["hits"]["hits"][7]["_source"]["cloud"] == logstash_message[6]["cloud"] - assert res["hits"]["hits"][7]["_source"]["log"] == logstash_message[6]["log"] - assert res["hits"]["hits"][7]["_source"]["message"] == logstash_message[6]["message"] - assert res["hits"]["hits"][7]["_source"]["tags"] == logstash_message[6]["tags"] - - assert res["hits"]["hits"][3]["_source"]["aws"] == logstash_message[7]["aws"] - assert res["hits"]["hits"][3]["_source"]["cloud"] == logstash_message[7]["cloud"] - assert res["hits"]["hits"][3]["_source"]["log"] == logstash_message[7]["log"] - assert res["hits"]["hits"][3]["_source"]["message"] == logstash_message[7]["message"] - assert res["hits"]["hits"][3]["_source"]["tags"] == logstash_message[7]["tags"] - - def test_empty(self) -> None: - assert isinstance(self.elasticsearch, ElasticsearchContainer) - assert isinstance(self.logstash, LogstashContainer) - assert isinstance(self.localstack, LocalStackContainer) - - fixtures = [" \n"] # once stripped it is an empty event - - s3_bucket_name = _time_based_id(suffix="test-bucket") - first_filename = "exportedlog/uuid/yyyy-mm-dd-[$LATEST]hash/000000.gz" - _s3_upload_content_to_bucket( - client=self.s3_client, - content=gzip.compress("".join(fixtures).encode("utf-8")), - content_type="application/x-gzip", - bucket_name=s3_bucket_name, - key=first_filename, - ) - - cloudwatch_group_name = _time_based_id(suffix="source-group") - cloudwatch_group = _logs_create_cloudwatch_logs_group(self.logs_client, group_name=cloudwatch_group_name) - - cloudwatch_stream_name = _time_based_id(suffix="source-stream") - _logs_create_cloudwatch_logs_stream( - self.logs_client, group_name=cloudwatch_group_name, stream_name=cloudwatch_stream_name - ) - - _logs_upload_event_to_cloudwatch_logs( - self.logs_client, - 
group_name=cloudwatch_group_name, - stream_name=cloudwatch_stream_name, - messages_body=["".join(fixtures)], - ) - - cloudwatch_group_arn = cloudwatch_group["arn"] - - cloudwatch_group_name = cloudwatch_group_name - cloudwatch_stream_name = cloudwatch_stream_name - - sqs_queue_name = _time_based_id(suffix="source-sqs") - s3_sqs_queue_name = _time_based_id(suffix="source-s3-sqs") - - sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) - s3_sqs_queue = _sqs_create_queue(self.sqs_client, s3_sqs_queue_name, self.localstack.get_url()) - - sqs_queue_arn = sqs_queue["QueueArn"] - sqs_queue_url = sqs_queue["QueueUrl"] - - s3_sqs_queue_arn = s3_sqs_queue["QueueArn"] - s3_sqs_queue_url = s3_sqs_queue["QueueUrl"] - - _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) - _sqs_send_s3_notifications(self.sqs_client, s3_sqs_queue_url, s3_bucket_name, [first_filename]) - - kinesis_stream_name = _time_based_id(suffix="source-kinesis") - kinesis_stream = _kinesis_create_stream(self.kinesis_client, kinesis_stream_name) - kinesis_stream_arn = kinesis_stream["StreamDescription"]["StreamARN"] - - _kinesis_put_records(self.kinesis_client, kinesis_stream_name, ["".join(fixtures)]) - - config_yaml: str = f""" - inputs: - - type: "kinesis-data-stream" - id: "{kinesis_stream_arn}" - outputs: {self.default_outputs} - - type: "cloudwatch-logs" - id: "{cloudwatch_group_arn}" - outputs: {self.default_outputs} - - type: sqs - id: "{sqs_queue_arn}" - outputs: {self.default_outputs} - - type: s3-sqs - id: "{s3_sqs_queue_arn}" - outputs: {self.default_outputs} - """ - - config_file_path = "config.yaml" - config_bucket_name = _time_based_id(suffix="config-bucket") - _s3_upload_content_to_bucket( - client=self.s3_client, - content=config_yaml.encode("utf-8"), - content_type="text/plain", - bucket_name=config_bucket_name, - key=config_file_path, - ) - - os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" - - events_s3, _ = _sqs_get_messages(self.sqs_client, s3_sqs_queue_url, s3_sqs_queue_arn) - - events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) - - events_cloudwatch_logs, _, _ = _logs_retrieve_event_from_cloudwatch_logs( - self.logs_client, cloudwatch_group_name, cloudwatch_stream_name - ) - - events_kinesis, _ = _kinesis_retrieve_event_from_kinesis_stream( - self.kinesis_client, kinesis_stream_name, kinesis_stream_arn - ) - - ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) - first_call = handler(events_s3, ctx) # type:ignore - - assert first_call == "completed" - - self.elasticsearch.refresh(index="logs-generic-default", ignore_unavailable=True) - assert self.elasticsearch.count(index="logs-generic-default", ignore_unavailable=True)["count"] == 0 - - logstash_message = self.logstash.get_messages(expected=0) - assert len(logstash_message) == 0 - - second_call = handler(events_sqs, ctx) # type:ignore - - assert second_call == "completed" - - self.elasticsearch.refresh(index="logs-generic-default", ignore_unavailable=True) - assert self.elasticsearch.count(index="logs-generic-default", ignore_unavailable=True)["count"] == 0 - - logstash_message = self.logstash.get_messages(expected=0) - assert len(logstash_message) == 0 - - third_call = handler(events_cloudwatch_logs, ctx) # type:ignore - - assert third_call == "completed" - - self.elasticsearch.refresh(index="logs-generic-default", ignore_unavailable=True) - assert self.elasticsearch.count(index="logs-generic-default", 
ignore_unavailable=True)["count"] == 0 - - logstash_message = self.logstash.get_messages(expected=0) - assert len(logstash_message) == 0 - - fourth_call = handler(events_kinesis, ctx) # type:ignore - - assert fourth_call == "completed" - - self.elasticsearch.refresh(index="logs-generic-default", ignore_unavailable=True) - assert self.elasticsearch.count(index="logs-generic-default", ignore_unavailable=True)["count"] == 0 - - logstash_message = self.logstash.get_messages(expected=0) - assert len(logstash_message) == 0 - - def test_filtered(self) -> None: - assert isinstance(self.elasticsearch, ElasticsearchContainer) - assert isinstance(self.logstash, LogstashContainer) - assert isinstance(self.localstack, LocalStackContainer) - - fixtures = ["excluded"] - - s3_bucket_name = _time_based_id(suffix="test-bucket") - first_filename = "exportedlog/uuid/yyyy-mm-dd-[$LATEST]hash/000000.gz" - _s3_upload_content_to_bucket( - client=self.s3_client, - content=gzip.compress("".join(fixtures).encode("utf-8")), - content_type="application/x-gzip", - bucket_name=s3_bucket_name, - key=first_filename, - ) - - cloudwatch_group_name = _time_based_id(suffix="source-group") - cloudwatch_group = _logs_create_cloudwatch_logs_group(self.logs_client, group_name=cloudwatch_group_name) - - cloudwatch_stream_name = _time_based_id(suffix="source-stream") - _logs_create_cloudwatch_logs_stream( - self.logs_client, group_name=cloudwatch_group_name, stream_name=cloudwatch_stream_name - ) - - _logs_upload_event_to_cloudwatch_logs( - self.logs_client, - group_name=cloudwatch_group_name, - stream_name=cloudwatch_stream_name, - messages_body=["".join(fixtures)], - ) - - cloudwatch_group_arn = cloudwatch_group["arn"] - - cloudwatch_group_name = cloudwatch_group_name - cloudwatch_stream_name = cloudwatch_stream_name - - sqs_queue_name = _time_based_id(suffix="source-sqs") - s3_sqs_queue_name = _time_based_id(suffix="source-s3-sqs") - - sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) - s3_sqs_queue = _sqs_create_queue(self.sqs_client, s3_sqs_queue_name, self.localstack.get_url()) - - sqs_queue_arn = sqs_queue["QueueArn"] - sqs_queue_url = sqs_queue["QueueUrl"] - - s3_sqs_queue_arn = s3_sqs_queue["QueueArn"] - s3_sqs_queue_url = s3_sqs_queue["QueueUrl"] - - _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) - _sqs_send_s3_notifications(self.sqs_client, s3_sqs_queue_url, s3_bucket_name, [first_filename]) - - kinesis_stream_name = _time_based_id(suffix="source-kinesis") - kinesis_stream = _kinesis_create_stream(self.kinesis_client, kinesis_stream_name) - kinesis_stream_arn = kinesis_stream["StreamDescription"]["StreamARN"] - - _kinesis_put_records(self.kinesis_client, kinesis_stream_name, ["".join(fixtures)]) - - config_yaml: str = f""" - inputs: - - type: "kinesis-data-stream" - id: "{kinesis_stream_arn}" - exclude: - - "excluded" - outputs: {self.default_outputs} - - type: "cloudwatch-logs" - id: "{cloudwatch_group_arn}" - exclude: - - "excluded" - outputs: {self.default_outputs} - - type: sqs - id: "{sqs_queue_arn}" - exclude: - - "excluded" - outputs: {self.default_outputs} - - type: s3-sqs - id: "{s3_sqs_queue_arn}" - exclude: - - "excluded" - outputs: {self.default_outputs} - """ - - config_file_path = "config.yaml" - config_bucket_name = _time_based_id(suffix="config-bucket") - _s3_upload_content_to_bucket( - client=self.s3_client, - content=config_yaml.encode("utf-8"), - content_type="text/plain", - bucket_name=config_bucket_name, - key=config_file_path, - ) - - 
os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" - - events_s3, _ = _sqs_get_messages(self.sqs_client, s3_sqs_queue_url, s3_sqs_queue_arn) - - events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) - - events_cloudwatch_logs, _, _ = _logs_retrieve_event_from_cloudwatch_logs( - self.logs_client, cloudwatch_group_name, cloudwatch_stream_name - ) - - events_kinesis, _ = _kinesis_retrieve_event_from_kinesis_stream( - self.kinesis_client, kinesis_stream_name, kinesis_stream_arn - ) - - ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) - first_call = handler(events_s3, ctx) # type:ignore - - assert first_call == "completed" - - self.elasticsearch.refresh(index="logs-generic-default", ignore_unavailable=True) - assert self.elasticsearch.count(index="logs-generic-default", ignore_unavailable=True)["count"] == 0 - - logstash_message = self.logstash.get_messages(expected=0) - assert len(logstash_message) == 0 - - second_call = handler(events_sqs, ctx) # type:ignore - - assert second_call == "completed" - - self.elasticsearch.refresh(index="logs-generic-default", ignore_unavailable=True) - assert self.elasticsearch.count(index="logs-generic-default", ignore_unavailable=True)["count"] == 0 - - logstash_message = self.logstash.get_messages(expected=0) - assert len(logstash_message) == 0 - - third_call = handler(events_cloudwatch_logs, ctx) # type:ignore - - assert third_call == "completed" - - self.elasticsearch.refresh(index="logs-generic-default", ignore_unavailable=True) - assert self.elasticsearch.count(index="logs-generic-default", ignore_unavailable=True)["count"] == 0 - - logstash_message = self.logstash.get_messages(expected=0) - assert len(logstash_message) == 0 - - fourth_call = handler(events_kinesis, ctx) # type:ignore - - assert fourth_call == "completed" - - self.elasticsearch.refresh(index="logs-generic-default", ignore_unavailable=True) - assert self.elasticsearch.count(index="logs-generic-default", ignore_unavailable=True)["count"] == 0 - - logstash_message = self.logstash.get_messages(expected=0) - assert len(logstash_message) == 0 - - def test_expand_event_from_list_empty_line(self) -> None: - assert isinstance(self.logstash, LogstashContainer) - assert isinstance(self.localstack, LocalStackContainer) - - first_expanded_event: str = _load_file_fixture("cloudwatch-log-1.json") - second_expanded_event: str = _load_file_fixture("cloudwatch-log-2.json") - third_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") - - fixtures = [ - f"""{{"aField": [{first_expanded_event},{second_expanded_event}]}}\n""" - f"""\n{{"aField": [{third_expanded_event}]}}""" - ] - - sqs_queue_name = _time_based_id(suffix="source-sqs") - - sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) - - sqs_queue_arn = sqs_queue["QueueArn"] - sqs_queue_url = sqs_queue["QueueUrl"] - sqs_queue_url_path = sqs_queue["QueueUrlPath"] - - _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) - - config_yaml: str = f""" - inputs: - - type: "sqs" - id: "{sqs_queue_arn}" - expand_event_list_from_field: aField - tags: {self.default_tags} - outputs: - - type: "logstash" - args: - logstash_url: "{self.logstash.get_url()}" - ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} - username: "{self.logstash.logstash_user}" - password: "{self.logstash.logstash_password}" - """ - - config_file_path = "config.yaml" - config_bucket_name = _time_based_id(suffix="config-bucket") - 
_s3_upload_content_to_bucket( - client=self.s3_client, - content=config_yaml.encode("utf-8"), - content_type="text/plain", - bucket_name=config_bucket_name, - key=config_file_path, - ) - - os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" - - events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) - - message_id = events_sqs["Records"][0]["messageId"] - - ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) - - first_call = handler(events_sqs, ctx) # type:ignore - - assert first_call == "completed" - - logstash_message = self.logstash.get_messages(expected=3) - assert len(logstash_message) == 3 - - assert logstash_message[0]["message"] == json_dumper(json_parser(first_expanded_event)) - assert logstash_message[0]["log"]["offset"] == 0 - assert logstash_message[0]["log"]["file"]["path"] == sqs_queue_url_path - assert logstash_message[0]["aws"]["sqs"]["name"] == sqs_queue_name - assert logstash_message[0]["aws"]["sqs"]["message_id"] == message_id - assert logstash_message[0]["cloud"]["provider"] == "aws" - assert logstash_message[0]["cloud"]["region"] == "us-east-1" - assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - assert logstash_message[1]["message"] == json_dumper(json_parser(second_expanded_event)) - assert logstash_message[1]["log"]["offset"] == 174 - assert logstash_message[1]["log"]["file"]["path"] == sqs_queue_url_path - assert logstash_message[1]["aws"]["sqs"]["name"] == sqs_queue_name - assert logstash_message[1]["aws"]["sqs"]["message_id"] == message_id - assert logstash_message[1]["cloud"]["provider"] == "aws" - assert logstash_message[1]["cloud"]["region"] == "us-east-1" - assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - assert logstash_message[2]["message"] == json_dumper(json_parser(third_expanded_event)) - assert logstash_message[2]["log"]["offset"] == 349 - assert logstash_message[2]["log"]["file"]["path"] == sqs_queue_url_path - assert logstash_message[2]["aws"]["sqs"]["name"] == sqs_queue_name - assert logstash_message[2]["aws"]["sqs"]["message_id"] == message_id - assert logstash_message[2]["cloud"]["provider"] == "aws" - assert logstash_message[2]["cloud"]["region"] == "us-east-1" - assert logstash_message[2]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[2]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - def test_expand_event_from_list_empty_event_not_expanded(self) -> None: - assert isinstance(self.logstash, LogstashContainer) - assert isinstance(self.localstack, LocalStackContainer) - - first_expanded_event: str = _load_file_fixture("cloudwatch-log-1.json") - second_expanded_event: str = _load_file_fixture("cloudwatch-log-2.json") - - fixtures = [f"""{{"aField": [{first_expanded_event},"",{second_expanded_event}]}}"""] - - sqs_queue_name = _time_based_id(suffix="source-sqs") - - sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) - - sqs_queue_arn = sqs_queue["QueueArn"] - sqs_queue_url = sqs_queue["QueueUrl"] - sqs_queue_url_path = sqs_queue["QueueUrlPath"] - - _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) - - config_yaml: str = f""" - inputs: - - type: "sqs" - id: "{sqs_queue_arn}" - expand_event_list_from_field: aField - tags: {self.default_tags} - outputs: - - type: "logstash" - args: - 
logstash_url: "{self.logstash.get_url()}" - ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} - username: "{self.logstash.logstash_user}" - password: "{self.logstash.logstash_password}" - """ - - config_file_path = "config.yaml" - config_bucket_name = _time_based_id(suffix="config-bucket") - _s3_upload_content_to_bucket( - client=self.s3_client, - content=config_yaml.encode("utf-8"), - content_type="text/plain", - bucket_name=config_bucket_name, - key=config_file_path, - ) - - os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" - - events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) - - message_id = events_sqs["Records"][0]["messageId"] - - ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) - - first_call = handler(events_sqs, ctx) # type:ignore - - assert first_call == "completed" - - logstash_message = self.logstash.get_messages(expected=2) - assert len(logstash_message) == 2 - - assert logstash_message[0]["message"] == json_dumper(json_parser(first_expanded_event)) - assert logstash_message[0]["log"]["offset"] == 0 - assert logstash_message[0]["log"]["file"]["path"] == sqs_queue_url_path - assert logstash_message[0]["aws"]["sqs"]["name"] == sqs_queue_name - assert logstash_message[0]["aws"]["sqs"]["message_id"] == message_id - assert logstash_message[0]["cloud"]["provider"] == "aws" - assert logstash_message[0]["cloud"]["region"] == "us-east-1" - assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - assert logstash_message[1]["message"] == json_dumper(json_parser(second_expanded_event)) - assert logstash_message[1]["log"]["offset"] == 233 - assert logstash_message[1]["log"]["file"]["path"] == sqs_queue_url_path - assert logstash_message[1]["aws"]["sqs"]["name"] == sqs_queue_name - assert logstash_message[1]["aws"]["sqs"]["message_id"] == message_id - assert logstash_message[1]["cloud"]["provider"] == "aws" - assert logstash_message[1]["cloud"]["region"] == "us-east-1" - assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - def test_root_fields_to_add_to_expanded_event_no_dict_event(self) -> None: - assert isinstance(self.logstash, LogstashContainer) - assert isinstance(self.localstack, LocalStackContainer) - - first_expanded_event: str = '"first_expanded_event"' - second_expanded_event: str = '"second_expanded_event"' - third_expanded_event: str = '"third_expanded_event"' - - fixtures = [ - f"""{{"firstRootField": "firstRootField", "secondRootField":"secondRootField", - "aField": [{first_expanded_event},{second_expanded_event},{third_expanded_event}]}}""" - ] - - sqs_queue_name = _time_based_id(suffix="source-sqs") - - sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) - - sqs_queue_arn = sqs_queue["QueueArn"] - sqs_queue_url = sqs_queue["QueueUrl"] - sqs_queue_url_path = sqs_queue["QueueUrlPath"] - - _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) - - config_yaml: str = f""" - inputs: - - type: "sqs" - id: "{sqs_queue_arn}" - expand_event_list_from_field: aField - root_fields_to_add_to_expanded_event: ["secondRootField"] - tags: {self.default_tags} - outputs: - - type: "logstash" - args: - logstash_url: "{self.logstash.get_url()}" - ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} - username: "{self.logstash.logstash_user}" 
- password: "{self.logstash.logstash_password}" - """ - - config_file_path = "config.yaml" - config_bucket_name = _time_based_id(suffix="config-bucket") - _s3_upload_content_to_bucket( - client=self.s3_client, - content=config_yaml.encode("utf-8"), - content_type="text/plain", - bucket_name=config_bucket_name, - key=config_file_path, - ) - - os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" - - events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) - - message_id = events_sqs["Records"][0]["messageId"] - - ctx = ContextMock() - first_call = handler(events_sqs, ctx) # type:ignore - - assert first_call == "continuing" - - logstash_message = self.logstash.get_messages(expected=1) - assert len(logstash_message) == 1 - - assert logstash_message[0]["message"] == first_expanded_event - assert logstash_message[0]["log"]["offset"] == 0 - assert logstash_message[0]["log"]["file"]["path"] == sqs_queue_url_path - assert logstash_message[0]["aws"]["sqs"]["name"] == sqs_queue_name - assert logstash_message[0]["aws"]["sqs"]["message_id"] == message_id - assert logstash_message[0]["cloud"]["provider"] == "aws" - assert logstash_message[0]["cloud"]["region"] == "us-east-1" - assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) - - continued_events, _ = _sqs_get_messages( - self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn - ) - second_call = handler(continued_events, ctx) # type:ignore - - assert second_call == "completed" - - logstash_message = self.logstash.get_messages(expected=3) - assert len(logstash_message) == 3 - - assert logstash_message[1]["message"] == second_expanded_event - assert logstash_message[1]["log"]["offset"] == 56 - assert logstash_message[1]["log"]["file"]["path"] == sqs_queue_url_path - assert logstash_message[1]["aws"]["sqs"]["name"] == sqs_queue_name - assert logstash_message[1]["aws"]["sqs"]["message_id"] == message_id - assert logstash_message[1]["cloud"]["provider"] == "aws" - assert logstash_message[1]["cloud"]["region"] == "us-east-1" - assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - assert logstash_message[2]["message"] == third_expanded_event - assert logstash_message[2]["log"]["offset"] == 112 - assert logstash_message[2]["log"]["file"]["path"] == sqs_queue_url_path - assert logstash_message[2]["aws"]["sqs"]["name"] == sqs_queue_name - assert logstash_message[2]["aws"]["sqs"]["message_id"] == message_id - assert logstash_message[2]["cloud"]["provider"] == "aws" - assert logstash_message[2]["cloud"]["region"] == "us-east-1" - assert logstash_message[2]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[2]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - def test_root_fields_to_add_to_expanded_event_event_not_expanded(self) -> None: - assert isinstance(self.logstash, LogstashContainer) - assert isinstance(self.localstack, LocalStackContainer) - - first_expanded_event: str = _load_file_fixture("cloudwatch-log-1.json") - first_expanded_with_root_fields: dict[str, Any] = json_parser(first_expanded_event) - first_expanded_with_root_fields["secondRootField"] = "secondRootField" - - second_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") - second_expanded_with_root_fields: dict[str, 
Any] = json_parser(second_expanded_event) - second_expanded_with_root_fields["secondRootField"] = "secondRootField" - - fixtures = [ - f"""{{"firstRootField": "firstRootField", "secondRootField":"secondRootField", - "aField": [{first_expanded_event},{{}},{second_expanded_event}]}}""" - ] - - sqs_queue_name = _time_based_id(suffix="source-sqs") - - sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) - - sqs_queue_arn = sqs_queue["QueueArn"] - sqs_queue_url = sqs_queue["QueueUrl"] - sqs_queue_url_path = sqs_queue["QueueUrlPath"] - - _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) - - config_yaml: str = f""" - inputs: - - type: "sqs" - id: "{sqs_queue_arn}" - expand_event_list_from_field: aField - root_fields_to_add_to_expanded_event: ["secondRootField"] - tags: {self.default_tags} - outputs: - - type: "logstash" - args: - logstash_url: "{self.logstash.get_url()}" - ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} - username: "{self.logstash.logstash_user}" - password: "{self.logstash.logstash_password}" - """ - - config_file_path = "config.yaml" - config_bucket_name = _time_based_id(suffix="config-bucket") - _s3_upload_content_to_bucket( - client=self.s3_client, - content=config_yaml.encode("utf-8"), - content_type="text/plain", - bucket_name=config_bucket_name, - key=config_file_path, - ) - - os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" - - events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) - - message_id = events_sqs["Records"][0]["messageId"] - - ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) - - first_call = handler(events_sqs, ctx) # type:ignore - - assert first_call == "completed" - - logstash_message = self.logstash.get_messages(expected=2) - assert len(logstash_message) == 2 - - assert logstash_message[0]["message"] == json_dumper(first_expanded_with_root_fields) - assert logstash_message[0]["log"]["offset"] == 0 - assert logstash_message[0]["log"]["file"]["path"] == sqs_queue_url_path - assert logstash_message[0]["aws"]["sqs"]["name"] == sqs_queue_name - assert logstash_message[0]["aws"]["sqs"]["message_id"] == message_id - assert logstash_message[0]["cloud"]["provider"] == "aws" - assert logstash_message[0]["cloud"]["region"] == "us-east-1" - assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - assert logstash_message[1]["message"] == json_dumper(second_expanded_with_root_fields) - assert logstash_message[1]["log"]["offset"] == 180 - assert logstash_message[1]["log"]["file"]["path"] == sqs_queue_url_path - assert logstash_message[1]["aws"]["sqs"]["name"] == sqs_queue_name - assert logstash_message[1]["aws"]["sqs"]["message_id"] == message_id - assert logstash_message[1]["cloud"]["provider"] == "aws" - assert logstash_message[1]["cloud"]["region"] == "us-east-1" - assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - def test_root_fields_to_add_to_expanded_event_list(self) -> None: - assert isinstance(self.logstash, LogstashContainer) - assert isinstance(self.localstack, LocalStackContainer) - - first_expanded_event: str = _load_file_fixture("cloudwatch-log-1.json") - first_expanded_with_root_fields: dict[str, Any] = json_parser(first_expanded_event) - first_expanded_with_root_fields["secondRootField"] = "secondRootField" 
- - second_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") - second_expanded_with_root_fields: dict[str, Any] = json_parser(second_expanded_event) - second_expanded_with_root_fields["secondRootField"] = "secondRootField" - - third_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") - third_expanded_event_with_root_fields: dict[str, Any] = json_parser(third_expanded_event) - third_expanded_event_with_root_fields["secondRootField"] = "secondRootField" - - fixtures = [ - f"""{{"firstRootField": "firstRootField", "secondRootField":"secondRootField", - "aField": [{first_expanded_event},{second_expanded_event},{third_expanded_event}]}}""" - ] - - sqs_queue_name = _time_based_id(suffix="source-sqs") - - sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) - - sqs_queue_arn = sqs_queue["QueueArn"] - sqs_queue_url = sqs_queue["QueueUrl"] - sqs_queue_url_path = sqs_queue["QueueUrlPath"] - - _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) - - config_yaml: str = f""" - inputs: - - type: "sqs" - id: "{sqs_queue_arn}" - expand_event_list_from_field: aField - root_fields_to_add_to_expanded_event: ["secondRootField"] - tags: {self.default_tags} - outputs: - - type: "logstash" - args: - logstash_url: "{self.logstash.get_url()}" - ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} - username: "{self.logstash.logstash_user}" - password: "{self.logstash.logstash_password}" - """ - - config_file_path = "config.yaml" - config_bucket_name = _time_based_id(suffix="config-bucket") - _s3_upload_content_to_bucket( - client=self.s3_client, - content=config_yaml.encode("utf-8"), - content_type="text/plain", - bucket_name=config_bucket_name, - key=config_file_path, - ) - - os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" - - events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) - - message_id = events_sqs["Records"][0]["messageId"] - - ctx = ContextMock() - first_call = handler(events_sqs, ctx) # type:ignore - - assert first_call == "continuing" - - logstash_message = self.logstash.get_messages(expected=1) - assert len(logstash_message) == 1 - - assert logstash_message[0]["message"] == json_dumper(first_expanded_with_root_fields) - assert logstash_message[0]["log"]["offset"] == 0 - assert logstash_message[0]["log"]["file"]["path"] == sqs_queue_url_path - assert logstash_message[0]["aws"]["sqs"]["name"] == sqs_queue_name - assert logstash_message[0]["aws"]["sqs"]["message_id"] == message_id - assert logstash_message[0]["cloud"]["provider"] == "aws" - assert logstash_message[0]["cloud"]["region"] == "us-east-1" - assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) - - continued_events, _ = _sqs_get_messages( - self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn - ) - second_call = handler(continued_events, ctx) # type:ignore - - assert second_call == "completed" - - logstash_message = self.logstash.get_messages(expected=3) - assert len(logstash_message) == 3 - - assert logstash_message[1]["message"] == json_dumper(second_expanded_with_root_fields) - assert logstash_message[1]["log"]["offset"] == 114 - assert logstash_message[1]["log"]["file"]["path"] == sqs_queue_url_path - assert logstash_message[1]["aws"]["sqs"]["name"] == sqs_queue_name - assert 
logstash_message[1]["aws"]["sqs"]["message_id"] == message_id - assert logstash_message[1]["cloud"]["provider"] == "aws" - assert logstash_message[1]["cloud"]["region"] == "us-east-1" - assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - assert logstash_message[2]["message"] == json_dumper(third_expanded_event_with_root_fields) - assert logstash_message[2]["log"]["offset"] == 228 - assert logstash_message[2]["log"]["file"]["path"] == sqs_queue_url_path - assert logstash_message[2]["aws"]["sqs"]["name"] == sqs_queue_name - assert logstash_message[2]["aws"]["sqs"]["message_id"] == message_id - assert logstash_message[2]["cloud"]["provider"] == "aws" - assert logstash_message[2]["cloud"]["region"] == "us-east-1" - assert logstash_message[2]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[2]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - def test_root_fields_to_add_to_expanded_event_list_no_fields_in_root(self) -> None: - assert isinstance(self.logstash, LogstashContainer) - assert isinstance(self.localstack, LocalStackContainer) - - first_expanded_event: str = _load_file_fixture("cloudwatch-log-1.json") - first_expanded_with_root_fields: dict[str, Any] = json_parser(first_expanded_event) - first_expanded_with_root_fields["secondRootField"] = "secondRootField" - - second_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") - second_expanded_with_root_fields: dict[str, Any] = json_parser(second_expanded_event) - second_expanded_with_root_fields["secondRootField"] = "secondRootField" - - third_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") - third_expanded_event_with_root_fields: dict[str, Any] = json_parser(third_expanded_event) - third_expanded_event_with_root_fields["secondRootField"] = "secondRootField" - - fixtures = [ - f"""{{"firstRootField": "firstRootField", "secondRootField":"secondRootField", - "aField": [{first_expanded_event},{second_expanded_event},{third_expanded_event}]}}""" - ] - - sqs_queue_name = _time_based_id(suffix="source-sqs") - - sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) - - sqs_queue_arn = sqs_queue["QueueArn"] - sqs_queue_url = sqs_queue["QueueUrl"] - sqs_queue_url_path = sqs_queue["QueueUrlPath"] - - _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) - - config_yaml: str = f""" - inputs: - - type: "sqs" - id: "{sqs_queue_arn}" - expand_event_list_from_field: aField - root_fields_to_add_to_expanded_event: ["secondRootField", "thirdRootField"] - tags: {self.default_tags} - outputs: - - type: "logstash" - args: - logstash_url: "{self.logstash.get_url()}" - ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} - username: "{self.logstash.logstash_user}" - password: "{self.logstash.logstash_password}" - """ - - config_file_path = "config.yaml" - config_bucket_name = _time_based_id(suffix="config-bucket") - _s3_upload_content_to_bucket( - client=self.s3_client, - content=config_yaml.encode("utf-8"), - content_type="text/plain", - bucket_name=config_bucket_name, - key=config_file_path, - ) - - os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" - - events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) - - message_id = events_sqs["Records"][0]["messageId"] - - ctx = ContextMock() - first_call = handler(events_sqs, ctx) # type:ignore - - assert first_call == "continuing" - - logstash_message 
= self.logstash.get_messages(expected=1) - assert len(logstash_message) == 1 - - assert logstash_message[0]["message"] == json_dumper(first_expanded_with_root_fields) - assert logstash_message[0]["log"]["offset"] == 0 - assert logstash_message[0]["log"]["file"]["path"] == sqs_queue_url_path - assert logstash_message[0]["aws"]["sqs"]["name"] == sqs_queue_name - assert logstash_message[0]["aws"]["sqs"]["message_id"] == message_id - assert logstash_message[0]["cloud"]["provider"] == "aws" - assert logstash_message[0]["cloud"]["region"] == "us-east-1" - assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) - - continued_events, _ = _sqs_get_messages( - self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn - ) - second_call = handler(continued_events, ctx) # type:ignore - - assert second_call == "completed" - - logstash_message = self.logstash.get_messages(expected=3) - assert len(logstash_message) == 3 - - assert logstash_message[1]["message"] == json_dumper(second_expanded_with_root_fields) - assert logstash_message[1]["log"]["offset"] == 114 - assert logstash_message[1]["log"]["file"]["path"] == sqs_queue_url_path - assert logstash_message[1]["aws"]["sqs"]["name"] == sqs_queue_name - assert logstash_message[1]["aws"]["sqs"]["message_id"] == message_id - assert logstash_message[1]["cloud"]["provider"] == "aws" - assert logstash_message[1]["cloud"]["region"] == "us-east-1" - assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - assert logstash_message[2]["message"] == json_dumper(third_expanded_event_with_root_fields) - assert logstash_message[2]["log"]["offset"] == 228 - assert logstash_message[2]["log"]["file"]["path"] == sqs_queue_url_path - assert logstash_message[2]["aws"]["sqs"]["name"] == sqs_queue_name - assert logstash_message[2]["aws"]["sqs"]["message_id"] == message_id - assert logstash_message[2]["cloud"]["provider"] == "aws" - assert logstash_message[2]["cloud"]["region"] == "us-east-1" - assert logstash_message[2]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[2]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - def test_root_fields_to_add_to_expanded_event_all(self) -> None: - assert isinstance(self.logstash, LogstashContainer) - assert isinstance(self.localstack, LocalStackContainer) - - first_expanded_event: str = _load_file_fixture("cloudwatch-log-1.json") - first_expanded_with_root_fields: dict[str, Any] = json_parser(first_expanded_event) - first_expanded_with_root_fields["firstRootField"] = "firstRootField" - first_expanded_with_root_fields["secondRootField"] = "secondRootField" - - second_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") - second_expanded_with_root_fields: dict[str, Any] = json_parser(second_expanded_event) - second_expanded_with_root_fields["firstRootField"] = "firstRootField" - second_expanded_with_root_fields["secondRootField"] = "secondRootField" - - third_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") - third_expanded_event_with_root_fields: dict[str, Any] = json_parser(third_expanded_event) - third_expanded_event_with_root_fields["firstRootField"] = "firstRootField" - third_expanded_event_with_root_fields["secondRootField"] = "secondRootField" - - fixtures = [ - f"""{{"firstRootField": 
"firstRootField", "secondRootField":"secondRootField", - "aField": [{first_expanded_event},{second_expanded_event},{third_expanded_event}]}}""" - ] - - sqs_queue_name = _time_based_id(suffix="source-sqs") - - sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) - - sqs_queue_arn = sqs_queue["QueueArn"] - sqs_queue_url = sqs_queue["QueueUrl"] - sqs_queue_url_path = sqs_queue["QueueUrlPath"] - - _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) - - config_yaml: str = f""" - inputs: - - type: "sqs" - id: "{sqs_queue_arn}" - expand_event_list_from_field: aField - root_fields_to_add_to_expanded_event: all - tags: {self.default_tags} - outputs: - - type: "logstash" - args: - logstash_url: "{self.logstash.get_url()}" - ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} - username: "{self.logstash.logstash_user}" - password: "{self.logstash.logstash_password}" - """ - - config_file_path = "config.yaml" - config_bucket_name = _time_based_id(suffix="config-bucket") - _s3_upload_content_to_bucket( - client=self.s3_client, - content=config_yaml.encode("utf-8"), - content_type="text/plain", - bucket_name=config_bucket_name, - key=config_file_path, - ) - - os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" - - events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) - - message_id = events_sqs["Records"][0]["messageId"] - - ctx = ContextMock() - first_call = handler(events_sqs, ctx) # type:ignore - - assert first_call == "continuing" - - logstash_message = self.logstash.get_messages(expected=1) - assert len(logstash_message) == 1 - - assert logstash_message[0]["message"] == json_dumper(first_expanded_with_root_fields) - assert logstash_message[0]["log"]["offset"] == 0 - assert logstash_message[0]["log"]["file"]["path"] == sqs_queue_url_path - assert logstash_message[0]["aws"]["sqs"]["name"] == sqs_queue_name - assert logstash_message[0]["aws"]["sqs"]["message_id"] == message_id - assert logstash_message[0]["cloud"]["provider"] == "aws" - assert logstash_message[0]["cloud"]["region"] == "us-east-1" - assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) - - continued_events, _ = _sqs_get_messages( - self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn - ) - second_call = handler(continued_events, ctx) # type:ignore - - assert second_call == "completed" - - logstash_message = self.logstash.get_messages(expected=3) - assert len(logstash_message) == 3 - - assert logstash_message[1]["message"] == json_dumper(second_expanded_with_root_fields) - assert logstash_message[1]["log"]["offset"] == 114 - assert logstash_message[1]["log"]["file"]["path"] == sqs_queue_url_path - assert logstash_message[1]["aws"]["sqs"]["name"] == sqs_queue_name - assert logstash_message[1]["aws"]["sqs"]["message_id"] == message_id - assert logstash_message[1]["cloud"]["provider"] == "aws" - assert logstash_message[1]["cloud"]["region"] == "us-east-1" - assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - - assert logstash_message[2]["message"] == json_dumper(third_expanded_event_with_root_fields) - assert logstash_message[2]["log"]["offset"] == 228 - assert logstash_message[2]["log"]["file"]["path"] == sqs_queue_url_path - 
assert logstash_message[2]["aws"]["sqs"]["name"] == sqs_queue_name - assert logstash_message[2]["aws"]["sqs"]["message_id"] == message_id - assert logstash_message[2]["cloud"]["provider"] == "aws" - assert logstash_message[2]["cloud"]["region"] == "us-east-1" - assert logstash_message[2]["cloud"]["account"]["id"] == "000000000000" - assert logstash_message[2]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] - From 8480bc8ef0f73b3ec3ebfbdf48bc18d5e8dd9ce6 Mon Sep 17 00:00:00 2001 From: constanca Date: Wed, 17 Apr 2024 11:54:25 +0200 Subject: [PATCH 05/26] . --- tests/handlers/aws/test_integrations.py | 500 ------------------------ 1 file changed, 500 deletions(-) diff --git a/tests/handlers/aws/test_integrations.py b/tests/handlers/aws/test_integrations.py index 7efffb58..feda79cb 100644 --- a/tests/handlers/aws/test_integrations.py +++ b/tests/handlers/aws/test_integrations.py @@ -172,503 +172,3 @@ def tearDown(self) -> None: os.environ["S3_CONFIG_FILE"] = "" os.environ["SQS_CONTINUE_URL"] = "" os.environ["SQS_REPLAY_URL"] = "" - - def test_ls_es_output(self) -> None: - assert isinstance(self.elasticsearch, ElasticsearchContainer) - assert isinstance(self.logstash, LogstashContainer) - assert isinstance(self.localstack, LocalStackContainer) - - s3_sqs_queue_name = _time_based_id(suffix="source-s3-sqs") - - s3_sqs_queue = _sqs_create_queue(self.sqs_client, s3_sqs_queue_name, self.localstack.get_url()) - - s3_sqs_queue_arn = s3_sqs_queue["QueueArn"] - s3_sqs_queue_url = s3_sqs_queue["QueueUrl"] - - config_yaml: str = f""" - inputs: - - type: s3-sqs - id: "{s3_sqs_queue_arn}" - tags: {self.default_tags} - outputs: {self.default_outputs} - """ - - config_file_path = "config.yaml" - config_bucket_name = _time_based_id(suffix="config-bucket") - _s3_upload_content_to_bucket( - client=self.s3_client, - content=config_yaml.encode("utf-8"), - content_type="text/plain", - bucket_name=config_bucket_name, - key=config_file_path, - ) - - os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" - fixtures = [ - _load_file_fixture("cloudwatch-log-1.json"), - _load_file_fixture("cloudwatch-log-2.json"), - ] - - cloudtrail_filename_digest = ( - "AWSLogs/aws-account-id/CloudTrail-Digest/region/yyyy/mm/dd/" - "aws-account-id_CloudTrail-Digest_region_end-time_random-string.log.gz" - ) - cloudtrail_filename_non_digest = ( - "AWSLogs/aws-account-id/CloudTrail/region/yyyy/mm/dd/" - "aws-account-id_CloudTrail_region_end-time_random-string.log.gz" - ) - - s3_bucket_name = _time_based_id(suffix="test-bucket") - - _s3_upload_content_to_bucket( - client=self.s3_client, - content=gzip.compress(fixtures[0].encode("utf-8")), - content_type="application/x-gzip", - bucket_name=s3_bucket_name, - key=cloudtrail_filename_digest, - ) - - _s3_upload_content_to_bucket( - client=self.s3_client, - content=gzip.compress(fixtures[1].encode("utf-8")), - content_type="application/x-gzip", - bucket_name=s3_bucket_name, - key=cloudtrail_filename_non_digest, - ) - - _sqs_send_s3_notifications( - self.sqs_client, - s3_sqs_queue_url, - s3_bucket_name, - [cloudtrail_filename_digest, cloudtrail_filename_non_digest], - ) - - event, _ = _sqs_get_messages(self.sqs_client, s3_sqs_queue_url, s3_sqs_queue_arn) - - ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) - first_call = handler(event, ctx) # type:ignore - - assert first_call == "completed" - - self.elasticsearch.refresh(index="logs-aws.cloudtrail-default") - assert self.elasticsearch.count(index="logs-aws.cloudtrail-default")["count"] == 2 - 
- res = self.elasticsearch.search(index="logs-aws.cloudtrail-default", sort="_seq_no") - assert res["hits"]["total"] == {"value": 2, "relation": "eq"} - - assert res["hits"]["hits"][0]["_source"]["message"] == fixtures[0].rstrip("\n") - assert res["hits"]["hits"][0]["_source"]["log"]["offset"] == 0 - assert ( - res["hits"]["hits"][0]["_source"]["log"]["file"]["path"] - == f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{cloudtrail_filename_digest}" - ) - assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name - assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" - assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["object"]["key"] == cloudtrail_filename_digest - assert res["hits"]["hits"][0]["_source"]["cloud"]["provider"] == "aws" - assert res["hits"]["hits"][0]["_source"]["cloud"]["region"] == "eu-central-1" - assert res["hits"]["hits"][0]["_source"]["cloud"]["account"]["id"] == "000000000000" - assert res["hits"]["hits"][0]["_source"]["tags"] == ["forwarded", "aws-cloudtrail", "tag1", "tag2", "tag3"] - - assert res["hits"]["hits"][1]["_source"]["message"] == fixtures[1].rstrip("\n") - assert res["hits"]["hits"][1]["_source"]["log"]["offset"] == 0 - assert ( - res["hits"]["hits"][1]["_source"]["log"]["file"]["path"] - == f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{cloudtrail_filename_non_digest}" - ) - assert res["hits"]["hits"][1]["_source"]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name - assert res["hits"]["hits"][1]["_source"]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" - assert res["hits"]["hits"][1]["_source"]["aws"]["s3"]["object"]["key"] == cloudtrail_filename_non_digest - assert res["hits"]["hits"][1]["_source"]["cloud"]["provider"] == "aws" - assert res["hits"]["hits"][1]["_source"]["cloud"]["region"] == "eu-central-1" - assert res["hits"]["hits"][1]["_source"]["cloud"]["account"]["id"] == "000000000000" - assert res["hits"]["hits"][1]["_source"]["tags"] == ["forwarded", "aws-cloudtrail", "tag1", "tag2", "tag3"] - - logstash_message = self.logstash.get_messages(expected=2) - assert len(logstash_message) == 2 - res["hits"]["hits"][0]["_source"]["tags"].remove("aws-cloudtrail") - res["hits"]["hits"][1]["_source"]["tags"].remove("aws-cloudtrail") - - assert res["hits"]["hits"][0]["_source"]["aws"] == logstash_message[0]["aws"] - assert res["hits"]["hits"][0]["_source"]["cloud"] == logstash_message[0]["cloud"] - assert res["hits"]["hits"][0]["_source"]["log"] == logstash_message[0]["log"] - assert res["hits"]["hits"][0]["_source"]["message"] == logstash_message[0]["message"] - assert res["hits"]["hits"][0]["_source"]["tags"] == logstash_message[0]["tags"] - - assert res["hits"]["hits"][1]["_source"]["aws"] == logstash_message[1]["aws"] - assert res["hits"]["hits"][1]["_source"]["cloud"] == logstash_message[1]["cloud"] - assert res["hits"]["hits"][1]["_source"]["log"] == logstash_message[1]["log"] - assert res["hits"]["hits"][1]["_source"]["message"] == logstash_message[1]["message"] - assert res["hits"]["hits"][1]["_source"]["tags"] == logstash_message[1]["tags"] - - self.elasticsearch.refresh(index="logs-stash.elasticsearch-output") - assert self.elasticsearch.count(index="logs-stash.elasticsearch-output")["count"] == 2 - - res = self.elasticsearch.search(index="logs-stash.elasticsearch-output", sort="_seq_no") - assert res["hits"]["total"] == {"value": 2, "relation": "eq"} - - assert res["hits"]["hits"][0]["_source"]["aws"] == 
logstash_message[0]["aws"] - assert res["hits"]["hits"][0]["_source"]["cloud"] == logstash_message[0]["cloud"] - assert res["hits"]["hits"][0]["_source"]["log"] == logstash_message[0]["log"] - assert res["hits"]["hits"][0]["_source"]["message"] == logstash_message[0]["message"] - assert res["hits"]["hits"][0]["_source"]["tags"] == logstash_message[0]["tags"] - - assert res["hits"]["hits"][1]["_source"]["aws"] == logstash_message[1]["aws"] - assert res["hits"]["hits"][1]["_source"]["cloud"] == logstash_message[1]["cloud"] - assert res["hits"]["hits"][1]["_source"]["log"] == logstash_message[1]["log"] - assert res["hits"]["hits"][1]["_source"]["message"] == logstash_message[1]["message"] - assert res["hits"]["hits"][1]["_source"]["tags"] == logstash_message[1]["tags"] - - def test_continuing(self) -> None: - assert isinstance(self.elasticsearch, ElasticsearchContainer) - assert isinstance(self.logstash, LogstashContainer) - assert isinstance(self.localstack, LocalStackContainer) - - fixtures = [ - _load_file_fixture("cloudwatch-log-1.json"), - _load_file_fixture("cloudwatch-log-2.json"), - ] - - s3_bucket_name = _time_based_id(suffix="test-bucket") - first_filename = "exportedlog/uuid/yyyy-mm-dd-[$LATEST]hash/000000.gz" - _s3_upload_content_to_bucket( - client=self.s3_client, - content=gzip.compress("".join(fixtures).encode("utf-8")), - content_type="application/x-gzip", - bucket_name=s3_bucket_name, - key=first_filename, - ) - - cloudwatch_group_name = _time_based_id(suffix="source-group") - cloudwatch_group = _logs_create_cloudwatch_logs_group(self.logs_client, group_name=cloudwatch_group_name) - - cloudwatch_stream_name = _time_based_id(suffix="source-stream") - _logs_create_cloudwatch_logs_stream( - self.logs_client, group_name=cloudwatch_group_name, stream_name=cloudwatch_stream_name - ) - - _logs_upload_event_to_cloudwatch_logs( - self.logs_client, - group_name=cloudwatch_group_name, - stream_name=cloudwatch_stream_name, - messages_body=["".join(fixtures)], - ) - - cloudwatch_group_arn = cloudwatch_group["arn"] - - cloudwatch_group_name = cloudwatch_group_name - cloudwatch_stream_name = cloudwatch_stream_name - - sqs_queue_name = _time_based_id(suffix="source-sqs") - s3_sqs_queue_name = _time_based_id(suffix="source-s3-sqs") - - sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) - s3_sqs_queue = _sqs_create_queue(self.sqs_client, s3_sqs_queue_name, self.localstack.get_url()) - - sqs_queue_arn = sqs_queue["QueueArn"] - sqs_queue_url = sqs_queue["QueueUrl"] - sqs_queue_url_path = sqs_queue["QueueUrlPath"] - - s3_sqs_queue_arn = s3_sqs_queue["QueueArn"] - s3_sqs_queue_url = s3_sqs_queue["QueueUrl"] - - _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) - _sqs_send_s3_notifications(self.sqs_client, s3_sqs_queue_url, s3_bucket_name, [first_filename]) - - kinesis_stream_name = _time_based_id(suffix="source-kinesis") - kinesis_stream = _kinesis_create_stream(self.kinesis_client, kinesis_stream_name) - kinesis_stream_arn = kinesis_stream["StreamDescription"]["StreamARN"] - - _kinesis_put_records(self.kinesis_client, kinesis_stream_name, ["".join(fixtures)]) - - config_yaml: str = f""" - inputs: - - type: "kinesis-data-stream" - id: "{kinesis_stream_arn}" - tags: {self.default_tags} - outputs: {self.default_outputs} - - type: "cloudwatch-logs" - id: "{cloudwatch_group_arn}" - tags: {self.default_tags} - outputs: {self.default_outputs} - - type: sqs - id: "{sqs_queue_arn}" - tags: {self.default_tags} - outputs: {self.default_outputs} - - 
type: s3-sqs - id: "{s3_sqs_queue_arn}" - tags: {self.default_tags} - outputs: {self.default_outputs} - """ - - config_file_path = "config.yaml" - config_bucket_name = _time_based_id(suffix="config-bucket") - _s3_upload_content_to_bucket( - client=self.s3_client, - content=config_yaml.encode("utf-8"), - content_type="text/plain", - bucket_name=config_bucket_name, - key=config_file_path, - ) - - os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" - - events_s3, _ = _sqs_get_messages(self.sqs_client, s3_sqs_queue_url, s3_sqs_queue_arn) - - events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) - - message_id = events_sqs["Records"][0]["messageId"] - - events_cloudwatch_logs, event_ids_cloudwatch_logs, _ = _logs_retrieve_event_from_cloudwatch_logs( - self.logs_client, cloudwatch_group_name, cloudwatch_stream_name - ) - - events_kinesis, _ = _kinesis_retrieve_event_from_kinesis_stream( - self.kinesis_client, kinesis_stream_name, kinesis_stream_arn - ) - - ctx = ContextMock() - first_call = handler(events_s3, ctx) # type:ignore - - assert first_call == "continuing" - - self.elasticsearch.refresh(index="logs-generic-default") - assert self.elasticsearch.count(index="logs-generic-default")["count"] == 1 - - res = self.elasticsearch.search(index="logs-generic-default", sort="_seq_no") - assert res["hits"]["total"] == {"value": 1, "relation": "eq"} - - assert res["hits"]["hits"][0]["_source"]["message"] == fixtures[0].rstrip("\n") - assert res["hits"]["hits"][0]["_source"]["log"]["offset"] == 0 - assert ( - res["hits"]["hits"][0]["_source"]["log"]["file"]["path"] - == f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{first_filename}" - ) - assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name - assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" - assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["object"]["key"] == first_filename - assert res["hits"]["hits"][0]["_source"]["cloud"]["provider"] == "aws" - assert res["hits"]["hits"][0]["_source"]["cloud"]["region"] == "eu-central-1" - assert res["hits"]["hits"][0]["_source"]["cloud"]["account"]["id"] == "000000000000" - assert res["hits"]["hits"][0]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] - - logstash_message = self.logstash.get_messages(expected=1) - assert len(logstash_message) == 1 - res["hits"]["hits"][0]["_source"]["tags"].remove("generic") - assert res["hits"]["hits"][0]["_source"]["aws"] == logstash_message[0]["aws"] - assert res["hits"]["hits"][0]["_source"]["cloud"] == logstash_message[0]["cloud"] - assert res["hits"]["hits"][0]["_source"]["log"] == logstash_message[0]["log"] - assert res["hits"]["hits"][0]["_source"]["message"] == logstash_message[0]["message"] - assert res["hits"]["hits"][0]["_source"]["tags"] == logstash_message[0]["tags"] - - second_call = handler(events_sqs, ctx) # type:ignore - - assert second_call == "continuing" - - self.elasticsearch.refresh(index="logs-generic-default") - assert self.elasticsearch.count(index="logs-generic-default")["count"] == 2 - - res = self.elasticsearch.search(index="logs-generic-default", sort="_seq_no") - assert res["hits"]["total"] == {"value": 2, "relation": "eq"} - - assert res["hits"]["hits"][1]["_source"]["message"] == fixtures[0].rstrip("\n") - assert res["hits"]["hits"][1]["_source"]["log"]["offset"] == 0 - assert res["hits"]["hits"][1]["_source"]["log"]["file"]["path"] == sqs_queue_url_path - assert 
res["hits"]["hits"][1]["_source"]["aws"]["sqs"]["name"] == sqs_queue_name - assert res["hits"]["hits"][1]["_source"]["aws"]["sqs"]["message_id"] == message_id - assert res["hits"]["hits"][1]["_source"]["cloud"]["provider"] == "aws" - assert res["hits"]["hits"][1]["_source"]["cloud"]["region"] == "us-east-1" - assert res["hits"]["hits"][1]["_source"]["cloud"]["account"]["id"] == "000000000000" - assert res["hits"]["hits"][1]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] - - logstash_message = self.logstash.get_messages(expected=2) - assert len(logstash_message) == 2 - res["hits"]["hits"][1]["_source"]["tags"].remove("generic") - assert res["hits"]["hits"][1]["_source"]["aws"] == logstash_message[1]["aws"] - assert res["hits"]["hits"][1]["_source"]["cloud"] == logstash_message[1]["cloud"] - assert res["hits"]["hits"][1]["_source"]["log"] == logstash_message[1]["log"] - assert res["hits"]["hits"][1]["_source"]["message"] == logstash_message[1]["message"] - assert res["hits"]["hits"][1]["_source"]["tags"] == logstash_message[1]["tags"] - - third_call = handler(events_cloudwatch_logs, ctx) # type:ignore - - assert third_call == "continuing" - - self.elasticsearch.refresh(index="logs-generic-default") - assert self.elasticsearch.count(index="logs-generic-default")["count"] == 3 - - res = self.elasticsearch.search(index="logs-generic-default", sort="_seq_no") - assert res["hits"]["total"] == {"value": 3, "relation": "eq"} - - assert res["hits"]["hits"][2]["_source"]["message"] == fixtures[0].rstrip("\n") - assert res["hits"]["hits"][2]["_source"]["log"]["offset"] == 0 - assert ( - res["hits"]["hits"][2]["_source"]["log"]["file"]["path"] - == f"{cloudwatch_group_name}/{cloudwatch_stream_name}" - ) - assert res["hits"]["hits"][2]["_source"]["aws"]["cloudwatch"]["log_group"] == cloudwatch_group_name - assert res["hits"]["hits"][2]["_source"]["aws"]["cloudwatch"]["log_stream"] == cloudwatch_stream_name - assert res["hits"]["hits"][2]["_source"]["aws"]["cloudwatch"]["event_id"] == event_ids_cloudwatch_logs[0] - assert res["hits"]["hits"][2]["_source"]["cloud"]["provider"] == "aws" - assert res["hits"]["hits"][2]["_source"]["cloud"]["region"] == "us-east-1" - assert res["hits"]["hits"][2]["_source"]["cloud"]["account"]["id"] == "000000000000" - assert res["hits"]["hits"][2]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] - - logstash_message = self.logstash.get_messages(expected=3) - assert len(logstash_message) == 3 - res["hits"]["hits"][2]["_source"]["tags"].remove("generic") - assert res["hits"]["hits"][2]["_source"]["aws"] == logstash_message[2]["aws"] - assert res["hits"]["hits"][2]["_source"]["cloud"] == logstash_message[2]["cloud"] - assert res["hits"]["hits"][2]["_source"]["log"] == logstash_message[2]["log"] - assert res["hits"]["hits"][2]["_source"]["message"] == logstash_message[2]["message"] - assert res["hits"]["hits"][2]["_source"]["tags"] == logstash_message[2]["tags"] - - fourth_call = handler(events_kinesis, ctx) # type:ignore - - assert fourth_call == "continuing" - - self.elasticsearch.refresh(index="logs-generic-default") - assert self.elasticsearch.count(index="logs-generic-default")["count"] == 4 - - res = self.elasticsearch.search(index="logs-generic-default", sort="_seq_no") - assert res["hits"]["total"] == {"value": 4, "relation": "eq"} - - assert res["hits"]["hits"][3]["_source"]["message"] == fixtures[0].rstrip("\n") - assert res["hits"]["hits"][3]["_source"]["log"]["offset"] == 0 - assert 
res["hits"]["hits"][3]["_source"]["log"]["file"]["path"] == kinesis_stream_arn - assert res["hits"]["hits"][3]["_source"]["aws"]["kinesis"]["type"] == "stream" - assert res["hits"]["hits"][3]["_source"]["aws"]["kinesis"]["partition_key"] == "PartitionKey" - assert res["hits"]["hits"][3]["_source"]["aws"]["kinesis"]["name"] == kinesis_stream_name - assert ( - res["hits"]["hits"][3]["_source"]["aws"]["kinesis"]["sequence_number"] - == events_kinesis["Records"][0]["kinesis"]["sequenceNumber"] - ) - assert res["hits"]["hits"][3]["_source"]["cloud"]["provider"] == "aws" - assert res["hits"]["hits"][3]["_source"]["cloud"]["region"] == "us-east-1" - assert res["hits"]["hits"][3]["_source"]["cloud"]["account"]["id"] == "000000000000" - assert res["hits"]["hits"][3]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] - - logstash_message = self.logstash.get_messages(expected=4) - assert len(logstash_message) == 4 - res["hits"]["hits"][3]["_source"]["tags"].remove("generic") - assert res["hits"]["hits"][3]["_source"]["aws"] == logstash_message[3]["aws"] - assert res["hits"]["hits"][3]["_source"]["cloud"] == logstash_message[3]["cloud"] - assert res["hits"]["hits"][3]["_source"]["log"] == logstash_message[3]["log"] - assert res["hits"]["hits"][3]["_source"]["message"] == logstash_message[3]["message"] - assert res["hits"]["hits"][3]["_source"]["tags"] == logstash_message[3]["tags"] - - continued_events, _ = _sqs_get_messages( - self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn - ) - - fifth_call = handler(continued_events, ctx) # type:ignore - - assert fifth_call == "continuing" - - self.elasticsearch.refresh(index="logs-generic-default") - assert self.elasticsearch.count(index="logs-generic-default")["count"] == 5 - - res = self.elasticsearch.search(index="logs-generic-default", sort="_seq_no") - assert res["hits"]["total"] == {"value": 5, "relation": "eq"} - - assert res["hits"]["hits"][4]["_source"]["message"] == fixtures[1].rstrip("\n") - assert res["hits"]["hits"][4]["_source"]["log"]["offset"] == 94 - assert ( - res["hits"]["hits"][4]["_source"]["log"]["file"]["path"] - == f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{first_filename}" - ) - assert res["hits"]["hits"][4]["_source"]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name - assert res["hits"]["hits"][4]["_source"]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" - assert res["hits"]["hits"][4]["_source"]["aws"]["s3"]["object"]["key"] == first_filename - assert res["hits"]["hits"][4]["_source"]["cloud"]["provider"] == "aws" - assert res["hits"]["hits"][4]["_source"]["cloud"]["region"] == "eu-central-1" - assert res["hits"]["hits"][4]["_source"]["cloud"]["account"]["id"] == "000000000000" - assert res["hits"]["hits"][4]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] - - logstash_message = self.logstash.get_messages(expected=5) - assert len(logstash_message) == 5 - res["hits"]["hits"][4]["_source"]["tags"].remove("generic") - assert res["hits"]["hits"][4]["_source"]["aws"] == logstash_message[4]["aws"] - assert res["hits"]["hits"][4]["_source"]["cloud"] == logstash_message[4]["cloud"] - assert res["hits"]["hits"][4]["_source"]["log"] == logstash_message[4]["log"] - assert res["hits"]["hits"][4]["_source"]["message"] == logstash_message[4]["message"] - assert res["hits"]["hits"][4]["_source"]["tags"] == logstash_message[4]["tags"] - - ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) - - continued_events, _ = 
_sqs_get_messages( - self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn - ) - sixth_call = handler(continued_events, ctx) # type:ignore - - assert sixth_call == "completed" - - self.elasticsearch.refresh(index="logs-generic-default") - assert self.elasticsearch.count(index="logs-generic-default")["count"] == 8 - - res = self.elasticsearch.search(index="logs-generic-default", sort="_seq_no") - assert res["hits"]["total"] == {"value": 8, "relation": "eq"} - - assert res["hits"]["hits"][5]["_source"]["message"] == fixtures[1].rstrip("\n") - assert res["hits"]["hits"][5]["_source"]["log"]["offset"] == 94 - assert res["hits"]["hits"][5]["_source"]["log"]["file"]["path"] == sqs_queue_url_path - assert res["hits"]["hits"][5]["_source"]["aws"]["sqs"]["name"] == sqs_queue_name - assert res["hits"]["hits"][5]["_source"]["aws"]["sqs"]["message_id"] == message_id - assert res["hits"]["hits"][5]["_source"]["cloud"]["provider"] == "aws" - assert res["hits"]["hits"][5]["_source"]["cloud"]["region"] == "us-east-1" - assert res["hits"]["hits"][5]["_source"]["cloud"]["account"]["id"] == "000000000000" - assert res["hits"]["hits"][5]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] - - assert res["hits"]["hits"][6]["_source"]["message"] == fixtures[1].rstrip("\n") - assert res["hits"]["hits"][6]["_source"]["log"]["offset"] == 94 - assert ( - res["hits"]["hits"][6]["_source"]["log"]["file"]["path"] - == f"{cloudwatch_group_name}/{cloudwatch_stream_name}" - ) - assert res["hits"]["hits"][6]["_source"]["aws"]["cloudwatch"]["log_group"] == cloudwatch_group_name - assert res["hits"]["hits"][6]["_source"]["aws"]["cloudwatch"]["log_stream"] == cloudwatch_stream_name - assert res["hits"]["hits"][6]["_source"]["aws"]["cloudwatch"]["event_id"] == event_ids_cloudwatch_logs[0] - assert res["hits"]["hits"][6]["_source"]["cloud"]["provider"] == "aws" - assert res["hits"]["hits"][6]["_source"]["cloud"]["region"] == "us-east-1" - assert res["hits"]["hits"][6]["_source"]["cloud"]["account"]["id"] == "000000000000" - assert res["hits"]["hits"][6]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] - - assert res["hits"]["hits"][7]["_source"]["message"] == fixtures[1].rstrip("\n") - assert res["hits"]["hits"][7]["_source"]["log"]["offset"] == 94 - assert res["hits"]["hits"][7]["_source"]["log"]["file"]["path"] == kinesis_stream_arn - assert res["hits"]["hits"][7]["_source"]["aws"]["kinesis"]["type"] == "stream" - assert res["hits"]["hits"][7]["_source"]["aws"]["kinesis"]["partition_key"] == "PartitionKey" - assert res["hits"]["hits"][7]["_source"]["aws"]["kinesis"]["name"] == kinesis_stream_name - assert ( - res["hits"]["hits"][7]["_source"]["aws"]["kinesis"]["sequence_number"] - == events_kinesis["Records"][0]["kinesis"]["sequenceNumber"] - ) - assert res["hits"]["hits"][7]["_source"]["cloud"]["provider"] == "aws" - assert res["hits"]["hits"][7]["_source"]["cloud"]["region"] == "us-east-1" - assert res["hits"]["hits"][7]["_source"]["cloud"]["account"]["id"] == "000000000000" - assert res["hits"]["hits"][7]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] - - logstash_message = self.logstash.get_messages(expected=8) - assert len(logstash_message) == 8 - res["hits"]["hits"][5]["_source"]["tags"].remove("generic") - res["hits"]["hits"][6]["_source"]["tags"].remove("generic") - res["hits"]["hits"][7]["_source"]["tags"].remove("generic") - - assert res["hits"]["hits"][5]["_source"]["aws"] == logstash_message[5]["aws"] - assert 
res["hits"]["hits"][5]["_source"]["cloud"] == logstash_message[5]["cloud"] - assert res["hits"]["hits"][5]["_source"]["log"] == logstash_message[5]["log"] - assert res["hits"]["hits"][5]["_source"]["message"] == logstash_message[5]["message"] - assert res["hits"]["hits"][5]["_source"]["tags"] == logstash_message[5]["tags"] - - assert res["hits"]["hits"][6]["_source"]["aws"] == logstash_message[6]["aws"] - assert res["hits"]["hits"][6]["_source"]["cloud"] == logstash_message[6]["cloud"] - assert res["hits"]["hits"][6]["_source"]["log"] == logstash_message[6]["log"] - assert res["hits"]["hits"][6]["_source"]["message"] == logstash_message[6]["message"] - assert res["hits"]["hits"][6]["_source"]["tags"] == logstash_message[6]["tags"] - - assert res["hits"]["hits"][7]["_source"]["aws"] == logstash_message[7]["aws"] - assert res["hits"]["hits"][7]["_source"]["cloud"] == logstash_message[7]["cloud"] - assert res["hits"]["hits"][7]["_source"]["log"] == logstash_message[7]["log"] - assert res["hits"]["hits"][7]["_source"]["message"] == logstash_message[7]["message"] - assert res["hits"]["hits"][7]["_source"]["tags"] == logstash_message[7]["tags"] - From a15a91787d14cdd64cf3c8901186e206d0acb483 Mon Sep 17 00:00:00 2001 From: constanca Date: Wed, 17 Apr 2024 11:56:21 +0200 Subject: [PATCH 06/26] . --- tests/handlers/aws/test_integrations.py | 146 ++++++++++++++++++++++++ 1 file changed, 146 insertions(+) diff --git a/tests/handlers/aws/test_integrations.py b/tests/handlers/aws/test_integrations.py index feda79cb..c4bf6b4f 100644 --- a/tests/handlers/aws/test_integrations.py +++ b/tests/handlers/aws/test_integrations.py @@ -172,3 +172,149 @@ def tearDown(self) -> None: os.environ["S3_CONFIG_FILE"] = "" os.environ["SQS_CONTINUE_URL"] = "" os.environ["SQS_REPLAY_URL"] = "" + + def test_ls_es_output(self) -> None: + assert isinstance(self.elasticsearch, ElasticsearchContainer) + assert isinstance(self.logstash, LogstashContainer) + assert isinstance(self.localstack, LocalStackContainer) + + s3_sqs_queue_name = _time_based_id(suffix="source-s3-sqs") + + s3_sqs_queue = _sqs_create_queue(self.sqs_client, s3_sqs_queue_name, self.localstack.get_url()) + + s3_sqs_queue_arn = s3_sqs_queue["QueueArn"] + s3_sqs_queue_url = s3_sqs_queue["QueueUrl"] + + config_yaml: str = f""" + inputs: + - type: s3-sqs + id: "{s3_sqs_queue_arn}" + tags: {self.default_tags} + outputs: {self.default_outputs} + """ + + config_file_path = "config.yaml" + config_bucket_name = _time_based_id(suffix="config-bucket") + _s3_upload_content_to_bucket( + client=self.s3_client, + content=config_yaml.encode("utf-8"), + content_type="text/plain", + bucket_name=config_bucket_name, + key=config_file_path, + ) + + os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" + fixtures = [ + _load_file_fixture("cloudwatch-log-1.json"), + _load_file_fixture("cloudwatch-log-2.json"), + ] + + cloudtrail_filename_digest = ( + "AWSLogs/aws-account-id/CloudTrail-Digest/region/yyyy/mm/dd/" + "aws-account-id_CloudTrail-Digest_region_end-time_random-string.log.gz" + ) + cloudtrail_filename_non_digest = ( + "AWSLogs/aws-account-id/CloudTrail/region/yyyy/mm/dd/" + "aws-account-id_CloudTrail_region_end-time_random-string.log.gz" + ) + + s3_bucket_name = _time_based_id(suffix="test-bucket") + + _s3_upload_content_to_bucket( + client=self.s3_client, + content=gzip.compress(fixtures[0].encode("utf-8")), + content_type="application/x-gzip", + bucket_name=s3_bucket_name, + key=cloudtrail_filename_digest, + ) + + _s3_upload_content_to_bucket( + 
client=self.s3_client, + content=gzip.compress(fixtures[1].encode("utf-8")), + content_type="application/x-gzip", + bucket_name=s3_bucket_name, + key=cloudtrail_filename_non_digest, + ) + + _sqs_send_s3_notifications( + self.sqs_client, + s3_sqs_queue_url, + s3_bucket_name, + [cloudtrail_filename_digest, cloudtrail_filename_non_digest], + ) + + event, _ = _sqs_get_messages(self.sqs_client, s3_sqs_queue_url, s3_sqs_queue_arn) + + ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) + first_call = handler(event, ctx) # type:ignore + + assert first_call == "completed" + + self.elasticsearch.refresh(index="logs-aws.cloudtrail-default") + assert self.elasticsearch.count(index="logs-aws.cloudtrail-default")["count"] == 2 + + res = self.elasticsearch.search(index="logs-aws.cloudtrail-default", sort="_seq_no") + assert res["hits"]["total"] == {"value": 2, "relation": "eq"} + + assert res["hits"]["hits"][0]["_source"]["message"] == fixtures[0].rstrip("\n") + assert res["hits"]["hits"][0]["_source"]["log"]["offset"] == 0 + assert ( + res["hits"]["hits"][0]["_source"]["log"]["file"]["path"] + == f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{cloudtrail_filename_digest}" + ) + assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name + assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" + assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["object"]["key"] == cloudtrail_filename_digest + assert res["hits"]["hits"][0]["_source"]["cloud"]["provider"] == "aws" + assert res["hits"]["hits"][0]["_source"]["cloud"]["region"] == "eu-central-1" + assert res["hits"]["hits"][0]["_source"]["cloud"]["account"]["id"] == "000000000000" + assert res["hits"]["hits"][0]["_source"]["tags"] == ["forwarded", "aws-cloudtrail", "tag1", "tag2", "tag3"] + + assert res["hits"]["hits"][1]["_source"]["message"] == fixtures[1].rstrip("\n") + assert res["hits"]["hits"][1]["_source"]["log"]["offset"] == 0 + assert ( + res["hits"]["hits"][1]["_source"]["log"]["file"]["path"] + == f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{cloudtrail_filename_non_digest}" + ) + assert res["hits"]["hits"][1]["_source"]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name + assert res["hits"]["hits"][1]["_source"]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" + assert res["hits"]["hits"][1]["_source"]["aws"]["s3"]["object"]["key"] == cloudtrail_filename_non_digest + assert res["hits"]["hits"][1]["_source"]["cloud"]["provider"] == "aws" + assert res["hits"]["hits"][1]["_source"]["cloud"]["region"] == "eu-central-1" + assert res["hits"]["hits"][1]["_source"]["cloud"]["account"]["id"] == "000000000000" + assert res["hits"]["hits"][1]["_source"]["tags"] == ["forwarded", "aws-cloudtrail", "tag1", "tag2", "tag3"] + + logstash_message = self.logstash.get_messages(expected=2) + assert len(logstash_message) == 2 + res["hits"]["hits"][0]["_source"]["tags"].remove("aws-cloudtrail") + res["hits"]["hits"][1]["_source"]["tags"].remove("aws-cloudtrail") + + assert res["hits"]["hits"][0]["_source"]["aws"] == logstash_message[0]["aws"] + assert res["hits"]["hits"][0]["_source"]["cloud"] == logstash_message[0]["cloud"] + assert res["hits"]["hits"][0]["_source"]["log"] == logstash_message[0]["log"] + assert res["hits"]["hits"][0]["_source"]["message"] == logstash_message[0]["message"] + assert res["hits"]["hits"][0]["_source"]["tags"] == logstash_message[0]["tags"] + + assert res["hits"]["hits"][1]["_source"]["aws"] == 
logstash_message[1]["aws"] + assert res["hits"]["hits"][1]["_source"]["cloud"] == logstash_message[1]["cloud"] + assert res["hits"]["hits"][1]["_source"]["log"] == logstash_message[1]["log"] + assert res["hits"]["hits"][1]["_source"]["message"] == logstash_message[1]["message"] + assert res["hits"]["hits"][1]["_source"]["tags"] == logstash_message[1]["tags"] + + self.elasticsearch.refresh(index="logs-stash.elasticsearch-output") + assert self.elasticsearch.count(index="logs-stash.elasticsearch-output")["count"] == 2 + + res = self.elasticsearch.search(index="logs-stash.elasticsearch-output", sort="_seq_no") + assert res["hits"]["total"] == {"value": 2, "relation": "eq"} + + assert res["hits"]["hits"][0]["_source"]["aws"] == logstash_message[0]["aws"] + assert res["hits"]["hits"][0]["_source"]["cloud"] == logstash_message[0]["cloud"] + assert res["hits"]["hits"][0]["_source"]["log"] == logstash_message[0]["log"] + assert res["hits"]["hits"][0]["_source"]["message"] == logstash_message[0]["message"] + assert res["hits"]["hits"][0]["_source"]["tags"] == logstash_message[0]["tags"] + + assert res["hits"]["hits"][1]["_source"]["aws"] == logstash_message[1]["aws"] + assert res["hits"]["hits"][1]["_source"]["cloud"] == logstash_message[1]["cloud"] + assert res["hits"]["hits"][1]["_source"]["log"] == logstash_message[1]["log"] + assert res["hits"]["hits"][1]["_source"]["message"] == logstash_message[1]["message"] + assert res["hits"]["hits"][1]["_source"]["tags"] == logstash_message[1]["tags"] From e9a21f616f9f53c34a3480ec21e4ceb594cd07c0 Mon Sep 17 00:00:00 2001 From: constanca Date: Wed, 17 Apr 2024 12:00:02 +0200 Subject: [PATCH 07/26] . --- tests/handlers/aws/test_integrations.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/handlers/aws/test_integrations.py b/tests/handlers/aws/test_integrations.py index c4bf6b4f..8672e814 100644 --- a/tests/handlers/aws/test_integrations.py +++ b/tests/handlers/aws/test_integrations.py @@ -174,6 +174,7 @@ def tearDown(self) -> None: os.environ["SQS_REPLAY_URL"] = "" def test_ls_es_output(self) -> None: + print("Test ES output") assert isinstance(self.elasticsearch, ElasticsearchContainer) assert isinstance(self.logstash, LogstashContainer) assert isinstance(self.localstack, LocalStackContainer) From b957f37d15fd2d80fff3df903fea044729b1b566 Mon Sep 17 00:00:00 2001 From: constanca Date: Wed, 17 Apr 2024 12:02:15 +0200 Subject: [PATCH 08/26] . 
--- tests/handlers/aws/test_integrations.py | 10 ++++++++++ tests/scripts/run_tests.sh | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/tests/handlers/aws/test_integrations.py b/tests/handlers/aws/test_integrations.py index 8672e814..2c61eb8c 100644 --- a/tests/handlers/aws/test_integrations.py +++ b/tests/handlers/aws/test_integrations.py @@ -204,6 +204,8 @@ def test_ls_es_output(self) -> None: key=config_file_path, ) + print("-1") + os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" fixtures = [ _load_file_fixture("cloudwatch-log-1.json"), @@ -229,6 +231,8 @@ def test_ls_es_output(self) -> None: key=cloudtrail_filename_digest, ) + print("-2") + _s3_upload_content_to_bucket( client=self.s3_client, content=gzip.compress(fixtures[1].encode("utf-8")), @@ -257,6 +261,8 @@ def test_ls_es_output(self) -> None: res = self.elasticsearch.search(index="logs-aws.cloudtrail-default", sort="_seq_no") assert res["hits"]["total"] == {"value": 2, "relation": "eq"} + print("-3") + assert res["hits"]["hits"][0]["_source"]["message"] == fixtures[0].rstrip("\n") assert res["hits"]["hits"][0]["_source"]["log"]["offset"] == 0 assert ( @@ -296,6 +302,8 @@ def test_ls_es_output(self) -> None: assert res["hits"]["hits"][0]["_source"]["message"] == logstash_message[0]["message"] assert res["hits"]["hits"][0]["_source"]["tags"] == logstash_message[0]["tags"] + print("-4") + assert res["hits"]["hits"][1]["_source"]["aws"] == logstash_message[1]["aws"] assert res["hits"]["hits"][1]["_source"]["cloud"] == logstash_message[1]["cloud"] assert res["hits"]["hits"][1]["_source"]["log"] == logstash_message[1]["log"] @@ -308,6 +316,8 @@ def test_ls_es_output(self) -> None: res = self.elasticsearch.search(index="logs-stash.elasticsearch-output", sort="_seq_no") assert res["hits"]["total"] == {"value": 2, "relation": "eq"} + print("-5") + assert res["hits"]["hits"][0]["_source"]["aws"] == logstash_message[0]["aws"] assert res["hits"]["hits"][0]["_source"]["cloud"] == logstash_message[0]["cloud"] assert res["hits"]["hits"][0]["_source"]["log"] == logstash_message[0]["log"] diff --git a/tests/scripts/run_tests.sh b/tests/scripts/run_tests.sh index 6ae8de29..b2ee2f42 100755 --- a/tests/scripts/run_tests.sh +++ b/tests/scripts/run_tests.sh @@ -8,7 +8,7 @@ set -ex # delete any __pycache__ folders to avoid hard-to-debug caching issues find . -name __pycache__ -type d -exec rm -r '{}' + || true PYTEST_ARGS=("${PYTEST_ARGS}") -py.test -vv "${PYTEST_ARGS[*]}" "${PYTEST_JUNIT}" tests +py.test -vv "${PYTEST_ARGS[*]}" "${PYTEST_JUNIT}" tests -s if [[ "${PYTEST_ADDOPTS}" == *"--cov"* ]]; then # Transform coverage to xml so Jenkins can parse and report it From d49b9cf16d0bbf407d8492d0d27b914f6ecbd655 Mon Sep 17 00:00:00 2001 From: constanca Date: Wed, 17 Apr 2024 12:12:18 +0200 Subject: [PATCH 09/26] . 
--- tests/handlers/aws/test_integrations.py | 240 ++++++++++++------------ 1 file changed, 116 insertions(+), 124 deletions(-) diff --git a/tests/handlers/aws/test_integrations.py b/tests/handlers/aws/test_integrations.py index 2c61eb8c..37844e07 100644 --- a/tests/handlers/aws/test_integrations.py +++ b/tests/handlers/aws/test_integrations.py @@ -174,7 +174,8 @@ def tearDown(self) -> None: os.environ["SQS_REPLAY_URL"] = "" def test_ls_es_output(self) -> None: - print("Test ES output") + print("TEST LS ES OUTPUT.") + assert isinstance(self.elasticsearch, ElasticsearchContainer) assert isinstance(self.logstash, LogstashContainer) assert isinstance(self.localstack, LocalStackContainer) @@ -204,128 +205,119 @@ def test_ls_es_output(self) -> None: key=config_file_path, ) - print("-1") - - os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" - fixtures = [ - _load_file_fixture("cloudwatch-log-1.json"), - _load_file_fixture("cloudwatch-log-2.json"), - ] - - cloudtrail_filename_digest = ( - "AWSLogs/aws-account-id/CloudTrail-Digest/region/yyyy/mm/dd/" - "aws-account-id_CloudTrail-Digest_region_end-time_random-string.log.gz" - ) - cloudtrail_filename_non_digest = ( - "AWSLogs/aws-account-id/CloudTrail/region/yyyy/mm/dd/" - "aws-account-id_CloudTrail_region_end-time_random-string.log.gz" - ) - - s3_bucket_name = _time_based_id(suffix="test-bucket") - - _s3_upload_content_to_bucket( - client=self.s3_client, - content=gzip.compress(fixtures[0].encode("utf-8")), - content_type="application/x-gzip", - bucket_name=s3_bucket_name, - key=cloudtrail_filename_digest, - ) - - print("-2") - - _s3_upload_content_to_bucket( - client=self.s3_client, - content=gzip.compress(fixtures[1].encode("utf-8")), - content_type="application/x-gzip", - bucket_name=s3_bucket_name, - key=cloudtrail_filename_non_digest, - ) - - _sqs_send_s3_notifications( - self.sqs_client, - s3_sqs_queue_url, - s3_bucket_name, - [cloudtrail_filename_digest, cloudtrail_filename_non_digest], - ) - - event, _ = _sqs_get_messages(self.sqs_client, s3_sqs_queue_url, s3_sqs_queue_arn) - - ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) + #os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" + #fixtures = [ + # _load_file_fixture("cloudwatch-log-1.json"), + # _load_file_fixture("cloudwatch-log-2.json"), + #] +# + #cloudtrail_filename_digest = ( + # "AWSLogs/aws-account-id/CloudTrail-Digest/region/yyyy/mm/dd/" + # "aws-account-id_CloudTrail-Digest_region_end-time_random-string.log.gz" + #) + #cloudtrail_filename_non_digest = ( + # "AWSLogs/aws-account-id/CloudTrail/region/yyyy/mm/dd/" + # "aws-account-id_CloudTrail_region_end-time_random-string.log.gz" + #) +# + #s3_bucket_name = _time_based_id(suffix="test-bucket") +# + #_s3_upload_content_to_bucket( + # client=self.s3_client, + # content=gzip.compress(fixtures[0].encode("utf-8")), + # content_type="application/x-gzip", + # bucket_name=s3_bucket_name, + # key=cloudtrail_filename_digest, + #) +# + #_s3_upload_content_to_bucket( + # client=self.s3_client, + # content=gzip.compress(fixtures[1].encode("utf-8")), + # content_type="application/x-gzip", + # bucket_name=s3_bucket_name, + # key=cloudtrail_filename_non_digest, + #) +# + #_sqs_send_s3_notifications( + # self.sqs_client, + # s3_sqs_queue_url, + # s3_bucket_name, + # [cloudtrail_filename_digest, cloudtrail_filename_non_digest], + #) +# + #event, _ = _sqs_get_messages(self.sqs_client, s3_sqs_queue_url, s3_sqs_queue_arn) +# + #ctx = 
ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) first_call = handler(event, ctx) # type:ignore - assert first_call == "completed" - - self.elasticsearch.refresh(index="logs-aws.cloudtrail-default") - assert self.elasticsearch.count(index="logs-aws.cloudtrail-default")["count"] == 2 - - res = self.elasticsearch.search(index="logs-aws.cloudtrail-default", sort="_seq_no") - assert res["hits"]["total"] == {"value": 2, "relation": "eq"} - - print("-3") - - assert res["hits"]["hits"][0]["_source"]["message"] == fixtures[0].rstrip("\n") - assert res["hits"]["hits"][0]["_source"]["log"]["offset"] == 0 - assert ( - res["hits"]["hits"][0]["_source"]["log"]["file"]["path"] - == f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{cloudtrail_filename_digest}" - ) - assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name - assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" - assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["object"]["key"] == cloudtrail_filename_digest - assert res["hits"]["hits"][0]["_source"]["cloud"]["provider"] == "aws" - assert res["hits"]["hits"][0]["_source"]["cloud"]["region"] == "eu-central-1" - assert res["hits"]["hits"][0]["_source"]["cloud"]["account"]["id"] == "000000000000" - assert res["hits"]["hits"][0]["_source"]["tags"] == ["forwarded", "aws-cloudtrail", "tag1", "tag2", "tag3"] - - assert res["hits"]["hits"][1]["_source"]["message"] == fixtures[1].rstrip("\n") - assert res["hits"]["hits"][1]["_source"]["log"]["offset"] == 0 - assert ( - res["hits"]["hits"][1]["_source"]["log"]["file"]["path"] - == f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{cloudtrail_filename_non_digest}" - ) - assert res["hits"]["hits"][1]["_source"]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name - assert res["hits"]["hits"][1]["_source"]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" - assert res["hits"]["hits"][1]["_source"]["aws"]["s3"]["object"]["key"] == cloudtrail_filename_non_digest - assert res["hits"]["hits"][1]["_source"]["cloud"]["provider"] == "aws" - assert res["hits"]["hits"][1]["_source"]["cloud"]["region"] == "eu-central-1" - assert res["hits"]["hits"][1]["_source"]["cloud"]["account"]["id"] == "000000000000" - assert res["hits"]["hits"][1]["_source"]["tags"] == ["forwarded", "aws-cloudtrail", "tag1", "tag2", "tag3"] - - logstash_message = self.logstash.get_messages(expected=2) - assert len(logstash_message) == 2 - res["hits"]["hits"][0]["_source"]["tags"].remove("aws-cloudtrail") - res["hits"]["hits"][1]["_source"]["tags"].remove("aws-cloudtrail") - - assert res["hits"]["hits"][0]["_source"]["aws"] == logstash_message[0]["aws"] - assert res["hits"]["hits"][0]["_source"]["cloud"] == logstash_message[0]["cloud"] - assert res["hits"]["hits"][0]["_source"]["log"] == logstash_message[0]["log"] - assert res["hits"]["hits"][0]["_source"]["message"] == logstash_message[0]["message"] - assert res["hits"]["hits"][0]["_source"]["tags"] == logstash_message[0]["tags"] - - print("-4") - - assert res["hits"]["hits"][1]["_source"]["aws"] == logstash_message[1]["aws"] - assert res["hits"]["hits"][1]["_source"]["cloud"] == logstash_message[1]["cloud"] - assert res["hits"]["hits"][1]["_source"]["log"] == logstash_message[1]["log"] - assert res["hits"]["hits"][1]["_source"]["message"] == logstash_message[1]["message"] - assert res["hits"]["hits"][1]["_source"]["tags"] == logstash_message[1]["tags"] - - 
self.elasticsearch.refresh(index="logs-stash.elasticsearch-output") - assert self.elasticsearch.count(index="logs-stash.elasticsearch-output")["count"] == 2 - - res = self.elasticsearch.search(index="logs-stash.elasticsearch-output", sort="_seq_no") - assert res["hits"]["total"] == {"value": 2, "relation": "eq"} - - print("-5") - - assert res["hits"]["hits"][0]["_source"]["aws"] == logstash_message[0]["aws"] - assert res["hits"]["hits"][0]["_source"]["cloud"] == logstash_message[0]["cloud"] - assert res["hits"]["hits"][0]["_source"]["log"] == logstash_message[0]["log"] - assert res["hits"]["hits"][0]["_source"]["message"] == logstash_message[0]["message"] - assert res["hits"]["hits"][0]["_source"]["tags"] == logstash_message[0]["tags"] - - assert res["hits"]["hits"][1]["_source"]["aws"] == logstash_message[1]["aws"] - assert res["hits"]["hits"][1]["_source"]["cloud"] == logstash_message[1]["cloud"] - assert res["hits"]["hits"][1]["_source"]["log"] == logstash_message[1]["log"] - assert res["hits"]["hits"][1]["_source"]["message"] == logstash_message[1]["message"] - assert res["hits"]["hits"][1]["_source"]["tags"] == logstash_message[1]["tags"] + #assert first_call == "completed" +# + #self.elasticsearch.refresh(index="logs-aws.cloudtrail-default") + #assert self.elasticsearch.count(index="logs-aws.cloudtrail-default")["count"] == 2 +# + #res = self.elasticsearch.search(index="logs-aws.cloudtrail-default", sort="_seq_no") + #assert res["hits"]["total"] == {"value": 2, "relation": "eq"} +# + #assert res["hits"]["hits"][0]["_source"]["message"] == fixtures[0].rstrip("\n") + #assert res["hits"]["hits"][0]["_source"]["log"]["offset"] == 0 + #assert ( + # res["hits"]["hits"][0]["_source"]["log"]["file"]["path"] + # == f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{cloudtrail_filename_digest}" + #) + #assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name + #assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" + #assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["object"]["key"] == cloudtrail_filename_digest + #assert res["hits"]["hits"][0]["_source"]["cloud"]["provider"] == "aws" + #assert res["hits"]["hits"][0]["_source"]["cloud"]["region"] == "eu-central-1" + #assert res["hits"]["hits"][0]["_source"]["cloud"]["account"]["id"] == "000000000000" + #assert res["hits"]["hits"][0]["_source"]["tags"] == ["forwarded", "aws-cloudtrail", "tag1", "tag2", "tag3"] +# + #assert res["hits"]["hits"][1]["_source"]["message"] == fixtures[1].rstrip("\n") + #assert res["hits"]["hits"][1]["_source"]["log"]["offset"] == 0 + #assert ( + # res["hits"]["hits"][1]["_source"]["log"]["file"]["path"] + # == f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{cloudtrail_filename_non_digest}" + #) + #assert res["hits"]["hits"][1]["_source"]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name + #assert res["hits"]["hits"][1]["_source"]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" + #assert res["hits"]["hits"][1]["_source"]["aws"]["s3"]["object"]["key"] == cloudtrail_filename_non_digest + #assert res["hits"]["hits"][1]["_source"]["cloud"]["provider"] == "aws" + #assert res["hits"]["hits"][1]["_source"]["cloud"]["region"] == "eu-central-1" + #assert res["hits"]["hits"][1]["_source"]["cloud"]["account"]["id"] == "000000000000" + #assert res["hits"]["hits"][1]["_source"]["tags"] == ["forwarded", "aws-cloudtrail", "tag1", "tag2", "tag3"] +# + #logstash_message = self.logstash.get_messages(expected=2) + 
#assert len(logstash_message) == 2 + #res["hits"]["hits"][0]["_source"]["tags"].remove("aws-cloudtrail") + #res["hits"]["hits"][1]["_source"]["tags"].remove("aws-cloudtrail") +# + #assert res["hits"]["hits"][0]["_source"]["aws"] == logstash_message[0]["aws"] + #assert res["hits"]["hits"][0]["_source"]["cloud"] == logstash_message[0]["cloud"] + #assert res["hits"]["hits"][0]["_source"]["log"] == logstash_message[0]["log"] + #assert res["hits"]["hits"][0]["_source"]["message"] == logstash_message[0]["message"] + #assert res["hits"]["hits"][0]["_source"]["tags"] == logstash_message[0]["tags"] +# + #assert res["hits"]["hits"][1]["_source"]["aws"] == logstash_message[1]["aws"] + #assert res["hits"]["hits"][1]["_source"]["cloud"] == logstash_message[1]["cloud"] + #assert res["hits"]["hits"][1]["_source"]["log"] == logstash_message[1]["log"] + #assert res["hits"]["hits"][1]["_source"]["message"] == logstash_message[1]["message"] + #assert res["hits"]["hits"][1]["_source"]["tags"] == logstash_message[1]["tags"] +# + #self.elasticsearch.refresh(index="logs-stash.elasticsearch-output") + #assert self.elasticsearch.count(index="logs-stash.elasticsearch-output")["count"] == 2 +# + #res = self.elasticsearch.search(index="logs-stash.elasticsearch-output", sort="_seq_no") + #assert res["hits"]["total"] == {"value": 2, "relation": "eq"} +# + #assert res["hits"]["hits"][0]["_source"]["aws"] == logstash_message[0]["aws"] + #assert res["hits"]["hits"][0]["_source"]["cloud"] == logstash_message[0]["cloud"] + #assert res["hits"]["hits"][0]["_source"]["log"] == logstash_message[0]["log"] + #assert res["hits"]["hits"][0]["_source"]["message"] == logstash_message[0]["message"] + #assert res["hits"]["hits"][0]["_source"]["tags"] == logstash_message[0]["tags"] +# + #assert res["hits"]["hits"][1]["_source"]["aws"] == logstash_message[1]["aws"] + #assert res["hits"]["hits"][1]["_source"]["cloud"] == logstash_message[1]["cloud"] + #assert res["hits"]["hits"][1]["_source"]["log"] == logstash_message[1]["log"] + #assert res["hits"]["hits"][1]["_source"]["message"] == logstash_message[1]["message"] + #assert res["hits"]["hits"][1]["_source"]["tags"] == logstash_message[1]["tags"] +# From e55eeaec4251af60f95942062f98ae923cc11bfe Mon Sep 17 00:00:00 2001 From: constanca Date: Wed, 17 Apr 2024 12:14:32 +0200 Subject: [PATCH 10/26] . 
--- tests/handlers/aws/test_integrations.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/handlers/aws/test_integrations.py b/tests/handlers/aws/test_integrations.py index 37844e07..ecfa2ca4 100644 --- a/tests/handlers/aws/test_integrations.py +++ b/tests/handlers/aws/test_integrations.py @@ -195,16 +195,16 @@ def test_ls_es_output(self) -> None: outputs: {self.default_outputs} """ - config_file_path = "config.yaml" - config_bucket_name = _time_based_id(suffix="config-bucket") - _s3_upload_content_to_bucket( - client=self.s3_client, - content=config_yaml.encode("utf-8"), - content_type="text/plain", - bucket_name=config_bucket_name, - key=config_file_path, - ) - + #config_file_path = "config.yaml" + #config_bucket_name = _time_based_id(suffix="config-bucket") + #_s3_upload_content_to_bucket( + # client=self.s3_client, + # content=config_yaml.encode("utf-8"), + # content_type="text/plain", + # bucket_name=config_bucket_name, + # key=config_file_path, + #) +# #os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" #fixtures = [ # _load_file_fixture("cloudwatch-log-1.json"), From 6b04a4479b2046596ba8e281551c230c8250d76f Mon Sep 17 00:00:00 2001 From: constanca Date: Wed, 17 Apr 2024 12:17:33 +0200 Subject: [PATCH 11/26] . --- tests/handlers/aws/test_integrations.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/handlers/aws/test_integrations.py b/tests/handlers/aws/test_integrations.py index ecfa2ca4..5f2c9a18 100644 --- a/tests/handlers/aws/test_integrations.py +++ b/tests/handlers/aws/test_integrations.py @@ -182,18 +182,18 @@ def test_ls_es_output(self) -> None: s3_sqs_queue_name = _time_based_id(suffix="source-s3-sqs") - s3_sqs_queue = _sqs_create_queue(self.sqs_client, s3_sqs_queue_name, self.localstack.get_url()) - - s3_sqs_queue_arn = s3_sqs_queue["QueueArn"] - s3_sqs_queue_url = s3_sqs_queue["QueueUrl"] - - config_yaml: str = f""" - inputs: - - type: s3-sqs - id: "{s3_sqs_queue_arn}" - tags: {self.default_tags} - outputs: {self.default_outputs} - """ + #s3_sqs_queue = _sqs_create_queue(self.sqs_client, s3_sqs_queue_name, self.localstack.get_url()) +# + #s3_sqs_queue_arn = s3_sqs_queue["QueueArn"] + #s3_sqs_queue_url = s3_sqs_queue["QueueUrl"] +# + #config_yaml: str = f""" + # inputs: + # - type: s3-sqs + # id: "{s3_sqs_queue_arn}" + # tags: {self.default_tags} + # outputs: {self.default_outputs} + #""" #config_file_path = "config.yaml" #config_bucket_name = _time_based_id(suffix="config-bucket") From bbc17d8aead99942526b08b5f69d13ccc768cedf Mon Sep 17 00:00:00 2001 From: constanca Date: Wed, 17 Apr 2024 12:20:47 +0200 Subject: [PATCH 12/26] . 
--- tests/handlers/aws/test_integrations.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/handlers/aws/test_integrations.py b/tests/handlers/aws/test_integrations.py index 5f2c9a18..e78a2a6b 100644 --- a/tests/handlers/aws/test_integrations.py +++ b/tests/handlers/aws/test_integrations.py @@ -175,12 +175,12 @@ def tearDown(self) -> None: def test_ls_es_output(self) -> None: print("TEST LS ES OUTPUT.") - - assert isinstance(self.elasticsearch, ElasticsearchContainer) - assert isinstance(self.logstash, LogstashContainer) - assert isinstance(self.localstack, LocalStackContainer) - - s3_sqs_queue_name = _time_based_id(suffix="source-s3-sqs") +# + # assert isinstance(self.elasticsearch, ElasticsearchContainer) + # assert isinstance(self.logstash, LogstashContainer) + # assert isinstance(self.localstack, LocalStackContainer) +# + # s3_sqs_queue_name = _time_based_id(suffix="source-s3-sqs") #s3_sqs_queue = _sqs_create_queue(self.sqs_client, s3_sqs_queue_name, self.localstack.get_url()) # From 24b88f9bf8c1a4d1a52b4d64213ace5de0d1873c Mon Sep 17 00:00:00 2001 From: constanca Date: Wed, 17 Apr 2024 12:23:43 +0200 Subject: [PATCH 13/26] . --- tests/handlers/aws/test_integrations.py | 323 --------------------- tests/handlers/aws/test_replay_trigger.py | 87 ------ tests/handlers/aws/test_utils.py | 336 ---------------------- 3 files changed, 746 deletions(-) delete mode 100644 tests/handlers/aws/test_integrations.py delete mode 100644 tests/handlers/aws/test_replay_trigger.py delete mode 100644 tests/handlers/aws/test_utils.py diff --git a/tests/handlers/aws/test_integrations.py b/tests/handlers/aws/test_integrations.py deleted file mode 100644 index e78a2a6b..00000000 --- a/tests/handlers/aws/test_integrations.py +++ /dev/null @@ -1,323 +0,0 @@ -# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -# or more contributor license agreements. Licensed under the Elastic License 2.0; -# you may not use this file except in compliance with the Elastic License 2.0. 
- -import datetime -import gzip -import os -import time -from typing import Any, Optional -from unittest import TestCase - -import boto3 -import mock -import pytest -from botocore.client import BaseClient as BotoBaseClient -from testcontainers.localstack import LocalStackContainer - -from handlers.aws.exceptions import ReplayHandlerException -from main_aws import handler -from share import get_hex_prefix, json_dumper, json_parser -from tests.testcontainers.es import ElasticsearchContainer -from tests.testcontainers.logstash import LogstashContainer - -from .utils import ( - _AWS_REGION, - _S3_NOTIFICATION_EVENT_TIME, - ContextMock, - _create_secrets, - _kinesis_create_stream, - _kinesis_put_records, - _kinesis_retrieve_event_from_kinesis_stream, - _load_file_fixture, - _logs_create_cloudwatch_logs_group, - _logs_create_cloudwatch_logs_stream, - _logs_retrieve_event_from_cloudwatch_logs, - _logs_upload_event_to_cloudwatch_logs, - _REMAINING_TIME_FORCE_CONTINUE_0ms, - _s3_upload_content_to_bucket, - _sqs_create_queue, - _sqs_get_messages, - _sqs_send_messages, - _sqs_send_s3_notifications, - _time_based_id, -) - -_OVER_COMPLETION_GRACE_PERIOD_2m = 1 + (1000 * 60 * 2) - - -@pytest.mark.integration -class TestLambdaHandlerIntegration(TestCase): - elasticsearch: Optional[ElasticsearchContainer] = None - logstash: Optional[LogstashContainer] = None - localstack: Optional[LocalStackContainer] = None - - aws_session: Optional[boto3.Session] = None - s3_client: Optional[BotoBaseClient] = None - logs_client: Optional[BotoBaseClient] = None - sqs_client: Optional[BotoBaseClient] = None - kinesis_client: Optional[BotoBaseClient] = None - sm_client: Optional[BotoBaseClient] = None - ec2_client: Optional[BotoBaseClient] = None - - secret_arn: Optional[Any] = None - - mocks: dict[str, Any] = {} - - @classmethod - def setUpClass(cls) -> None: - esc = ElasticsearchContainer() - cls.elasticsearch = esc.start() - - lgc = LogstashContainer(es_container=esc) - cls.logstash = lgc.start() - - lsc = LocalStackContainer(image="localstack/localstack:3.0.1") - lsc.with_env("EAGER_SERVICE_LOADING", "1") - lsc.with_env("SQS_DISABLE_CLOUDWATCH_METRICS", "1") - lsc.with_services("ec2", "kinesis", "logs", "s3", "sqs", "secretsmanager") - - cls.localstack = lsc.start() - - session = boto3.Session(region_name=_AWS_REGION) - cls.aws_session = session - cls.s3_client = session.client("s3", endpoint_url=cls.localstack.get_url()) - cls.logs_client = session.client("logs", endpoint_url=cls.localstack.get_url()) - cls.sqs_client = session.client("sqs", endpoint_url=cls.localstack.get_url()) - cls.kinesis_client = session.client("kinesis", endpoint_url=cls.localstack.get_url()) - cls.sm_client = session.client("secretsmanager", endpoint_url=cls.localstack.get_url()) - cls.ec2_client = session.client("ec2", endpoint_url=cls.localstack.get_url()) - - cls.secret_arn = _create_secrets( - cls.sm_client, - "es_secrets", - {"username": cls.elasticsearch.elastic_user, "password": cls.elasticsearch.elastic_password}, - ) - - cls.mocks = { - "storage.S3Storage._s3_client": mock.patch("storage.S3Storage._s3_client", new=cls.s3_client), - "share.secretsmanager._get_aws_sm_client": mock.patch( - "share.secretsmanager._get_aws_sm_client", lambda region_name: cls.sm_client - ), - "handlers.aws.utils.get_sqs_client": mock.patch( - "handlers.aws.utils.get_sqs_client", lambda: cls.sqs_client - ), - "handlers.aws.utils.get_ec2_client": mock.patch( - "handlers.aws.utils.get_ec2_client", lambda: cls.ec2_client - ), - 
"handlers.aws.handler.get_sqs_client": mock.patch( - "handlers.aws.handler.get_sqs_client", lambda: cls.sqs_client - ), - } - - for k, m in cls.mocks.items(): - m.start() - - @classmethod - def tearDownClass(cls) -> None: - assert cls.elasticsearch is not None - assert cls.logstash is not None - assert cls.localstack is not None - - cls.elasticsearch.stop() - cls.logstash.stop() - cls.localstack.stop() - - for k, m in cls.mocks.items(): - m.stop() - - def setUp(self) -> None: - assert isinstance(self.elasticsearch, ElasticsearchContainer) - assert isinstance(self.logstash, LogstashContainer) - assert isinstance(self.localstack, LocalStackContainer) - - os.environ["S3_CONFIG_FILE"] = "" - - sqs_continue_queue = _sqs_create_queue(self.sqs_client, _time_based_id(suffix="continuing")) - sqs_replay_queue = _sqs_create_queue(self.sqs_client, _time_based_id(suffix="replay")) - os.environ["SQS_CONTINUE_URL"] = sqs_continue_queue["QueueUrl"] - os.environ["SQS_REPLAY_URL"] = sqs_replay_queue["QueueUrl"] - - self.sqs_continue_queue_arn = sqs_continue_queue["QueueArn"] - self.sqs_replay_queue_arn = sqs_replay_queue["QueueArn"] - - self.default_tags: str = """ - - "tag1" - - "tag2" - - "tag3" - """ - - self.default_outputs: str = f""" - - type: "elasticsearch" - args: - elasticsearch_url: "{self.elasticsearch.get_url()}" - ssl_assert_fingerprint: {self.elasticsearch.ssl_assert_fingerprint} - username: "{self.secret_arn}:username" - password: "{self.secret_arn}:password" - - type: "logstash" - args: - logstash_url: "{self.logstash.get_url()}" - ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} - username: "{self.logstash.logstash_user}" - password: "{self.logstash.logstash_password}" - """ - - def tearDown(self) -> None: - assert isinstance(self.elasticsearch, ElasticsearchContainer) - assert isinstance(self.logstash, LogstashContainer) - - self.logstash.reset() - self.elasticsearch.reset() - - os.environ["S3_CONFIG_FILE"] = "" - os.environ["SQS_CONTINUE_URL"] = "" - os.environ["SQS_REPLAY_URL"] = "" - - def test_ls_es_output(self) -> None: - print("TEST LS ES OUTPUT.") -# - # assert isinstance(self.elasticsearch, ElasticsearchContainer) - # assert isinstance(self.logstash, LogstashContainer) - # assert isinstance(self.localstack, LocalStackContainer) -# - # s3_sqs_queue_name = _time_based_id(suffix="source-s3-sqs") - - #s3_sqs_queue = _sqs_create_queue(self.sqs_client, s3_sqs_queue_name, self.localstack.get_url()) -# - #s3_sqs_queue_arn = s3_sqs_queue["QueueArn"] - #s3_sqs_queue_url = s3_sqs_queue["QueueUrl"] -# - #config_yaml: str = f""" - # inputs: - # - type: s3-sqs - # id: "{s3_sqs_queue_arn}" - # tags: {self.default_tags} - # outputs: {self.default_outputs} - #""" - - #config_file_path = "config.yaml" - #config_bucket_name = _time_based_id(suffix="config-bucket") - #_s3_upload_content_to_bucket( - # client=self.s3_client, - # content=config_yaml.encode("utf-8"), - # content_type="text/plain", - # bucket_name=config_bucket_name, - # key=config_file_path, - #) -# - #os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" - #fixtures = [ - # _load_file_fixture("cloudwatch-log-1.json"), - # _load_file_fixture("cloudwatch-log-2.json"), - #] -# - #cloudtrail_filename_digest = ( - # "AWSLogs/aws-account-id/CloudTrail-Digest/region/yyyy/mm/dd/" - # "aws-account-id_CloudTrail-Digest_region_end-time_random-string.log.gz" - #) - #cloudtrail_filename_non_digest = ( - # "AWSLogs/aws-account-id/CloudTrail/region/yyyy/mm/dd/" - # 
"aws-account-id_CloudTrail_region_end-time_random-string.log.gz" - #) -# - #s3_bucket_name = _time_based_id(suffix="test-bucket") -# - #_s3_upload_content_to_bucket( - # client=self.s3_client, - # content=gzip.compress(fixtures[0].encode("utf-8")), - # content_type="application/x-gzip", - # bucket_name=s3_bucket_name, - # key=cloudtrail_filename_digest, - #) -# - #_s3_upload_content_to_bucket( - # client=self.s3_client, - # content=gzip.compress(fixtures[1].encode("utf-8")), - # content_type="application/x-gzip", - # bucket_name=s3_bucket_name, - # key=cloudtrail_filename_non_digest, - #) -# - #_sqs_send_s3_notifications( - # self.sqs_client, - # s3_sqs_queue_url, - # s3_bucket_name, - # [cloudtrail_filename_digest, cloudtrail_filename_non_digest], - #) -# - #event, _ = _sqs_get_messages(self.sqs_client, s3_sqs_queue_url, s3_sqs_queue_arn) -# - #ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) - first_call = handler(event, ctx) # type:ignore - - #assert first_call == "completed" -# - #self.elasticsearch.refresh(index="logs-aws.cloudtrail-default") - #assert self.elasticsearch.count(index="logs-aws.cloudtrail-default")["count"] == 2 -# - #res = self.elasticsearch.search(index="logs-aws.cloudtrail-default", sort="_seq_no") - #assert res["hits"]["total"] == {"value": 2, "relation": "eq"} -# - #assert res["hits"]["hits"][0]["_source"]["message"] == fixtures[0].rstrip("\n") - #assert res["hits"]["hits"][0]["_source"]["log"]["offset"] == 0 - #assert ( - # res["hits"]["hits"][0]["_source"]["log"]["file"]["path"] - # == f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{cloudtrail_filename_digest}" - #) - #assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name - #assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" - #assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["object"]["key"] == cloudtrail_filename_digest - #assert res["hits"]["hits"][0]["_source"]["cloud"]["provider"] == "aws" - #assert res["hits"]["hits"][0]["_source"]["cloud"]["region"] == "eu-central-1" - #assert res["hits"]["hits"][0]["_source"]["cloud"]["account"]["id"] == "000000000000" - #assert res["hits"]["hits"][0]["_source"]["tags"] == ["forwarded", "aws-cloudtrail", "tag1", "tag2", "tag3"] -# - #assert res["hits"]["hits"][1]["_source"]["message"] == fixtures[1].rstrip("\n") - #assert res["hits"]["hits"][1]["_source"]["log"]["offset"] == 0 - #assert ( - # res["hits"]["hits"][1]["_source"]["log"]["file"]["path"] - # == f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{cloudtrail_filename_non_digest}" - #) - #assert res["hits"]["hits"][1]["_source"]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name - #assert res["hits"]["hits"][1]["_source"]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" - #assert res["hits"]["hits"][1]["_source"]["aws"]["s3"]["object"]["key"] == cloudtrail_filename_non_digest - #assert res["hits"]["hits"][1]["_source"]["cloud"]["provider"] == "aws" - #assert res["hits"]["hits"][1]["_source"]["cloud"]["region"] == "eu-central-1" - #assert res["hits"]["hits"][1]["_source"]["cloud"]["account"]["id"] == "000000000000" - #assert res["hits"]["hits"][1]["_source"]["tags"] == ["forwarded", "aws-cloudtrail", "tag1", "tag2", "tag3"] -# - #logstash_message = self.logstash.get_messages(expected=2) - #assert len(logstash_message) == 2 - #res["hits"]["hits"][0]["_source"]["tags"].remove("aws-cloudtrail") - #res["hits"]["hits"][1]["_source"]["tags"].remove("aws-cloudtrail") 
-# - #assert res["hits"]["hits"][0]["_source"]["aws"] == logstash_message[0]["aws"] - #assert res["hits"]["hits"][0]["_source"]["cloud"] == logstash_message[0]["cloud"] - #assert res["hits"]["hits"][0]["_source"]["log"] == logstash_message[0]["log"] - #assert res["hits"]["hits"][0]["_source"]["message"] == logstash_message[0]["message"] - #assert res["hits"]["hits"][0]["_source"]["tags"] == logstash_message[0]["tags"] -# - #assert res["hits"]["hits"][1]["_source"]["aws"] == logstash_message[1]["aws"] - #assert res["hits"]["hits"][1]["_source"]["cloud"] == logstash_message[1]["cloud"] - #assert res["hits"]["hits"][1]["_source"]["log"] == logstash_message[1]["log"] - #assert res["hits"]["hits"][1]["_source"]["message"] == logstash_message[1]["message"] - #assert res["hits"]["hits"][1]["_source"]["tags"] == logstash_message[1]["tags"] -# - #self.elasticsearch.refresh(index="logs-stash.elasticsearch-output") - #assert self.elasticsearch.count(index="logs-stash.elasticsearch-output")["count"] == 2 -# - #res = self.elasticsearch.search(index="logs-stash.elasticsearch-output", sort="_seq_no") - #assert res["hits"]["total"] == {"value": 2, "relation": "eq"} -# - #assert res["hits"]["hits"][0]["_source"]["aws"] == logstash_message[0]["aws"] - #assert res["hits"]["hits"][0]["_source"]["cloud"] == logstash_message[0]["cloud"] - #assert res["hits"]["hits"][0]["_source"]["log"] == logstash_message[0]["log"] - #assert res["hits"]["hits"][0]["_source"]["message"] == logstash_message[0]["message"] - #assert res["hits"]["hits"][0]["_source"]["tags"] == logstash_message[0]["tags"] -# - #assert res["hits"]["hits"][1]["_source"]["aws"] == logstash_message[1]["aws"] - #assert res["hits"]["hits"][1]["_source"]["cloud"] == logstash_message[1]["cloud"] - #assert res["hits"]["hits"][1]["_source"]["log"] == logstash_message[1]["log"] - #assert res["hits"]["hits"][1]["_source"]["message"] == logstash_message[1]["message"] - #assert res["hits"]["hits"][1]["_source"]["tags"] == logstash_message[1]["tags"] -# diff --git a/tests/handlers/aws/test_replay_trigger.py b/tests/handlers/aws/test_replay_trigger.py deleted file mode 100644 index c67efd52..00000000 --- a/tests/handlers/aws/test_replay_trigger.py +++ /dev/null @@ -1,87 +0,0 @@ -# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -# or more contributor license agreements. Licensed under the Elastic License 2.0; -# you may not use this file except in compliance with the Elastic License 2.0. 
- -from typing import Optional -from unittest import TestCase - -import mock -import pytest - -from handlers.aws.replay_trigger import ReplayedEventReplayHandler, get_shipper_for_replay_event -from share import parse_config -from shippers import CompositeShipper, ElasticsearchShipper, LogstashShipper - - -@pytest.mark.unit -class TestReplayTrigger(TestCase): - @mock.patch("share.config._available_output_types", new=["elasticsearch", "logstash", "output_type"]) - def test_get_shipper_for_replay_event(self) -> None: - with self.subTest("Logstash shipper from replay event"): - config_yaml_kinesis = """ - inputs: - - type: kinesis-data-stream - id: arn:aws:kinesis:eu-central-1:123456789:stream/test-esf-kinesis-stream - outputs: - - type: logstash - args: - logstash_url: logstash_url - """ - config = parse_config(config_yaml_kinesis) - replay_handler = ReplayedEventReplayHandler("arn:aws:sqs:eu-central-1:123456789:queue/replayqueue") - logstash_shipper: Optional[CompositeShipper] = get_shipper_for_replay_event( - config, - "logstash", - {}, - "arn:aws:kinesis:eu-central-1:123456789:stream/test-esf-kinesis-stream", - replay_handler, - ) - assert isinstance(logstash_shipper, CompositeShipper) - assert isinstance(logstash_shipper._shippers[0], LogstashShipper) - - with self.subTest("Elasticsearch shipper from replay event"): - config_yaml_kinesis = """ - inputs: - - type: kinesis-data-stream - id: arn:aws:kinesis:eu-central-1:123456789:stream/test-esf-kinesis-stream - outputs: - - type: elasticsearch - args: - elasticsearch_url: "elasticsearch_url" - username: "username" - password: "password" - es_datastream_name: "es_datastream_name" - """ - config = parse_config(config_yaml_kinesis) - replay_handler = ReplayedEventReplayHandler("arn:aws:sqs:eu-central-1:123456789:queue/replayqueue") - elasticsearch_shipper: Optional[CompositeShipper] = get_shipper_for_replay_event( - config, - "elasticsearch", - {"es_datastream_name": "es_datastream_name"}, - "arn:aws:kinesis:eu-central-1:123456789:stream/test-esf-kinesis-stream", - replay_handler, - ) - - assert isinstance(elasticsearch_shipper, CompositeShipper) - assert isinstance(elasticsearch_shipper._shippers[0], ElasticsearchShipper) - - with self.subTest("None shipper from replay event"): - config_yaml_kinesis = """ - inputs: - - type: kinesis-data-stream - id: arn:aws:kinesis:eu-central-1:123456789:stream/test-esf-kinesis-stream - outputs: - - type: output_type - args: - output_arg: output_arg - """ - config = parse_config(config_yaml_kinesis) - replay_handler = ReplayedEventReplayHandler("arn:aws:sqs:eu-central-1:123456789:queue/replayqueue") - none_shipper = get_shipper_for_replay_event( - config, - "output_type", - {}, - "arn:aws:kinesis:eu-central-1:123456789:stream/test-esf-kinesis-stream", - replay_handler, - ) - assert none_shipper is None diff --git a/tests/handlers/aws/test_utils.py b/tests/handlers/aws/test_utils.py deleted file mode 100644 index 16e02ea5..00000000 --- a/tests/handlers/aws/test_utils.py +++ /dev/null @@ -1,336 +0,0 @@ -# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -# or more contributor license agreements. Licensed under the Elastic License 2.0; -# you may not use this file except in compliance with the Elastic License 2.0. 
- - -import random -import string -from datetime import datetime -from typing import Any -from unittest import TestCase - -import pytest - -from handlers.aws.utils import ( - cloudwatch_logs_object_id, - get_shipper_from_input, - kinesis_record_id, - s3_object_id, - sqs_object_id, -) -from share import parse_config -from shippers import LogstashShipper - -# Elasticsearch _id constraints -MAX_ES_ID_SIZ_BYTES = 512 - -# Kinesis Input -# https://docs.aws.amazon.com/kinesis/latest/APIReference/API_CreateStream.html -MAX_STREAM_NAME_CHARS = 128 -# https://docs.aws.amazon.com/kinesis/latest/APIReference/API_PutRecord.html#Streams-PutRecord-request-PartitionKey -MAX_PARTITION_KEY_CHARS = 256 -# https://docs.aws.amazon.com/kinesis/latest/APIReference/API_PutRecord.html#API_PutRecord_ResponseSyntax -MAX_SEQUENCE_NUMBER_DIGITS = 128 - -# S3-SQS Input -# https://docs.aws.amazon.com/AmazonS3/latest/API/API_control_CreateBucket.html -MAX_BUCKET_NAME_CHARS = 255 -# https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObject.html#API_PutObject_RequestSyntax -# S3 Object key does not seem to have a maximum allowed number of chars, so we set it to our internal maximum -MAX_OBJECT_KEY_CHARS = 512 - -# SQS Input -# https://docs.aws.amazon.com/AWSSimpleQueueService/latest/APIReference/API_CreateQueue.html#API_CreateQueue_RequestParameters -MAX_QUEUE_NAME_CHARS = 80 -# https://docs.aws.amazon.com/AWSSimpleQueueService/latest/SQSDeveloperGuide/sqs-queue-message-identifiers.html -MAX_MESSAGE_ID_CHARS = 100 - -# Cloudwatch logs input -# https://docs.aws.amazon.com/AmazonCloudWatchLogs/latest/APIReference/API_CreateLogGroup.html -MAX_CW_LOG_GROUP_NAME_CHARS = 512 -# https://docs.aws.amazon.com/AmazonCloudWatchLogs/latest/APIReference/API_CreateLogStream.html -MAX_CW_LOG_STREAM_NAME_CHARS = 512 -# No docs available, set it to the max -MAX_CW_EVENT_ID_CHARS = 512 - - -def _utf8len(s: str) -> int: - return len(s.encode("utf-8")) - - -def _get_random_string_of_size(size: int) -> str: - return "".join(random.choices(string.ascii_lowercase + string.digits, k=size)) - - -def _get_random_digit_string_of_size(size: int) -> str: - return "".join(random.choices(string.digits, k=size)) - - -@pytest.mark.unit -class TestGetTriggerTypeAndConfigSource(TestCase): - def test_get_trigger_type_and_config_source(self) -> None: - from handlers.aws.utils import CONFIG_FROM_PAYLOAD, CONFIG_FROM_S3FILE, get_trigger_type_and_config_source - - with self.subTest("cloudwatch-logs and CONFIG_FROM_S3FILE"): - event: dict[str, Any] = {"awslogs": {"data": ""}} - - assert get_trigger_type_and_config_source(event=event) == ("cloudwatch-logs", CONFIG_FROM_S3FILE) - - with self.subTest("no Records"): - with self.assertRaisesRegexp(Exception, "Not supported trigger"): - event = {} - - get_trigger_type_and_config_source(event=event) - - with self.subTest("len(Records) < 1"): - with self.assertRaisesRegexp(Exception, "Not supported trigger"): - event = {"Records": []} - - get_trigger_type_and_config_source(event=event) - - with self.subTest("body in first record: replay-sqs CONFIG_FROM_S3FILE"): - event = { - "Records": [ - { - "body": '{"output_type": "output_type", ' - '"output_args": "output_args", "event_payload": "event_payload"}' - } - ] - } - - assert get_trigger_type_and_config_source(event=event) == ("replay-sqs", CONFIG_FROM_S3FILE) - - with self.subTest("body in first record: eventSource override"): - event = {"Records": [{"body": '{"Records": [{"eventSource":"aws:s3"}]}', "eventSource": "aws:kinesis"}]} - - assert 
get_trigger_type_and_config_source(event=event) == ("s3-sqs", CONFIG_FROM_S3FILE) - - with self.subTest("body in first record: eventSource not override"): - event = { - "Records": [ - {"body": '{"Records": [{"eventSource":"not-available-trigger"}]}', "eventSource": "aws:kinesis"} - ] - } - - assert get_trigger_type_and_config_source(event=event) == ("kinesis-data-stream", CONFIG_FROM_S3FILE) - - with self.subTest("body not in first record: eventSource not override"): - event = {"Records": [{"eventSource": "aws:kinesis"}]} - - assert get_trigger_type_and_config_source(event=event) == ("kinesis-data-stream", CONFIG_FROM_S3FILE) - - with self.subTest("messageAttributes without originalEventSourceARN in first record, CONFIG_FROM_S3FILE"): - event = {"Records": [{"messageAttributes": {}, "eventSource": "aws:kinesis"}]} - - assert get_trigger_type_and_config_source(event=event) == ("kinesis-data-stream", CONFIG_FROM_S3FILE) - - with self.subTest("messageAttributes with originalEventSourceARN in first record, CONFIG_FROM_PAYLOAD"): - event = {"Records": [{"messageAttributes": {"originalEventSourceARN": ""}, "eventSource": "aws:kinesis"}]} - - assert get_trigger_type_and_config_source(event=event) == ("kinesis-data-stream", CONFIG_FROM_PAYLOAD) - - -@pytest.mark.unit -class TestDiscoverIntegrationScope(TestCase): - def test_discover_integration_scope(self) -> None: - from handlers.aws.utils import discover_integration_scope - - with self.subTest("discover_integration_scope aws.cloudtrail integration scope"): - s3_object_key = ( - "AWSLogs/aws-account-id/CloudTrail/region/" - "yyyy/mm/dd/aws-account-id_CloudTrail_region_end-time_random-string.log.gz" - ) - - assert discover_integration_scope(s3_object_key=s3_object_key) == "aws.cloudtrail" - - with self.subTest("discover_integration_scope aws.cloudtrail digest integration scope"): - s3_object_key = ( - "AWSLogs/aws-account-id/CloudTrail-Digest/region/" - "yyyy/mm/dd/aws-account-id_CloudTrail-Digest_region_end-time_random-string.log.gz" - ) - - assert discover_integration_scope(s3_object_key=s3_object_key) == "aws.cloudtrail-digest" - - with self.subTest("discover_integration_scope aws.cloudtrail insight integration scope"): - s3_object_key = ( - "AWSLogs/aws-account-id/CloudTrail-Insight/region/" - "yyyy/mm/dd/aws-account-id_CloudTrail-Insight_region_end-time_random-string.log.gz" - ) - - assert discover_integration_scope(s3_object_key=s3_object_key) == "aws.cloudtrail" - - with self.subTest("discover_integration_scope aws.cloudwatch_logs integration scope"): - s3_object_key = "exportedlogs/111-222-333/2021-12-28/hash/file.gz" - - assert discover_integration_scope(s3_object_key=s3_object_key) == "aws.cloudwatch_logs" - - with self.subTest("discover_integration_scope aws.elb_logs integration scope"): - s3_object_key = ( - "AWSLogs/aws-account-id/elasticloadbalancing/region/yyyy/mm/dd/" - "aws-account-id_elasticloadbalancing_region_load-balancer-id_end-time_ip-address_random-string.log.gz" - ) - - assert discover_integration_scope(s3_object_key=s3_object_key) == "aws.elb_logs" - - with self.subTest("discover_integration_scope aws.firewall_logs integration scope"): - s3_object_key = "AWSLogs/aws-account-id/network-firewall/log-type/Region/firewall-name/timestamp/" - - assert discover_integration_scope(s3_object_key=s3_object_key) == "aws.firewall_logs" - - with self.subTest("discover_integration_scope aws.waf integration scope"): - s3_object_key = "AWSLogs/account-id/WAFLogs/Region/web-acl-name/YYYY/MM/dd/HH/mm" - - assert 
discover_integration_scope(s3_object_key=s3_object_key) == "aws.waf" - - with self.subTest("discover_integration_scope aws.vpcflow integration scope"): - s3_object_key = "AWSLogs/id/vpcflowlogs/region/date_vpcflowlogs_region_file.log.gz" - - assert discover_integration_scope(s3_object_key=s3_object_key) == "aws.vpcflow" - - with self.subTest("discover_integration_scope unknown integration scope"): - s3_object_key = "random_hash" - - assert discover_integration_scope(s3_object_key=s3_object_key) == "generic" - - with self.subTest("discover_integration_scope empty s3"): - s3_object_key = "" - - assert discover_integration_scope(s3_object_key=s3_object_key) == "generic" - - -@pytest.mark.unit -class TestGetShipperFromInput(TestCase): - def test_get_shipper_from_input(self) -> None: - with self.subTest("Logstash shipper from Kinesis input"): - config_yaml_kinesis: str = """ - inputs: - - type: kinesis-data-stream - id: arn:aws:kinesis:eu-central-1:123456789:stream/test-esf-kinesis-stream - outputs: - - type: logstash - args: - logstash_url: logstash_url - """ - config = parse_config(config_yaml_kinesis) - event_input = config.get_input_by_id( - "arn:aws:kinesis:eu-central-1:123456789:stream/test-esf-kinesis-stream" - ) - assert event_input is not None - shipper = get_shipper_from_input(event_input=event_input, config_yaml=config_yaml_kinesis) - assert len(shipper._shippers) == 1 - assert isinstance(shipper._shippers[0], LogstashShipper) - - with self.subTest("Logstash shipper from Cloudwatch logs input"): - config_yaml_cw: str = """ - inputs: - - type: cloudwatch-logs - id: arn:aws:logs:eu-central-1:123456789:stream/test-cw-logs - outputs: - - type: logstash - args: - logstash_url: logstash_url - """ - config = parse_config(config_yaml_cw) - event_input = config.get_input_by_id("arn:aws:logs:eu-central-1:123456789:stream/test-cw-logs") - assert event_input is not None - shipper = get_shipper_from_input(event_input=event_input, config_yaml=config_yaml_cw) - assert len(shipper._shippers) == 1 - assert isinstance(shipper._shippers[0], LogstashShipper) - - -@pytest.mark.unit -class TestRecordId(TestCase): - def test_kinesis_id_less_than_512bytes(self) -> None: - stream_name: str = _get_random_string_of_size(MAX_STREAM_NAME_CHARS) - partition_key: str = _get_random_string_of_size(MAX_PARTITION_KEY_CHARS) - sequence_number: str = _get_random_digit_string_of_size(MAX_SEQUENCE_NUMBER_DIGITS) - approximate_arrival_timestamp: int = int(datetime.utcnow().timestamp() * 1000) - relevant_fields_for_id: dict[str, Any] = { - "fields": { - "log": {"offset": 1}, - "aws": { - "kinesis": { - "type": "stream", - "name": stream_name, - "partition_key": partition_key, - "sequence_number": sequence_number, - } - }, - }, - "meta": { - "approximate_arrival_timestamp": approximate_arrival_timestamp, - }, - } - - generated_id = kinesis_record_id(relevant_fields_for_id) - assert _utf8len(generated_id) <= MAX_ES_ID_SIZ_BYTES - - def test_s3_id_less_than_512bytes(self) -> None: - event_time: int = int(datetime.utcnow().timestamp() * 1000) - bucket_name: str = _get_random_string_of_size(MAX_BUCKET_NAME_CHARS) - bucket_arn: str = f"arn:aws:s3:::{bucket_name}" - object_key: str = _get_random_string_of_size(MAX_OBJECT_KEY_CHARS) - relevant_fields_for_id: dict[str, Any] = { - "fields": { - "log": { - "offset": 1, - }, - "aws": { - "s3": { - "bucket": {"arn": bucket_arn}, - "object": {"key": object_key}, - } - }, - }, - "meta": {"event_time": event_time}, - } - generated_id = s3_object_id(relevant_fields_for_id) - assert 
_utf8len(generated_id) <= MAX_ES_ID_SIZ_BYTES - - def test_sqs_id_less_than_512bytes(self) -> None: - sent_timestamp: int = int(datetime.utcnow().timestamp() * 1000) - queue_name: str = _get_random_string_of_size(MAX_QUEUE_NAME_CHARS) - message_id: str = _get_random_string_of_size(MAX_MESSAGE_ID_CHARS) - - relevant_fields_for_id: dict[str, Any] = { - "fields": { - "log": { - "offset": 1, - }, - "aws": { - "sqs": { - "name": queue_name, - "message_id": message_id, - }, - }, - }, - "meta": {"sent_timestamp": sent_timestamp}, - } - - generated_id = sqs_object_id(relevant_fields_for_id) - assert _utf8len(generated_id) <= MAX_ES_ID_SIZ_BYTES - - def test_cloudwatch_id_less_than_512bytes(self) -> None: - event_timestamp: int = int(datetime.utcnow().timestamp() * 1000) - log_group_name: str = _get_random_string_of_size(MAX_CW_LOG_GROUP_NAME_CHARS) - log_stream_name: str = _get_random_string_of_size(MAX_CW_LOG_STREAM_NAME_CHARS) - event_id: str = _get_random_string_of_size(MAX_CW_EVENT_ID_CHARS) - - relevant_fields_for_id: dict[str, Any] = { - "fields": { - "log": { - "offset": 1, - }, - "aws": { - "cloudwatch": { - "log_group": log_group_name, - "log_stream": log_stream_name, - "event_id": event_id, - } - }, - }, - "meta": {"event_timestamp": event_timestamp}, - } - - generated_id = cloudwatch_logs_object_id(relevant_fields_for_id) - assert _utf8len(generated_id) <= MAX_ES_ID_SIZ_BYTES From d12f13a5aef13303229cc860125368f62a942174 Mon Sep 17 00:00:00 2001 From: constanca Date: Wed, 17 Apr 2024 12:28:26 +0200 Subject: [PATCH 14/26] . --- tests/handlers/aws/test_handler.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/handlers/aws/test_handler.py b/tests/handlers/aws/test_handler.py index 6858d0bd..38458311 100644 --- a/tests/handlers/aws/test_handler.py +++ b/tests/handlers/aws/test_handler.py @@ -5,6 +5,7 @@ import base64 import datetime import importlib +import logging import os import sys from copy import deepcopy @@ -394,6 +395,13 @@ class TestLambdaHandlerNoop(TestCase): new={"aws:s3": "s3-sqs", "aws:sqs": "sqs", "aws:kinesis": "kinesis-data-stream", "dummy": "s3-sqs"}, ) def test_lambda_handler_noop(self) -> None: + print("Test lambda handler noop") + + logger = logging.getLogger() + logger.info( + "Test lambda handler noop" + ) + reload_handlers_aws_handler() with self.subTest("no originalEventSourceARN in messageAttributes"): From 2c674b405bb3e6e03c8336b8fbd933adea3ca5cc Mon Sep 17 00:00:00 2001 From: constanca Date: Wed, 17 Apr 2024 12:30:48 +0200 Subject: [PATCH 15/26] . 
--- tests/handlers/aws/test_handler.py | 1233 ++++++++++++++-------------- 1 file changed, 617 insertions(+), 616 deletions(-) diff --git a/tests/handlers/aws/test_handler.py b/tests/handlers/aws/test_handler.py index 38458311..218ca806 100644 --- a/tests/handlers/aws/test_handler.py +++ b/tests/handlers/aws/test_handler.py @@ -530,619 +530,620 @@ def test_lambda_handler_noop(self) -> None: assert handler(lambda_event, ctx) == "exception raised: Exception('raised')" # type:ignore -@pytest.mark.unit -class TestLambdaHandlerFailure(TestCase): - def setUp(self) -> None: - revert_handlers_aws_handler() - - @mock.patch("share.config._available_output_types", new=["elasticsearch", "logstash", "output_type"]) - @mock.patch( - "share.config._available_input_types", new=["cloudwatch-logs", "s3-sqs", "sqs", "kinesis-data-stream", "dummy"] - ) - @mock.patch("share.secretsmanager._get_aws_sm_client", new=MockContent._get_aws_sm_client) - @mock.patch("handlers.aws.utils.get_ec2_client", lambda: _ec2_client_mock) - @mock.patch("handlers.aws.handler.get_sqs_client", lambda: _sqs_client_mock) - @mock.patch("storage.S3Storage._s3_client", _s3_client_mock) - def test_lambda_handler_failure(self) -> None: - dummy_event: dict[str, Any] = { - "Records": [ - { - "eventSource": "aws:sqs", - "eventSourceARN": "arn:aws:sqs", - }, - ] - } - - with self.subTest("output not in config from replay payload body"): - os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" - event = { - "Records": [ - { - "eventSourceARN": "arn:aws:sqs:eu-central-1:123456789:replay-queue", - "receiptHandle": "receiptHandle", - "body": '{"output_type": "output_type", "output_args": {},' - '"event_input_id": "arn:aws:dummy:eu-central-1:123456789:input", ' - '"event_payload": {"_id": "_id"}}', - } - ] - } - with self.assertRaisesRegex(OutputConfigException, "Cannot load output of type output_type"): - ctx = ContextMock() - - handler(event, ctx) # type:ignore - - with self.subTest("input not in config from replay payload body"): - os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" - event = { - "Records": [ - { - "eventSourceARN": "arn:aws:sqs:eu-central-1:123456789:replay-queue", - "receiptHandle": "receiptHandle", - "body": '{"output_type": "output_type", "output_args": {},' - '"event_input_id": "arn:aws:dummy:eu-central-1:123456789:not-existing-input", ' - '"event_payload": {"_id": "_id"}}', - } - ] - } - with self.assertRaisesRegex( - InputConfigException, - "Cannot load input for input id arn:aws:dummy:eu-central-1:123456789:not-existing-input", - ): - ctx = ContextMock() - - handler(event, ctx) # type:ignore - - with self.subTest("empty config"): - os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" - with self.assertRaisesRegex(ConfigFileException, "Empty config"): - ctx = ContextMock() - _s3_client_mock.config_content = b"" - - handler(dummy_event, ctx) # type:ignore - - with self.subTest("Invalid s3 uri apm client not None"): - with mock.patch("handlers.aws.utils.get_apm_client", lambda: mock.MagicMock()): - with self.assertRaisesRegex(ConfigFileException, "Invalid s3 uri provided: ``"): - os.environ["S3_CONFIG_FILE"] = "" - ctx = ContextMock() - - handler(dummy_event, ctx) # type:ignore - - with self.subTest("Invalid s3 uri"): - with self.assertRaisesRegex(ConfigFileException, "Invalid s3 uri provided: ``"): - os.environ["S3_CONFIG_FILE"] = "" - ctx = ContextMock() - - handler(dummy_event, ctx) # type:ignore - - with 
self.subTest("Invalid s3 uri no bucket and key"): - with self.assertRaisesRegex(ConfigFileException, "Invalid s3 uri provided: `s3://`"): - os.environ["S3_CONFIG_FILE"] = "s3://" - ctx = ContextMock() - - handler(dummy_event, ctx) # type:ignore - - with self.subTest("Invalid s3 uri no key"): - with self.assertRaisesRegex(ConfigFileException, "Invalid s3 uri provided: `s3://bucket`"): - os.environ["S3_CONFIG_FILE"] = "s3://bucket" - ctx = ContextMock() - - handler(dummy_event, ctx) # type:ignore - - with self.subTest("no Records in event"): - with self.assertRaisesRegex(TriggerTypeException, "Not supported trigger"): - ctx = ContextMock() - event = {} - - handler(event, ctx) # type:ignore - - with self.subTest("empty Records in event"): - with self.assertRaisesRegex(TriggerTypeException, "Not supported trigger"): - ctx = ContextMock() - event = {"Records": []} - - handler(event, ctx) # type:ignore - - with self.subTest("no eventSource in Records in event"): - with self.assertRaisesRegex(TriggerTypeException, "Not supported trigger"): - ctx = ContextMock() - event = {"Records": [{}]} - - handler(event, ctx) # type:ignore - - with self.subTest("no valid eventSource in Records in event"): - with self.assertRaisesRegex(TriggerTypeException, "Not supported trigger"): - ctx = ContextMock() - event = {"Records": [{"eventSource": "invalid"}]} - - handler(event, ctx) # type:ignore - - with self.subTest("no eventSource in body Records in event"): - with self.assertRaisesRegex(TriggerTypeException, "Not supported trigger"): - ctx = ContextMock() - event = {"Records": [{"body": ""}]} - - handler(event, ctx) # type:ignore - - with self.subTest("no valid eventSource in body Records in event"): - with self.assertRaisesRegex(TriggerTypeException, "Not supported trigger"): - ctx = ContextMock() - event = {"Records": [{"body": "", "eventSource": "invalid"}]} - - handler(event, ctx) # type:ignore - - with self.subTest("replay event loads config from s3"): - with self.assertRaisesRegex(ConfigFileException, "Invalid s3 uri provided: `s3://bucket`"): - ctx = ContextMock() - event = { - "Records": [ - { - "body": '{"output_type": "", "output_args": "", "event_payload": ""}', - } - ] - } - handler(event, ctx) # type:ignore - - with self.subTest("invalid secretsmanager: arn format too long"): - os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" - with self.assertRaisesRegex( - ConfigFileException, - "Invalid arn format: " - "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret:THIS:IS:INVALID", - ): - ctx = ContextMock() - _s3_client_mock.config_content = b""" - inputs: - - type: "s3-sqs" - id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret:THIS:IS:INVALID" - outputs: - - type: "elasticsearch" - args: - elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" - username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" - password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" - es_datastream_name: "logs-redis.log-default" - """ - - event = deepcopy(dummy_event) - - handler(event, ctx) # type:ignore - - with self.subTest("invalid secretsmanager: empty region"): - os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" - with self.assertRaisesRegex( - ConfigFileException, - "Must be provided region in arn: " "arn:aws:secretsmanager::123456789:secret:plain_secret", - ): - ctx = ContextMock() - # BEWARE region is empty at id - 
_s3_client_mock.config_content = b""" - inputs: - - type: "s3-sqs" - id: "arn:aws:secretsmanager::123456789:secret:plain_secret" - outputs: - - type: "elasticsearch" - args: - elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets" - username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" - password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" - es_datastream_name: "logs-redis.log-default" - """ - - event = deepcopy(dummy_event) - - handler(event, ctx) # type:ignore - - with self.subTest("invalid secretsmanager: empty secrets manager name"): - os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" - with self.assertRaisesRegex( - ConfigFileException, - "Must be provided secrets manager name in arn: " - "arn:aws:secretsmanager:eu-central-1:123456789:secret:", - ): - ctx = ContextMock() - # BEWARE empty secrets manager name at id - _s3_client_mock.config_content = b""" - inputs: - - type: "s3-sqs" - id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:" - outputs: - - type: "elasticsearch" - args: - elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets" - username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" - password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" - es_datastream_name: "logs-redis.log-default" - """ - - event = deepcopy(dummy_event) - - handler(event, ctx) # type:ignore - - with self.subTest("invalid secretsmanager: cannot use both plain text and key/value pairs"): - os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" - with self.assertRaisesRegex( - ConfigFileException, - "You cannot have both plain text and json key for the same " - "secret: arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username", - ): - ctx = ContextMock() - # BEWARE using es_secrets plain text for elasticsearch_url and es_secrets:username for username - _s3_client_mock.config_content = b""" - inputs: - - type: "s3-sqs" - id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secrets" - outputs: - - type: "elasticsearch" - args: - elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets" - username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" - password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" - es_datastream_name: "logs-redis.log-default" - """ - - event = deepcopy(dummy_event) - - handler(event, ctx) # type:ignore - - with self.subTest("invalid secretsmanager: empty secret key"): - os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" - with self.assertRaisesRegex( - ConfigFileException, - "Error for secret " - "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:: key must " - "not be empty", - ): - ctx = ContextMock() - # BEWARE empty key at elasticsearch_url - _s3_client_mock.config_content = b""" - inputs: - - type: "s3-sqs" - id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret" - outputs: - - type: "elasticsearch" - args: - elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:" - username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" - password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" - es_datastream_name: "logs-redis.log-default" - """ - - event = deepcopy(dummy_event) - - 
handler(event, ctx) # type:ignore - - with self.subTest("invalid secretsmanager: secret does not exist"): - os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" - with self.assertRaisesRegex( - ConfigFileException, - r"An error occurred \(ResourceNotFoundException\) when calling " - "the GetSecretValue operation: Secrets Manager can't find the specified secret.", - ): - ctx = ContextMock() - _s3_client_mock.config_content = b""" - inputs: - - type: "s3-sqs" - id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:DOES_NOT_EXIST" - outputs: - - type: "elasticsearch" - args: - elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" - username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" - password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" - es_datastream_name: "logs-redis.log-default" - """ - - event = deepcopy(dummy_event) - - handler(event, ctx) # type:ignore - - with self.subTest("invalid secretsmanager: empty plain secret value"): - os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" - with self.assertRaisesRegex( - ConfigFileException, - "Error for secret " - "arn:aws:secretsmanager:eu-central-1:123456789:secret:empty_secret: must " - "not be empty", - ): - ctx = ContextMock() - _s3_client_mock.config_content = b""" - inputs: - - type: "s3-sqs" - id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:empty_secret" - outputs: - - type: "elasticsearch" - args: - elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" - username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" - password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" - es_datastream_name: "logs-redis.log-default" - """ - - event = deepcopy(dummy_event) - - handler(event, ctx) # type:ignore - - with self.subTest("invalid secretsmanager: empty key/value secret value"): - os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" - with self.assertRaisesRegex( - ConfigFileException, - "Error for secret " - "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:empty: must " - "not be empty", - ): - ctx = ContextMock() - _s3_client_mock.config_content = b""" - inputs: - - type: "s3-sqs" - id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:empty" - outputs: - - type: "elasticsearch" - args: - elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" - username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" - password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" - es_datastream_name: "logs-redis.log-default" - """ - - event = deepcopy(dummy_event) - - handler(event, ctx) # type:ignore - - with self.subTest("invalid secretsmanager: plain text used as key/value"): - os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" - with self.assertRaisesRegex( - ConfigFileException, - "Error for secret " - "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret:SHOULD_NOT_HAVE_A_KEY: " - "expected to be keys/values pair", - ): - ctx = ContextMock() - _s3_client_mock.config_content = b""" - inputs: - - type: "s3-sqs" - id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret:SHOULD_NOT_HAVE_A_KEY" - outputs: - - type: "elasticsearch" - args: - elasticsearch_url: 
"arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" - username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" - password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" - es_datastream_name: "logs-redis.log-default" - """ - - event = deepcopy(dummy_event) - - handler(event, ctx) # type:ignore - - with self.subTest("invalid secretsmanager: key does not exist in secret manager"): - os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" - with self.assertRaisesRegex( - ConfigFileException, - "Error for secret " - "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:I_DO_NOT_EXIST: " - "key not found", - ): - ctx = ContextMock() - _s3_client_mock.config_content = b""" - inputs: - - type: "s3-sqs" - id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:I_DO_NOT_EXIST" - outputs: - - type: "elasticsearch" - args: - elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" - username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" - password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" - es_datastream_name: "logs-redis.log-default" - """ - - event = deepcopy(dummy_event) - - handler(event, ctx) # type:ignore - - with self.subTest("invalid secretsmanager: plain text secret not str"): - os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" - with self.assertRaisesRegex( - ConfigFileException, - "Error for secret " - "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret_not_str_byte: " - "expected to be a string", - ): - ctx = ContextMock() - _s3_client_mock.config_content = b""" - inputs: - - type: "s3-sqs" - id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret_not_str_byte" - outputs: - - type: "elasticsearch" - args: - elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" - username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" - password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" - es_datastream_name: "logs-redis.log-default" - """ - - event = deepcopy(dummy_event) - - handler(event, ctx) # type:ignore - - with self.subTest("invalid secretsmanager: json TypeError raised"): - os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" - with self.assertRaisesRegex( - ConfigFileException, - "Error for secret " - "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret_not_str_int: " - "expected to be a string", - ): - ctx = ContextMock() - _s3_client_mock.config_content = b""" - inputs: - - type: "s3-sqs" - id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret_not_str_int" - outputs: - - type: "elasticsearch" - args: - elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" - username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" - password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" - es_datastream_name: "logs-redis.log-default" - """ - - event = deepcopy(dummy_event) - - handler(event, ctx) # type:ignore - - with self.subTest("tags not list"): - os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" - with self.assertRaisesRegex( - ConfigFileException, "`tags` must be provided as list for input mock_plain_text_sqs_arn" - ): - ctx = 
ContextMock() - _s3_client_mock.config_content = b""" - inputs: - - type: "s3-sqs" - id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret" - tags: "tag1" - outputs: - - type: "elasticsearch" - args: - elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" - username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" - password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" - es_datastream_name: "logs-redis.log-default" - """ - - event = deepcopy(dummy_event) - - handler(event, ctx) # type:ignore - - with self.subTest("each tag must be of type str"): - os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" - with self.assertRaisesRegex( - ConfigFileException, - r"Each tag in `tags` must be provided as string for input " - r"mock_plain_text_sqs_arn, given: \['tag1', 2, 'tag3'\]", - ): - ctx = ContextMock() - _s3_client_mock.config_content = b""" - inputs: - - type: "s3-sqs" - id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret" - tags: - - "tag1" - - 2 - - "tag3" - outputs: - - type: "elasticsearch" - args: - elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" - username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" - password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" - es_datastream_name: "logs-redis.log-default" - """ - - event = deepcopy(dummy_event) - - handler(event, ctx) # type:ignore - - with self.subTest("expand_event_list_from_field not str"): - os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" - with self.assertRaisesRegex( - ConfigFileException, - "`expand_event_list_from_field` must be provided as string for input mock_plain_text_sqs_arn", - ): - ctx = ContextMock() - _s3_client_mock.config_content = b""" - inputs: - - type: "s3-sqs" - id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret" - expand_event_list_from_field: 0 - outputs: - - type: "elasticsearch" - args: - elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" - username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" - password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" - es_datastream_name: "logs-redis.log-default" - """ - - event = deepcopy(dummy_event) - - handler(event, ctx) # type:ignore - - with self.subTest("root_fields_to_add_to_expanded_event not `all` when string"): - os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" - with self.assertRaisesRegex( - ConfigFileException, - "`root_fields_to_add_to_expanded_event` must be provided as `all` or a list of strings", - ): - ctx = ContextMock() - _s3_client_mock.config_content = b""" - inputs: - - type: "s3-sqs" - id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret" - root_fields_to_add_to_expanded_event: not_all - outputs: - - type: "elasticsearch" - args: - elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" - username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" - password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" - es_datastream_name: "logs-redis.log-default" - """ - - event = deepcopy(dummy_event) - - handler(event, ctx) # type:ignore - - with self.subTest("root_fields_to_add_to_expanded_event not `all` 
neither list of strings"): - os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" - with self.assertRaisesRegex( - ConfigFileException, - "`root_fields_to_add_to_expanded_event` must be provided as `all` or a list of strings", - ): - ctx = ContextMock() - _s3_client_mock.config_content = b""" - inputs: - - type: "s3-sqs" - id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret" - root_fields_to_add_to_expanded_event: 0 - outputs: - - type: "elasticsearch" - args: - elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" - username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" - password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" - es_datastream_name: "logs-redis.log-default" - """ - - event = deepcopy(dummy_event) - - handler(event, ctx) # type:ignore - - with self.subTest("json_content_type not valid"): - os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" - with self.assertRaisesRegex( - ConfigFileException, - "`json_content_type` must be one of ndjson,single,disabled " - "for input mock_plain_text_sqs_arn: whatever given", - ): - ctx = ContextMock() - _s3_client_mock.config_content = b""" - inputs: - - type: "s3-sqs" - id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret" - json_content_type: whatever - outputs: - - type: "elasticsearch" - args: - elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" - username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" - password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" - es_datastream_name: "logs-redis.log-default" - """ - - event = deepcopy(dummy_event) - - handler(event, ctx) # type:ignore +#@pytest.mark.unit +#class TestLambdaHandlerFailure(TestCase): +# def setUp(self) -> None: +# revert_handlers_aws_handler() +# +# @mock.patch("share.config._available_output_types", new=["elasticsearch", "logstash", "output_type"]) +# @mock.patch( +# "share.config._available_input_types", new=["cloudwatch-logs", "s3-sqs", "sqs", "kinesis-data-stream", "dummy"] +# ) +# @mock.patch("share.secretsmanager._get_aws_sm_client", new=MockContent._get_aws_sm_client) +# @mock.patch("handlers.aws.utils.get_ec2_client", lambda: _ec2_client_mock) +# @mock.patch("handlers.aws.handler.get_sqs_client", lambda: _sqs_client_mock) +# @mock.patch("storage.S3Storage._s3_client", _s3_client_mock) +# def test_lambda_handler_failure(self) -> None: +# dummy_event: dict[str, Any] = { +# "Records": [ +# { +# "eventSource": "aws:sqs", +# "eventSourceARN": "arn:aws:sqs", +# }, +# ] +# } +# +# with self.subTest("output not in config from replay payload body"): +# os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" +# event = { +# "Records": [ +# { +# "eventSourceARN": "arn:aws:sqs:eu-central-1:123456789:replay-queue", +# "receiptHandle": "receiptHandle", +# "body": '{"output_type": "output_type", "output_args": {},' +# '"event_input_id": "arn:aws:dummy:eu-central-1:123456789:input", ' +# '"event_payload": {"_id": "_id"}}', +# } +# ] +# } +# with self.assertRaisesRegex(OutputConfigException, "Cannot load output of type output_type"): +# ctx = ContextMock() +# +# handler(event, ctx) # type:ignore +# +# with self.subTest("input not in config from replay payload body"): +# os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" +# event = 
{ +# "Records": [ +# { +# "eventSourceARN": "arn:aws:sqs:eu-central-1:123456789:replay-queue", +# "receiptHandle": "receiptHandle", +# "body": '{"output_type": "output_type", "output_args": {},' +# '"event_input_id": "arn:aws:dummy:eu-central-1:123456789:not-existing-input", ' +# '"event_payload": {"_id": "_id"}}', +# } +# ] +# } +# with self.assertRaisesRegex( +# InputConfigException, +# "Cannot load input for input id arn:aws:dummy:eu-central-1:123456789:not-existing-input", +# ): +# ctx = ContextMock() +# +# handler(event, ctx) # type:ignore +# +# with self.subTest("empty config"): +# os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" +# with self.assertRaisesRegex(ConfigFileException, "Empty config"): +# ctx = ContextMock() +# _s3_client_mock.config_content = b"" +# +# handler(dummy_event, ctx) # type:ignore +# +# with self.subTest("Invalid s3 uri apm client not None"): +# with mock.patch("handlers.aws.utils.get_apm_client", lambda: mock.MagicMock()): +# with self.assertRaisesRegex(ConfigFileException, "Invalid s3 uri provided: ``"): +# os.environ["S3_CONFIG_FILE"] = "" +# ctx = ContextMock() +# +# handler(dummy_event, ctx) # type:ignore +# +# with self.subTest("Invalid s3 uri"): +# with self.assertRaisesRegex(ConfigFileException, "Invalid s3 uri provided: ``"): +# os.environ["S3_CONFIG_FILE"] = "" +# ctx = ContextMock() +# +# handler(dummy_event, ctx) # type:ignore +# +# with self.subTest("Invalid s3 uri no bucket and key"): +# with self.assertRaisesRegex(ConfigFileException, "Invalid s3 uri provided: `s3://`"): +# os.environ["S3_CONFIG_FILE"] = "s3://" +# ctx = ContextMock() +# +# handler(dummy_event, ctx) # type:ignore +# +# with self.subTest("Invalid s3 uri no key"): +# with self.assertRaisesRegex(ConfigFileException, "Invalid s3 uri provided: `s3://bucket`"): +# os.environ["S3_CONFIG_FILE"] = "s3://bucket" +# ctx = ContextMock() +# +# handler(dummy_event, ctx) # type:ignore +# +# with self.subTest("no Records in event"): +# with self.assertRaisesRegex(TriggerTypeException, "Not supported trigger"): +# ctx = ContextMock() +# event = {} +# +# handler(event, ctx) # type:ignore +# +# with self.subTest("empty Records in event"): +# with self.assertRaisesRegex(TriggerTypeException, "Not supported trigger"): +# ctx = ContextMock() +# event = {"Records": []} +# +# handler(event, ctx) # type:ignore +# +# with self.subTest("no eventSource in Records in event"): +# with self.assertRaisesRegex(TriggerTypeException, "Not supported trigger"): +# ctx = ContextMock() +# event = {"Records": [{}]} +# +# handler(event, ctx) # type:ignore +# +# with self.subTest("no valid eventSource in Records in event"): +# with self.assertRaisesRegex(TriggerTypeException, "Not supported trigger"): +# ctx = ContextMock() +# event = {"Records": [{"eventSource": "invalid"}]} +# +# handler(event, ctx) # type:ignore +# +# with self.subTest("no eventSource in body Records in event"): +# with self.assertRaisesRegex(TriggerTypeException, "Not supported trigger"): +# ctx = ContextMock() +# event = {"Records": [{"body": ""}]} +# +# handler(event, ctx) # type:ignore +# +# with self.subTest("no valid eventSource in body Records in event"): +# with self.assertRaisesRegex(TriggerTypeException, "Not supported trigger"): +# ctx = ContextMock() +# event = {"Records": [{"body": "", "eventSource": "invalid"}]} +# +# handler(event, ctx) # type:ignore +# +# with self.subTest("replay event loads config from s3"): +# with self.assertRaisesRegex(ConfigFileException, "Invalid s3 uri provided: 
`s3://bucket`"): +# ctx = ContextMock() +# event = { +# "Records": [ +# { +# "body": '{"output_type": "", "output_args": "", "event_payload": ""}', +# } +# ] +# } +# handler(event, ctx) # type:ignore +# +# with self.subTest("invalid secretsmanager: arn format too long"): +# os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" +# with self.assertRaisesRegex( +# ConfigFileException, +# "Invalid arn format: " +# "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret:THIS:IS:INVALID", +# ): +# ctx = ContextMock() +# _s3_client_mock.config_content = b""" +# inputs: +# - type: "s3-sqs" +# id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret:THIS:IS:INVALID" +# outputs: +# - type: "elasticsearch" +# args: +# elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" +# username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" +# password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" +# es_datastream_name: "logs-redis.log-default" +# """ +# +# event = deepcopy(dummy_event) +# +# handler(event, ctx) # type:ignore +# +# with self.subTest("invalid secretsmanager: empty region"): +# os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" +# with self.assertRaisesRegex( +# ConfigFileException, +# "Must be provided region in arn: " "arn:aws:secretsmanager::123456789:secret:plain_secret", +# ): +# ctx = ContextMock() +# # BEWARE region is empty at id +# _s3_client_mock.config_content = b""" +# inputs: +# - type: "s3-sqs" +# id: "arn:aws:secretsmanager::123456789:secret:plain_secret" +# outputs: +# - type: "elasticsearch" +# args: +# elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets" +# username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" +# password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" +# es_datastream_name: "logs-redis.log-default" +# """ +# +# event = deepcopy(dummy_event) +# +# handler(event, ctx) # type:ignore +# +# with self.subTest("invalid secretsmanager: empty secrets manager name"): +# os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" +# with self.assertRaisesRegex( +# ConfigFileException, +# "Must be provided secrets manager name in arn: " +# "arn:aws:secretsmanager:eu-central-1:123456789:secret:", +# ): +# ctx = ContextMock() +# # BEWARE empty secrets manager name at id +# _s3_client_mock.config_content = b""" +# inputs: +# - type: "s3-sqs" +# id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:" +# outputs: +# - type: "elasticsearch" +# args: +# elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets" +# username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" +# password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" +# es_datastream_name: "logs-redis.log-default" +# """ +# +# event = deepcopy(dummy_event) +# +# handler(event, ctx) # type:ignore +# +# with self.subTest("invalid secretsmanager: cannot use both plain text and key/value pairs"): +# os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" +# with self.assertRaisesRegex( +# ConfigFileException, +# "You cannot have both plain text and json key for the same " +# "secret: arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username", +# ): +# ctx = ContextMock() +# # BEWARE using 
es_secrets plain text for elasticsearch_url and es_secrets:username for username +# _s3_client_mock.config_content = b""" +# inputs: +# - type: "s3-sqs" +# id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secrets" +# outputs: +# - type: "elasticsearch" +# args: +# elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets" +# username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" +# password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" +# es_datastream_name: "logs-redis.log-default" +# """ +# +# event = deepcopy(dummy_event) +# +# handler(event, ctx) # type:ignore +# +# with self.subTest("invalid secretsmanager: empty secret key"): +# os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" +# with self.assertRaisesRegex( +# ConfigFileException, +# "Error for secret " +# "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:: key must " +# "not be empty", +# ): +# ctx = ContextMock() +# # BEWARE empty key at elasticsearch_url +# _s3_client_mock.config_content = b""" +# inputs: +# - type: "s3-sqs" +# id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret" +# outputs: +# - type: "elasticsearch" +# args: +# elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:" +# username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" +# password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" +# es_datastream_name: "logs-redis.log-default" +# """ +# +# event = deepcopy(dummy_event) +# +# handler(event, ctx) # type:ignore +# +# with self.subTest("invalid secretsmanager: secret does not exist"): +# os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" +# with self.assertRaisesRegex( +# ConfigFileException, +# r"An error occurred \(ResourceNotFoundException\) when calling " +# "the GetSecretValue operation: Secrets Manager can't find the specified secret.", +# ): +# ctx = ContextMock() +# _s3_client_mock.config_content = b""" +# inputs: +# - type: "s3-sqs" +# id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:DOES_NOT_EXIST" +# outputs: +# - type: "elasticsearch" +# args: +# elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" +# username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" +# password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" +# es_datastream_name: "logs-redis.log-default" +# """ +# +# event = deepcopy(dummy_event) +# +# handler(event, ctx) # type:ignore +# +# with self.subTest("invalid secretsmanager: empty plain secret value"): +# os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" +# with self.assertRaisesRegex( +# ConfigFileException, +# "Error for secret " +# "arn:aws:secretsmanager:eu-central-1:123456789:secret:empty_secret: must " +# "not be empty", +# ): +# ctx = ContextMock() +# _s3_client_mock.config_content = b""" +# inputs: +# - type: "s3-sqs" +# id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:empty_secret" +# outputs: +# - type: "elasticsearch" +# args: +# elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" +# username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" +# password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" +# es_datastream_name: "logs-redis.log-default" +# 
""" +# +# event = deepcopy(dummy_event) +# +# handler(event, ctx) # type:ignore +# +# with self.subTest("invalid secretsmanager: empty key/value secret value"): +# os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" +# with self.assertRaisesRegex( +# ConfigFileException, +# "Error for secret " +# "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:empty: must " +# "not be empty", +# ): +# ctx = ContextMock() +# _s3_client_mock.config_content = b""" +# inputs: +# - type: "s3-sqs" +# id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:empty" +# outputs: +# - type: "elasticsearch" +# args: +# elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" +# username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" +# password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" +# es_datastream_name: "logs-redis.log-default" +# """ +# +# event = deepcopy(dummy_event) +# +# handler(event, ctx) # type:ignore +# +# with self.subTest("invalid secretsmanager: plain text used as key/value"): +# os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" +# with self.assertRaisesRegex( +# ConfigFileException, +# "Error for secret " +# "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret:SHOULD_NOT_HAVE_A_KEY: " +# "expected to be keys/values pair", +# ): +# ctx = ContextMock() +# _s3_client_mock.config_content = b""" +# inputs: +# - type: "s3-sqs" +# id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret:SHOULD_NOT_HAVE_A_KEY" +# outputs: +# - type: "elasticsearch" +# args: +# elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" +# username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" +# password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" +# es_datastream_name: "logs-redis.log-default" +# """ +# +# event = deepcopy(dummy_event) +# +# handler(event, ctx) # type:ignore +# +# with self.subTest("invalid secretsmanager: key does not exist in secret manager"): +# os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" +# with self.assertRaisesRegex( +# ConfigFileException, +# "Error for secret " +# "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:I_DO_NOT_EXIST: " +# "key not found", +# ): +# ctx = ContextMock() +# _s3_client_mock.config_content = b""" +# inputs: +# - type: "s3-sqs" +# id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:I_DO_NOT_EXIST" +# outputs: +# - type: "elasticsearch" +# args: +# elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" +# username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" +# password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" +# es_datastream_name: "logs-redis.log-default" +# """ +# +# event = deepcopy(dummy_event) +# +# handler(event, ctx) # type:ignore +# +# with self.subTest("invalid secretsmanager: plain text secret not str"): +# os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" +# with self.assertRaisesRegex( +# ConfigFileException, +# "Error for secret " +# "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret_not_str_byte: " +# "expected to be a string", +# ): +# ctx = ContextMock() +# _s3_client_mock.config_content = b""" +# inputs: +# - type: "s3-sqs" +# id: 
"arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret_not_str_byte" +# outputs: +# - type: "elasticsearch" +# args: +# elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" +# username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" +# password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" +# es_datastream_name: "logs-redis.log-default" +# """ +# +# event = deepcopy(dummy_event) +# +# handler(event, ctx) # type:ignore +# +# with self.subTest("invalid secretsmanager: json TypeError raised"): +# os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" +# with self.assertRaisesRegex( +# ConfigFileException, +# "Error for secret " +# "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret_not_str_int: " +# "expected to be a string", +# ): +# ctx = ContextMock() +# _s3_client_mock.config_content = b""" +# inputs: +# - type: "s3-sqs" +# id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret_not_str_int" +# outputs: +# - type: "elasticsearch" +# args: +# elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" +# username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" +# password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" +# es_datastream_name: "logs-redis.log-default" +# """ +# +# event = deepcopy(dummy_event) +# +# handler(event, ctx) # type:ignore +# +# with self.subTest("tags not list"): +# os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" +# with self.assertRaisesRegex( +# ConfigFileException, "`tags` must be provided as list for input mock_plain_text_sqs_arn" +# ): +# ctx = ContextMock() +# _s3_client_mock.config_content = b""" +# inputs: +# - type: "s3-sqs" +# id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret" +# tags: "tag1" +# outputs: +# - type: "elasticsearch" +# args: +# elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" +# username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" +# password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" +# es_datastream_name: "logs-redis.log-default" +# """ +# +# event = deepcopy(dummy_event) +# +# handler(event, ctx) # type:ignore +# +# with self.subTest("each tag must be of type str"): +# os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" +# with self.assertRaisesRegex( +# ConfigFileException, +# r"Each tag in `tags` must be provided as string for input " +# r"mock_plain_text_sqs_arn, given: \['tag1', 2, 'tag3'\]", +# ): +# ctx = ContextMock() +# _s3_client_mock.config_content = b""" +# inputs: +# - type: "s3-sqs" +# id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret" +# tags: +# - "tag1" +# - 2 +# - "tag3" +# outputs: +# - type: "elasticsearch" +# args: +# elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" +# username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" +# password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" +# es_datastream_name: "logs-redis.log-default" +# """ +# +# event = deepcopy(dummy_event) +# +# handler(event, ctx) # type:ignore +# +# with self.subTest("expand_event_list_from_field not str"): +# os.environ["S3_CONFIG_FILE"] = 
"s3://s3_config_file_bucket/s3_config_file_object_key" +# with self.assertRaisesRegex( +# ConfigFileException, +# "`expand_event_list_from_field` must be provided as string for input mock_plain_text_sqs_arn", +# ): +# ctx = ContextMock() +# _s3_client_mock.config_content = b""" +# inputs: +# - type: "s3-sqs" +# id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret" +# expand_event_list_from_field: 0 +# outputs: +# - type: "elasticsearch" +# args: +# elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" +# username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" +# password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" +# es_datastream_name: "logs-redis.log-default" +# """ +# +# event = deepcopy(dummy_event) +# +# handler(event, ctx) # type:ignore +# +# with self.subTest("root_fields_to_add_to_expanded_event not `all` when string"): +# os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" +# with self.assertRaisesRegex( +# ConfigFileException, +# "`root_fields_to_add_to_expanded_event` must be provided as `all` or a list of strings", +# ): +# ctx = ContextMock() +# _s3_client_mock.config_content = b""" +# inputs: +# - type: "s3-sqs" +# id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret" +# root_fields_to_add_to_expanded_event: not_all +# outputs: +# - type: "elasticsearch" +# args: +# elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" +# username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" +# password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" +# es_datastream_name: "logs-redis.log-default" +# """ +# +# event = deepcopy(dummy_event) +# +# handler(event, ctx) # type:ignore +# +# with self.subTest("root_fields_to_add_to_expanded_event not `all` neither list of strings"): +# os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" +# with self.assertRaisesRegex( +# ConfigFileException, +# "`root_fields_to_add_to_expanded_event` must be provided as `all` or a list of strings", +# ): +# ctx = ContextMock() +# _s3_client_mock.config_content = b""" +# inputs: +# - type: "s3-sqs" +# id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret" +# root_fields_to_add_to_expanded_event: 0 +# outputs: +# - type: "elasticsearch" +# args: +# elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" +# username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" +# password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" +# es_datastream_name: "logs-redis.log-default" +# """ +# +# event = deepcopy(dummy_event) +# +# handler(event, ctx) # type:ignore +# +# with self.subTest("json_content_type not valid"): +# os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" +# with self.assertRaisesRegex( +# ConfigFileException, +# "`json_content_type` must be one of ndjson,single,disabled " +# "for input mock_plain_text_sqs_arn: whatever given", +# ): +# ctx = ContextMock() +# _s3_client_mock.config_content = b""" +# inputs: +# - type: "s3-sqs" +# id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret" +# json_content_type: whatever +# outputs: +# - type: "elasticsearch" +# args: +# elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" +# username: 
"arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" +# password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" +# es_datastream_name: "logs-redis.log-default" +# """ +# +# event = deepcopy(dummy_event) +# +# handler(event, ctx) # type:ignore +# From 0115e0c1ce7bb2e519cf52cef07a1e1b251ec4be Mon Sep 17 00:00:00 2001 From: constanca Date: Wed, 17 Apr 2024 12:33:01 +0200 Subject: [PATCH 16/26] . --- tests/handlers/aws/test_handler.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/handlers/aws/test_handler.py b/tests/handlers/aws/test_handler.py index 218ca806..b178a296 100644 --- a/tests/handlers/aws/test_handler.py +++ b/tests/handlers/aws/test_handler.py @@ -405,6 +405,9 @@ def test_lambda_handler_noop(self) -> None: reload_handlers_aws_handler() with self.subTest("no originalEventSourceARN in messageAttributes"): + logger.info( + ">> TESTE no originalEventSourceARN in messageAttributes" + ) ctx = ContextMock() os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" lambda_event = deepcopy(_dummy_lambda_event) @@ -412,6 +415,9 @@ def test_lambda_handler_noop(self) -> None: assert handler(lambda_event, ctx) == "completed" # type:ignore with self.subTest("no input defined for cloudwatch_logs"): + logger.info( + ">> TESTE no input defined for cloudwatch_logs" + ) ctx = ContextMock() os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" lambda_event = { @@ -424,6 +430,9 @@ def test_lambda_handler_noop(self) -> None: assert handler(lambda_event, ctx) == "completed" # type:ignore with self.subTest("output not elasticsearch from payload config"): + logger.info( + ">> TESTE output not elasticsearch from payload config" + ) with mock.patch( "handlers.aws.handler.get_shipper_for_replay_event", lambda config, output_type, output_args, event_input_id, replay_handler: None, @@ -509,6 +518,9 @@ def test_lambda_handler_noop(self) -> None: assert handler(lambda_event, ctx) == "completed" # type:ignore with self.subTest("raising unexpected exception"): + logger.info( + ">> TESTE raising unexpected exception" + ) ctx = ContextMock() lambda_event = deepcopy(_dummy_lambda_event) lambda_event_body = json_parser(lambda_event["Records"][0]["body"]) From 9d3ee3999c3342ba4e4e701cfeb9cfb96b590cb3 Mon Sep 17 00:00:00 2001 From: constanca Date: Wed, 17 Apr 2024 12:37:29 +0200 Subject: [PATCH 17/26] . --- tests/handlers/aws/test_handler.py | 1161 ----------------------- tests/handlers/aws/test_integrations.py | 675 +++++++++++++ 2 files changed, 675 insertions(+), 1161 deletions(-) delete mode 100644 tests/handlers/aws/test_handler.py create mode 100644 tests/handlers/aws/test_integrations.py diff --git a/tests/handlers/aws/test_handler.py b/tests/handlers/aws/test_handler.py deleted file mode 100644 index b178a296..00000000 --- a/tests/handlers/aws/test_handler.py +++ /dev/null @@ -1,1161 +0,0 @@ -# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -# or more contributor license agreements. Licensed under the Elastic License 2.0; -# you may not use this file except in compliance with the Elastic License 2.0. 
- -import base64 -import datetime -import importlib -import logging -import os -import sys -from copy import deepcopy -from io import BytesIO -from typing import Any, Optional, Union -from unittest import TestCase - -import mock -import pytest -from botocore.exceptions import ClientError -from botocore.response import StreamingBody - -from handlers.aws.exceptions import ( - ConfigFileException, - InputConfigException, - OutputConfigException, - TriggerTypeException, -) -from main_aws import handler -from share import json_dumper, json_parser - -from .utils import ContextMock - - -class MockContent: - SECRETS_MANAGER_MOCK_DATA: dict[str, dict[str, str]] = { - "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets": { - "type": "SecretString", - "data": json_dumper( - { - "url": "mock_elastic_url", - "username": "mock_elastic_username", - "password": "mock_elastic_password", - "empty": "", - } - ), - }, - "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret": { - "type": "SecretString", - "data": "mock_plain_text_sqs_arn", - }, - "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret_not_str_byte": { - "type": "SecretString", - "data": b"i am not a string", # type:ignore - }, - "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret_not_str_int": { - "type": "SecretString", - "data": 2021, # type:ignore - }, - "arn:aws:secretsmanager:eu-central-1:123456789:secret:binary_secret": { - "type": "SecretBinary", - "data": "bW9ja19uZ2lueC5sb2c=", - }, - "arn:aws:secretsmanager:eu-central-1:123456789:secret:empty_secret": {"type": "SecretString", "data": ""}, - } - - @staticmethod - def _get_aws_sm_client(region_name: str) -> mock.MagicMock: - client = mock.Mock() - client.get_secret_value = MockContent.get_secret_value - return client - - @staticmethod - def get_secret_value(SecretId: str) -> Optional[dict[str, Union[bytes, str]]]: - secrets = MockContent.SECRETS_MANAGER_MOCK_DATA.get(SecretId) - - if secrets is None: - raise ClientError( - { - "Error": { - "Message": "Secrets Manager can't find the specified secret.", - "Code": "ResourceNotFoundException", - } - }, - "GetSecretValue", - ) - - if secrets["type"] == "SecretBinary": - return {"SecretBinary": base64.b64decode(secrets["data"])} - elif secrets["type"] == "SecretString": - return {"SecretString": secrets["data"]} - - return None - - -_now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.%fZ") -_dummy_lambda_event: dict[str, Any] = { - "Records": [ - { - "messageId": "dummy_message_id", - "receiptHandle": "dummy_receipt_handle", - "body": json_dumper( - { - "Records": [ - { - "eventVersion": "2.1", - "eventSource": "aws:s3", - "awsRegion": "eu-central-1", - "eventTime": _now, - "eventName": "ObjectCreated:Put", - "userIdentity": {"principalId": "dummy_principal_id"}, - "requestParameters": {"sourceIPAddress": "dummy_source_ip_address"}, - "responseElements": { - "x-amz-request-id": "dummy_request_id", - "x-amz-id-2": "dummy_request_id_2", - }, - "s3": { - "s3SchemaVersion": "1.0", - "configurationId": "sqs_event", - "bucket": { - "name": "dummy_bucket_name", - "ownerIdentity": {"principalId": "dummy_principal_id"}, - "arn": "arn:aws:s3:::dummy_bucket_name", - }, - "object": { - "key": "file.log", - "size": 27, - "eTag": "", - "sequencer": "", - }, - }, - } - ] - } - ), - "attributes": { - "ApproximateReceiveCount": "1", - "SentTimestamp": _now, - "SenderId": "dummy_sender_id", - "ApproximateFirstReceiveTimestamp": _now, - }, - "messageAttributes": { - "config": { - "stringValue": 
"inputs:\n - type: s3-sqs" - "\n id: arn:aws:sqs:eu-central-1:123456789:sqs-queue\n outputs:" - "\n - type: elasticsearch\n args:" - "\n cloud_id: cloud_id:bG9jYWxob3N0OjkyMDAkMA==\n api_key: api_key\n" - }, - "originalEventSourceARN": {"stringValue": "arn:aws:sqs:eu-central-1:123456789:sqs-queue"}, - "originalLastEndingOffset": {"stringValue": "32"}, - }, - "md5OfBody": "dummy_hash", - "eventSource": "aws:sqs", - "eventSourceARN": "arn:aws:sqs:eu-central-1:123456789:s3-sqs-queue", - "awsRegion": "eu-central-1", - } - ] -} - - -def _get_queue_url_mock(QueueName: str, QueueOwnerAWSAccountId: str) -> dict[str, Any]: - return {"QueueUrl": ""} - - -def _send_message(QueueUrl: str, MessageBody: str, MessageAttributes: dict[str, Any]) -> None: - pass - - -def _describe_regions(AllRegions: bool) -> dict[str, Any]: - return { - "Regions": [ - { - "RegionName": "af-south-1", - }, - { - "RegionName": "ap-east-1", - }, - { - "RegionName": "ap-northeast-1", - }, - { - "RegionName": "ap-northeast-2", - }, - { - "RegionName": "ap-northeast-3", - }, - { - "RegionName": "ap-south-1", - }, - { - "RegionName": "ap-south-2", - }, - { - "RegionName": "ap-southeast-1", - }, - { - "RegionName": "ap-southeast-2", - }, - { - "RegionName": "ap-southeast-3", - }, - { - "RegionName": "ap-southeast-4", - }, - { - "RegionName": "ca-central-1", - }, - { - "RegionName": "eu-central-1", - }, - { - "RegionName": "eu-central-2", - }, - { - "RegionName": "eu-north-1", - }, - { - "RegionName": "eu-south-1", - }, - { - "RegionName": "eu-south-2", - }, - { - "RegionName": "eu-west-1", - }, - { - "RegionName": "eu-west-2", - }, - { - "RegionName": "eu-west-3", - }, - { - "RegionName": "me-central-1", - }, - { - "RegionName": "me-south-1", - }, - { - "RegionName": "sa-east-1", - }, - { - "RegionName": "us-east-1", - }, - { - "RegionName": "us-east-2", - }, - { - "RegionName": "us-gov-east-1", - }, - { - "RegionName": "us-gov-west-1", - }, - { - "RegionName": "us-west-1", - }, - { - "RegionName": "us-west-2", - }, - ] - } - - -_ec2_client_mock = mock.MagicMock() -_ec2_client_mock.describe_regions = _describe_regions - -_sqs_client_mock = mock.MagicMock() -_sqs_client_mock.get_queue_url = _get_queue_url_mock -_sqs_client_mock.send_message = _send_message - - -_s3_client_mock = mock.MagicMock() - - -_s3_client_mock.config_content = ( - b"inputs:\n" - b" - type: s3-sqs\n" - b" id: arn:aws:sqs:eu-central-1:123456789:s3-sqs-queue\n" - b" outputs:\n" - b" - type: elasticsearch\n" - b" args:\n" - b" cloud_id: cloud_id:bG9jYWxob3N0OjkyMDAkMA==\n" - b" api_key: api_key\n" - b" - type: logstash\n" - b" args:\n" - b" logstash_url: logstash_url\n" - b" - type: cloudwatch-logs\n" - b" id: arn:aws:logs:eu-central-1:123456789:log-group:logGroup:log-stream:logStream\n" - b" outputs:\n" - b" - type: elasticsearch\n" - b" args:\n" - b" cloud_id: cloud_id:bG9jYWxob3N0OjkyMDAkMA==\n" - b" api_key: api_key\n" - b" - type: logstash\n" - b" args:\n" - b" logstash_url: logstash_url\n" - b" - type: sqs\n" - b" id: arn:aws:sqs:eu-central-1:123456789:sqs-queue\n" - b" outputs:\n" - b" - type: elasticsearch\n" - b" args:\n" - b" cloud_id: cloud_id:bG9jYWxob3N0OjkyMDAkMA==\n" - b" api_key: api_key\n" - b" - type: logstash\n" - b" args:\n" - b" logstash_url: logstash_url\n" - b" - type: dummy\n" - b" id: arn:aws:dummy:eu-central-1:123456789:input\n" - b" outputs:\n" - b" - type: elasticsearch\n" - b" args:\n" - b" cloud_id: cloud_id:bG9jYWxob3N0OjkyMDAkMA==\n" - b" api_key: api_key\n" - b" - type: logstash\n" - b" args:\n" - b" logstash_url: logstash_url\n" - 
b" - type: s3-sqs\n" - b" id: arn:aws:sqs:eu-central-1:123456789:s3-sqs-queue-with-dummy-output\n" - b" outputs:\n" - b" - type: output_type\n" - b" args:\n" - b" output_arg: output_arg" -) - - -def _head_object(Bucket: str, Key: str) -> dict[str, Any]: - return {"ContentType": "ContentType", "ContentLength": 0} - - -def _get_object(Bucket: str, Key: str, Range: str) -> dict[str, Any]: - content = _s3_client_mock.config_content - content_body = BytesIO(content) - content_length = len(content) - return {"Body": StreamingBody(content_body, content_length), "ContentLength": content_length} - - -def _download_fileobj(Bucket: str, Key: str, Fileobj: BytesIO) -> None: - if Key == "please raise": - raise Exception("raised") - - -_s3_client_mock.head_object = _head_object -_s3_client_mock.download_fileobj = _download_fileobj -_s3_client_mock.get_object = _get_object - - -def _apm_capture_serverless() -> Any: - def wrapper(func: Any) -> Any: - def decorated(*args: Any, **kwds: Any) -> Any: - return func(*args, **kwds) - - return decorated - - return wrapper - - -def reload_handlers_aws_handler() -> None: - os.environ["ELASTIC_APM_ACTIVE"] = "ELASTIC_APM_ACTIVE" - os.environ["AWS_LAMBDA_FUNCTION_NAME"] = "AWS_LAMBDA_FUNCTION_NAME" - - from handlers.aws.utils import get_ec2_client, get_sqs_client - - os.environ["AWS_DEFAULT_REGION"] = "us-east-1" - _ = get_sqs_client() - _ = get_ec2_client() - - mock.patch("handlers.aws.utils.get_sqs_client", lambda: _sqs_client_mock).start() - mock.patch("handlers.aws.utils.get_ec2_client", lambda: _ec2_client_mock).start() - - handlers_aws_handler = sys.modules["handlers.aws.handler"] - importlib.reload(handlers_aws_handler) - - -def revert_handlers_aws_handler() -> None: - if "AWS_DEFAULT_REGION" in os.environ: - del os.environ["AWS_DEFAULT_REGION"] - - if "ELASTIC_APM_ACTIVE" in os.environ: - del os.environ["ELASTIC_APM_ACTIVE"] - - if "AWS_LAMBDA_FUNCTION_NAME" in os.environ: - del os.environ["AWS_LAMBDA_FUNCTION_NAME"] - - handlers_aws_handler = sys.modules["handlers.aws.handler"] - importlib.reload(handlers_aws_handler) - - -@pytest.mark.unit -class TestLambdaHandlerNoop(TestCase): - @mock.patch("share.config._available_output_types", new=["elasticsearch", "logstash", "output_type"]) - @mock.patch( - "share.config._available_input_types", new=["cloudwatch-logs", "s3-sqs", "sqs", "kinesis-data-stream", "dummy"] - ) - @mock.patch("storage.S3Storage._s3_client", _s3_client_mock) - @mock.patch("handlers.aws.utils.apm_capture_serverless", _apm_capture_serverless) - @mock.patch( - "handlers.aws.utils._available_triggers", - new={"aws:s3": "s3-sqs", "aws:sqs": "sqs", "aws:kinesis": "kinesis-data-stream", "dummy": "s3-sqs"}, - ) - def test_lambda_handler_noop(self) -> None: - print("Test lambda handler noop") - - logger = logging.getLogger() - logger.info( - "Test lambda handler noop" - ) - - reload_handlers_aws_handler() - - with self.subTest("no originalEventSourceARN in messageAttributes"): - logger.info( - ">> TESTE no originalEventSourceARN in messageAttributes" - ) - ctx = ContextMock() - os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" - lambda_event = deepcopy(_dummy_lambda_event) - del lambda_event["Records"][0]["messageAttributes"]["originalEventSourceARN"] - assert handler(lambda_event, ctx) == "completed" # type:ignore - - with self.subTest("no input defined for cloudwatch_logs"): - logger.info( - ">> TESTE no input defined for cloudwatch_logs" - ) - ctx = ContextMock() - os.environ["S3_CONFIG_FILE"] = 
"s3://s3_config_file_bucket/s3_config_file_object_key" - lambda_event = { - "awslogs": { - "data": json_dumper( - {"logGroup": "logGroup", "logStream": "logStream", "owner": "123456789", "logEvents": []} - ) - } - } - assert handler(lambda_event, ctx) == "completed" # type:ignore - - with self.subTest("output not elasticsearch from payload config"): - logger.info( - ">> TESTE output not elasticsearch from payload config" - ) - with mock.patch( - "handlers.aws.handler.get_shipper_for_replay_event", - lambda config, output_type, output_args, event_input_id, replay_handler: None, - ): - ctx = ContextMock() - event = { - "Records": [ - { - "eventSourceARN": "arn:aws:sqs:eu-central-1:123456789:replay-queue", - "receiptHandle": "receiptHandle", - "body": '{"output_type": "output_type", "output_args": {},' - '"event_input_id": "arn:aws:sqs:eu-central-1:123456789:s3-sqs-queue", ' - '"event_payload": {"_id": "_id"}}', - } - ] - } - assert handler(event, ctx) == "replayed" # type:ignore - - with self.subTest("no input defined for cloudwatch_logs in continuing queue"): - ctx = ContextMock() - os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" - lambda_event = deepcopy(_dummy_lambda_event) - lambda_event["Records"][0]["messageAttributes"]["originalEventSourceARN"] = { - "stringValue": "arn:aws:logs:eu-central-1:123456789:log-group:test-not-existing-esf-loggroup:*" - } - assert handler(lambda_event, ctx) == "completed" # type:ignore - - with self.subTest("no output type elasticsearch in continuing queue"): - ctx = ContextMock() - os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" - lambda_event = deepcopy(_dummy_lambda_event) - lambda_event["Records"][0][ - "eventSourceARN" - ] = "arn:aws:sqs:eu-central-1:123456789:s3-sqs-queue-with-dummy-output" - del lambda_event["Records"][0]["messageAttributes"]["originalEventSourceARN"] - assert handler(lambda_event, ctx) == "completed" # type:ignore - - with self.subTest("no input type for output type elasticsearch in continuing queue"): - ctx = ContextMock() - os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" - lambda_event = deepcopy(_dummy_lambda_event) - lambda_event["Records"][0]["eventSource"] = "dummy" - lambda_event["Records"][0]["eventSourceARN"] = "arn:aws:dummy:eu-central-1:123456789:input" - del lambda_event["Records"][0]["messageAttributes"]["originalEventSourceARN"] - assert handler(lambda_event, ctx) == "completed" # type:ignore - - with self.subTest("no input defined for kinesis-data-stream"): - ctx = ContextMock() - os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" - lambda_event = { - "Records": [ - { - "eventSource": "aws:kinesis", - "kinesis": {"data": ""}, - "eventSourceARN": "arn:aws:kinesis:eu-central-1:123456789:stream/test-esf-kinesis-stream", - } - ] - } - assert handler(lambda_event, ctx) == "completed" # type:ignore - - with self.subTest("body is neither replay queue nor s3-sqs"): - ctx = ContextMock() - os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" - os.environ["SQS_REPLAY_URL"] = "https://sqs.us-east-2.amazonaws.com/123456789012/replay_queue" - os.environ["SQS_CONTINUE_URL"] = "https://sqs.us-east-2.amazonaws.com/123456789012/continue_queue" - lambda_event = deepcopy(_dummy_lambda_event) - lambda_event["Records"][0]["body"] = json_dumper({"Records": [{"key": "value"}]}) - lambda_event["Records"][0]["eventSourceARN"] = 
"arn:aws:sqs:eu-central-1:123456789:sqs-queue" - del lambda_event["Records"][0]["messageAttributes"]["originalEventSourceARN"] - assert handler(lambda_event, ctx) == "completed" # type:ignore - - with self.subTest("raising cannot find cloudwatch_logs ARN"): - ctx = ContextMock() - os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" - lambda_event = { - "awslogs": { - "data": json_dumper( - {"logGroup": "logGroup", "logStream": "logStreamNotMatching", "owner": "owner", "logEvents": []} - ) - } - } - - assert handler(lambda_event, ctx) == "completed" # type:ignore - - with self.subTest("raising unexpected exception"): - logger.info( - ">> TESTE raising unexpected exception" - ) - ctx = ContextMock() - lambda_event = deepcopy(_dummy_lambda_event) - lambda_event_body = json_parser(lambda_event["Records"][0]["body"]) - lambda_event_body["Records"][0]["s3"]["object"]["key"] = "please raise" - - lambda_event["Records"][0]["body"] = json_dumper(lambda_event_body) - - assert handler(lambda_event, ctx) == "exception raised: Exception('raised')" # type:ignore - - with self.subTest("raising unexpected exception apm client not None"): - with mock.patch("handlers.aws.utils.get_apm_client", lambda: mock.MagicMock()): - ctx = ContextMock() - lambda_event = deepcopy(_dummy_lambda_event) - lambda_event_body = json_parser(lambda_event["Records"][0]["body"]) - lambda_event_body["Records"][0]["s3"]["object"]["key"] = "please raise" - - lambda_event["Records"][0]["body"] = json_dumper(lambda_event_body) - - assert handler(lambda_event, ctx) == "exception raised: Exception('raised')" # type:ignore - - -#@pytest.mark.unit -#class TestLambdaHandlerFailure(TestCase): -# def setUp(self) -> None: -# revert_handlers_aws_handler() -# -# @mock.patch("share.config._available_output_types", new=["elasticsearch", "logstash", "output_type"]) -# @mock.patch( -# "share.config._available_input_types", new=["cloudwatch-logs", "s3-sqs", "sqs", "kinesis-data-stream", "dummy"] -# ) -# @mock.patch("share.secretsmanager._get_aws_sm_client", new=MockContent._get_aws_sm_client) -# @mock.patch("handlers.aws.utils.get_ec2_client", lambda: _ec2_client_mock) -# @mock.patch("handlers.aws.handler.get_sqs_client", lambda: _sqs_client_mock) -# @mock.patch("storage.S3Storage._s3_client", _s3_client_mock) -# def test_lambda_handler_failure(self) -> None: -# dummy_event: dict[str, Any] = { -# "Records": [ -# { -# "eventSource": "aws:sqs", -# "eventSourceARN": "arn:aws:sqs", -# }, -# ] -# } -# -# with self.subTest("output not in config from replay payload body"): -# os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" -# event = { -# "Records": [ -# { -# "eventSourceARN": "arn:aws:sqs:eu-central-1:123456789:replay-queue", -# "receiptHandle": "receiptHandle", -# "body": '{"output_type": "output_type", "output_args": {},' -# '"event_input_id": "arn:aws:dummy:eu-central-1:123456789:input", ' -# '"event_payload": {"_id": "_id"}}', -# } -# ] -# } -# with self.assertRaisesRegex(OutputConfigException, "Cannot load output of type output_type"): -# ctx = ContextMock() -# -# handler(event, ctx) # type:ignore -# -# with self.subTest("input not in config from replay payload body"): -# os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" -# event = { -# "Records": [ -# { -# "eventSourceARN": "arn:aws:sqs:eu-central-1:123456789:replay-queue", -# "receiptHandle": "receiptHandle", -# "body": '{"output_type": "output_type", "output_args": {},' -# '"event_input_id": 
"arn:aws:dummy:eu-central-1:123456789:not-existing-input", ' -# '"event_payload": {"_id": "_id"}}', -# } -# ] -# } -# with self.assertRaisesRegex( -# InputConfigException, -# "Cannot load input for input id arn:aws:dummy:eu-central-1:123456789:not-existing-input", -# ): -# ctx = ContextMock() -# -# handler(event, ctx) # type:ignore -# -# with self.subTest("empty config"): -# os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" -# with self.assertRaisesRegex(ConfigFileException, "Empty config"): -# ctx = ContextMock() -# _s3_client_mock.config_content = b"" -# -# handler(dummy_event, ctx) # type:ignore -# -# with self.subTest("Invalid s3 uri apm client not None"): -# with mock.patch("handlers.aws.utils.get_apm_client", lambda: mock.MagicMock()): -# with self.assertRaisesRegex(ConfigFileException, "Invalid s3 uri provided: ``"): -# os.environ["S3_CONFIG_FILE"] = "" -# ctx = ContextMock() -# -# handler(dummy_event, ctx) # type:ignore -# -# with self.subTest("Invalid s3 uri"): -# with self.assertRaisesRegex(ConfigFileException, "Invalid s3 uri provided: ``"): -# os.environ["S3_CONFIG_FILE"] = "" -# ctx = ContextMock() -# -# handler(dummy_event, ctx) # type:ignore -# -# with self.subTest("Invalid s3 uri no bucket and key"): -# with self.assertRaisesRegex(ConfigFileException, "Invalid s3 uri provided: `s3://`"): -# os.environ["S3_CONFIG_FILE"] = "s3://" -# ctx = ContextMock() -# -# handler(dummy_event, ctx) # type:ignore -# -# with self.subTest("Invalid s3 uri no key"): -# with self.assertRaisesRegex(ConfigFileException, "Invalid s3 uri provided: `s3://bucket`"): -# os.environ["S3_CONFIG_FILE"] = "s3://bucket" -# ctx = ContextMock() -# -# handler(dummy_event, ctx) # type:ignore -# -# with self.subTest("no Records in event"): -# with self.assertRaisesRegex(TriggerTypeException, "Not supported trigger"): -# ctx = ContextMock() -# event = {} -# -# handler(event, ctx) # type:ignore -# -# with self.subTest("empty Records in event"): -# with self.assertRaisesRegex(TriggerTypeException, "Not supported trigger"): -# ctx = ContextMock() -# event = {"Records": []} -# -# handler(event, ctx) # type:ignore -# -# with self.subTest("no eventSource in Records in event"): -# with self.assertRaisesRegex(TriggerTypeException, "Not supported trigger"): -# ctx = ContextMock() -# event = {"Records": [{}]} -# -# handler(event, ctx) # type:ignore -# -# with self.subTest("no valid eventSource in Records in event"): -# with self.assertRaisesRegex(TriggerTypeException, "Not supported trigger"): -# ctx = ContextMock() -# event = {"Records": [{"eventSource": "invalid"}]} -# -# handler(event, ctx) # type:ignore -# -# with self.subTest("no eventSource in body Records in event"): -# with self.assertRaisesRegex(TriggerTypeException, "Not supported trigger"): -# ctx = ContextMock() -# event = {"Records": [{"body": ""}]} -# -# handler(event, ctx) # type:ignore -# -# with self.subTest("no valid eventSource in body Records in event"): -# with self.assertRaisesRegex(TriggerTypeException, "Not supported trigger"): -# ctx = ContextMock() -# event = {"Records": [{"body": "", "eventSource": "invalid"}]} -# -# handler(event, ctx) # type:ignore -# -# with self.subTest("replay event loads config from s3"): -# with self.assertRaisesRegex(ConfigFileException, "Invalid s3 uri provided: `s3://bucket`"): -# ctx = ContextMock() -# event = { -# "Records": [ -# { -# "body": '{"output_type": "", "output_args": "", "event_payload": ""}', -# } -# ] -# } -# handler(event, ctx) # type:ignore -# -# with 
self.subTest("invalid secretsmanager: arn format too long"): -# os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" -# with self.assertRaisesRegex( -# ConfigFileException, -# "Invalid arn format: " -# "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret:THIS:IS:INVALID", -# ): -# ctx = ContextMock() -# _s3_client_mock.config_content = b""" -# inputs: -# - type: "s3-sqs" -# id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret:THIS:IS:INVALID" -# outputs: -# - type: "elasticsearch" -# args: -# elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" -# username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" -# password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" -# es_datastream_name: "logs-redis.log-default" -# """ -# -# event = deepcopy(dummy_event) -# -# handler(event, ctx) # type:ignore -# -# with self.subTest("invalid secretsmanager: empty region"): -# os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" -# with self.assertRaisesRegex( -# ConfigFileException, -# "Must be provided region in arn: " "arn:aws:secretsmanager::123456789:secret:plain_secret", -# ): -# ctx = ContextMock() -# # BEWARE region is empty at id -# _s3_client_mock.config_content = b""" -# inputs: -# - type: "s3-sqs" -# id: "arn:aws:secretsmanager::123456789:secret:plain_secret" -# outputs: -# - type: "elasticsearch" -# args: -# elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets" -# username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" -# password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" -# es_datastream_name: "logs-redis.log-default" -# """ -# -# event = deepcopy(dummy_event) -# -# handler(event, ctx) # type:ignore -# -# with self.subTest("invalid secretsmanager: empty secrets manager name"): -# os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" -# with self.assertRaisesRegex( -# ConfigFileException, -# "Must be provided secrets manager name in arn: " -# "arn:aws:secretsmanager:eu-central-1:123456789:secret:", -# ): -# ctx = ContextMock() -# # BEWARE empty secrets manager name at id -# _s3_client_mock.config_content = b""" -# inputs: -# - type: "s3-sqs" -# id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:" -# outputs: -# - type: "elasticsearch" -# args: -# elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets" -# username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" -# password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" -# es_datastream_name: "logs-redis.log-default" -# """ -# -# event = deepcopy(dummy_event) -# -# handler(event, ctx) # type:ignore -# -# with self.subTest("invalid secretsmanager: cannot use both plain text and key/value pairs"): -# os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" -# with self.assertRaisesRegex( -# ConfigFileException, -# "You cannot have both plain text and json key for the same " -# "secret: arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username", -# ): -# ctx = ContextMock() -# # BEWARE using es_secrets plain text for elasticsearch_url and es_secrets:username for username -# _s3_client_mock.config_content = b""" -# inputs: -# - type: "s3-sqs" -# id: 
"arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secrets" -# outputs: -# - type: "elasticsearch" -# args: -# elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets" -# username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" -# password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" -# es_datastream_name: "logs-redis.log-default" -# """ -# -# event = deepcopy(dummy_event) -# -# handler(event, ctx) # type:ignore -# -# with self.subTest("invalid secretsmanager: empty secret key"): -# os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" -# with self.assertRaisesRegex( -# ConfigFileException, -# "Error for secret " -# "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:: key must " -# "not be empty", -# ): -# ctx = ContextMock() -# # BEWARE empty key at elasticsearch_url -# _s3_client_mock.config_content = b""" -# inputs: -# - type: "s3-sqs" -# id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret" -# outputs: -# - type: "elasticsearch" -# args: -# elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:" -# username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" -# password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" -# es_datastream_name: "logs-redis.log-default" -# """ -# -# event = deepcopy(dummy_event) -# -# handler(event, ctx) # type:ignore -# -# with self.subTest("invalid secretsmanager: secret does not exist"): -# os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" -# with self.assertRaisesRegex( -# ConfigFileException, -# r"An error occurred \(ResourceNotFoundException\) when calling " -# "the GetSecretValue operation: Secrets Manager can't find the specified secret.", -# ): -# ctx = ContextMock() -# _s3_client_mock.config_content = b""" -# inputs: -# - type: "s3-sqs" -# id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:DOES_NOT_EXIST" -# outputs: -# - type: "elasticsearch" -# args: -# elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" -# username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" -# password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" -# es_datastream_name: "logs-redis.log-default" -# """ -# -# event = deepcopy(dummy_event) -# -# handler(event, ctx) # type:ignore -# -# with self.subTest("invalid secretsmanager: empty plain secret value"): -# os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" -# with self.assertRaisesRegex( -# ConfigFileException, -# "Error for secret " -# "arn:aws:secretsmanager:eu-central-1:123456789:secret:empty_secret: must " -# "not be empty", -# ): -# ctx = ContextMock() -# _s3_client_mock.config_content = b""" -# inputs: -# - type: "s3-sqs" -# id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:empty_secret" -# outputs: -# - type: "elasticsearch" -# args: -# elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" -# username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" -# password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" -# es_datastream_name: "logs-redis.log-default" -# """ -# -# event = deepcopy(dummy_event) -# -# handler(event, ctx) # type:ignore -# -# with self.subTest("invalid secretsmanager: empty key/value secret value"): 
-# os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" -# with self.assertRaisesRegex( -# ConfigFileException, -# "Error for secret " -# "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:empty: must " -# "not be empty", -# ): -# ctx = ContextMock() -# _s3_client_mock.config_content = b""" -# inputs: -# - type: "s3-sqs" -# id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:empty" -# outputs: -# - type: "elasticsearch" -# args: -# elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" -# username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" -# password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" -# es_datastream_name: "logs-redis.log-default" -# """ -# -# event = deepcopy(dummy_event) -# -# handler(event, ctx) # type:ignore -# -# with self.subTest("invalid secretsmanager: plain text used as key/value"): -# os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" -# with self.assertRaisesRegex( -# ConfigFileException, -# "Error for secret " -# "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret:SHOULD_NOT_HAVE_A_KEY: " -# "expected to be keys/values pair", -# ): -# ctx = ContextMock() -# _s3_client_mock.config_content = b""" -# inputs: -# - type: "s3-sqs" -# id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret:SHOULD_NOT_HAVE_A_KEY" -# outputs: -# - type: "elasticsearch" -# args: -# elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" -# username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" -# password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" -# es_datastream_name: "logs-redis.log-default" -# """ -# -# event = deepcopy(dummy_event) -# -# handler(event, ctx) # type:ignore -# -# with self.subTest("invalid secretsmanager: key does not exist in secret manager"): -# os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" -# with self.assertRaisesRegex( -# ConfigFileException, -# "Error for secret " -# "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:I_DO_NOT_EXIST: " -# "key not found", -# ): -# ctx = ContextMock() -# _s3_client_mock.config_content = b""" -# inputs: -# - type: "s3-sqs" -# id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:I_DO_NOT_EXIST" -# outputs: -# - type: "elasticsearch" -# args: -# elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" -# username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" -# password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" -# es_datastream_name: "logs-redis.log-default" -# """ -# -# event = deepcopy(dummy_event) -# -# handler(event, ctx) # type:ignore -# -# with self.subTest("invalid secretsmanager: plain text secret not str"): -# os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" -# with self.assertRaisesRegex( -# ConfigFileException, -# "Error for secret " -# "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret_not_str_byte: " -# "expected to be a string", -# ): -# ctx = ContextMock() -# _s3_client_mock.config_content = b""" -# inputs: -# - type: "s3-sqs" -# id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret_not_str_byte" -# outputs: -# - type: "elasticsearch" -# args: -# elasticsearch_url: 
"arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" -# username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" -# password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" -# es_datastream_name: "logs-redis.log-default" -# """ -# -# event = deepcopy(dummy_event) -# -# handler(event, ctx) # type:ignore -# -# with self.subTest("invalid secretsmanager: json TypeError raised"): -# os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" -# with self.assertRaisesRegex( -# ConfigFileException, -# "Error for secret " -# "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret_not_str_int: " -# "expected to be a string", -# ): -# ctx = ContextMock() -# _s3_client_mock.config_content = b""" -# inputs: -# - type: "s3-sqs" -# id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret_not_str_int" -# outputs: -# - type: "elasticsearch" -# args: -# elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" -# username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" -# password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" -# es_datastream_name: "logs-redis.log-default" -# """ -# -# event = deepcopy(dummy_event) -# -# handler(event, ctx) # type:ignore -# -# with self.subTest("tags not list"): -# os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" -# with self.assertRaisesRegex( -# ConfigFileException, "`tags` must be provided as list for input mock_plain_text_sqs_arn" -# ): -# ctx = ContextMock() -# _s3_client_mock.config_content = b""" -# inputs: -# - type: "s3-sqs" -# id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret" -# tags: "tag1" -# outputs: -# - type: "elasticsearch" -# args: -# elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" -# username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" -# password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" -# es_datastream_name: "logs-redis.log-default" -# """ -# -# event = deepcopy(dummy_event) -# -# handler(event, ctx) # type:ignore -# -# with self.subTest("each tag must be of type str"): -# os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" -# with self.assertRaisesRegex( -# ConfigFileException, -# r"Each tag in `tags` must be provided as string for input " -# r"mock_plain_text_sqs_arn, given: \['tag1', 2, 'tag3'\]", -# ): -# ctx = ContextMock() -# _s3_client_mock.config_content = b""" -# inputs: -# - type: "s3-sqs" -# id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret" -# tags: -# - "tag1" -# - 2 -# - "tag3" -# outputs: -# - type: "elasticsearch" -# args: -# elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" -# username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" -# password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" -# es_datastream_name: "logs-redis.log-default" -# """ -# -# event = deepcopy(dummy_event) -# -# handler(event, ctx) # type:ignore -# -# with self.subTest("expand_event_list_from_field not str"): -# os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" -# with self.assertRaisesRegex( -# ConfigFileException, -# "`expand_event_list_from_field` must be provided as string for input 
mock_plain_text_sqs_arn", -# ): -# ctx = ContextMock() -# _s3_client_mock.config_content = b""" -# inputs: -# - type: "s3-sqs" -# id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret" -# expand_event_list_from_field: 0 -# outputs: -# - type: "elasticsearch" -# args: -# elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" -# username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" -# password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" -# es_datastream_name: "logs-redis.log-default" -# """ -# -# event = deepcopy(dummy_event) -# -# handler(event, ctx) # type:ignore -# -# with self.subTest("root_fields_to_add_to_expanded_event not `all` when string"): -# os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" -# with self.assertRaisesRegex( -# ConfigFileException, -# "`root_fields_to_add_to_expanded_event` must be provided as `all` or a list of strings", -# ): -# ctx = ContextMock() -# _s3_client_mock.config_content = b""" -# inputs: -# - type: "s3-sqs" -# id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret" -# root_fields_to_add_to_expanded_event: not_all -# outputs: -# - type: "elasticsearch" -# args: -# elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" -# username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" -# password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" -# es_datastream_name: "logs-redis.log-default" -# """ -# -# event = deepcopy(dummy_event) -# -# handler(event, ctx) # type:ignore -# -# with self.subTest("root_fields_to_add_to_expanded_event not `all` neither list of strings"): -# os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" -# with self.assertRaisesRegex( -# ConfigFileException, -# "`root_fields_to_add_to_expanded_event` must be provided as `all` or a list of strings", -# ): -# ctx = ContextMock() -# _s3_client_mock.config_content = b""" -# inputs: -# - type: "s3-sqs" -# id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret" -# root_fields_to_add_to_expanded_event: 0 -# outputs: -# - type: "elasticsearch" -# args: -# elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" -# username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" -# password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" -# es_datastream_name: "logs-redis.log-default" -# """ -# -# event = deepcopy(dummy_event) -# -# handler(event, ctx) # type:ignore -# -# with self.subTest("json_content_type not valid"): -# os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" -# with self.assertRaisesRegex( -# ConfigFileException, -# "`json_content_type` must be one of ndjson,single,disabled " -# "for input mock_plain_text_sqs_arn: whatever given", -# ): -# ctx = ContextMock() -# _s3_client_mock.config_content = b""" -# inputs: -# - type: "s3-sqs" -# id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret" -# json_content_type: whatever -# outputs: -# - type: "elasticsearch" -# args: -# elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" -# username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" -# password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" -# es_datastream_name: 
"logs-redis.log-default" -# """ -# -# event = deepcopy(dummy_event) -# -# handler(event, ctx) # type:ignore -# diff --git a/tests/handlers/aws/test_integrations.py b/tests/handlers/aws/test_integrations.py new file mode 100644 index 00000000..094b61fe --- /dev/null +++ b/tests/handlers/aws/test_integrations.py @@ -0,0 +1,675 @@ +# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +# or more contributor license agreements. Licensed under the Elastic License 2.0; +# you may not use this file except in compliance with the Elastic License 2.0. + +import datetime +import gzip +import os +import time +from typing import Any, Optional +from unittest import TestCase + +import boto3 +import mock +import pytest +from botocore.client import BaseClient as BotoBaseClient +from testcontainers.localstack import LocalStackContainer + +from handlers.aws.exceptions import ReplayHandlerException +from main_aws import handler +from share import get_hex_prefix, json_dumper, json_parser +from tests.testcontainers.es import ElasticsearchContainer +from tests.testcontainers.logstash import LogstashContainer + +from .utils import ( + _AWS_REGION, + _S3_NOTIFICATION_EVENT_TIME, + ContextMock, + _create_secrets, + _kinesis_create_stream, + _kinesis_put_records, + _kinesis_retrieve_event_from_kinesis_stream, + _load_file_fixture, + _logs_create_cloudwatch_logs_group, + _logs_create_cloudwatch_logs_stream, + _logs_retrieve_event_from_cloudwatch_logs, + _logs_upload_event_to_cloudwatch_logs, + _REMAINING_TIME_FORCE_CONTINUE_0ms, + _s3_upload_content_to_bucket, + _sqs_create_queue, + _sqs_get_messages, + _sqs_send_messages, + _sqs_send_s3_notifications, + _time_based_id, +) + +_OVER_COMPLETION_GRACE_PERIOD_2m = 1 + (1000 * 60 * 2) + + +@pytest.mark.integration +class TestLambdaHandlerIntegration(TestCase): + elasticsearch: Optional[ElasticsearchContainer] = None + logstash: Optional[LogstashContainer] = None + localstack: Optional[LocalStackContainer] = None + + aws_session: Optional[boto3.Session] = None + s3_client: Optional[BotoBaseClient] = None + logs_client: Optional[BotoBaseClient] = None + sqs_client: Optional[BotoBaseClient] = None + kinesis_client: Optional[BotoBaseClient] = None + sm_client: Optional[BotoBaseClient] = None + ec2_client: Optional[BotoBaseClient] = None + + secret_arn: Optional[Any] = None + + mocks: dict[str, Any] = {} + + @classmethod + def setUpClass(cls) -> None: + esc = ElasticsearchContainer() + cls.elasticsearch = esc.start() + + lgc = LogstashContainer(es_container=esc) + cls.logstash = lgc.start() + + lsc = LocalStackContainer(image="localstack/localstack:3.0.1") + lsc.with_env("EAGER_SERVICE_LOADING", "1") + lsc.with_env("SQS_DISABLE_CLOUDWATCH_METRICS", "1") + lsc.with_services("ec2", "kinesis", "logs", "s3", "sqs", "secretsmanager") + + cls.localstack = lsc.start() + + session = boto3.Session(region_name=_AWS_REGION) + cls.aws_session = session + cls.s3_client = session.client("s3", endpoint_url=cls.localstack.get_url()) + cls.logs_client = session.client("logs", endpoint_url=cls.localstack.get_url()) + cls.sqs_client = session.client("sqs", endpoint_url=cls.localstack.get_url()) + cls.kinesis_client = session.client("kinesis", endpoint_url=cls.localstack.get_url()) + cls.sm_client = session.client("secretsmanager", endpoint_url=cls.localstack.get_url()) + cls.ec2_client = session.client("ec2", endpoint_url=cls.localstack.get_url()) + + cls.secret_arn = _create_secrets( + cls.sm_client, + "es_secrets", + {"username": cls.elasticsearch.elastic_user, 
"password": cls.elasticsearch.elastic_password}, + ) + + cls.mocks = { + "storage.S3Storage._s3_client": mock.patch("storage.S3Storage._s3_client", new=cls.s3_client), + "share.secretsmanager._get_aws_sm_client": mock.patch( + "share.secretsmanager._get_aws_sm_client", lambda region_name: cls.sm_client + ), + "handlers.aws.utils.get_sqs_client": mock.patch( + "handlers.aws.utils.get_sqs_client", lambda: cls.sqs_client + ), + "handlers.aws.utils.get_ec2_client": mock.patch( + "handlers.aws.utils.get_ec2_client", lambda: cls.ec2_client + ), + "handlers.aws.handler.get_sqs_client": mock.patch( + "handlers.aws.handler.get_sqs_client", lambda: cls.sqs_client + ), + } + + for k, m in cls.mocks.items(): + m.start() + + @classmethod + def tearDownClass(cls) -> None: + assert cls.elasticsearch is not None + assert cls.logstash is not None + assert cls.localstack is not None + + cls.elasticsearch.stop() + cls.logstash.stop() + cls.localstack.stop() + + for k, m in cls.mocks.items(): + m.stop() + + def setUp(self) -> None: + assert isinstance(self.elasticsearch, ElasticsearchContainer) + assert isinstance(self.logstash, LogstashContainer) + assert isinstance(self.localstack, LocalStackContainer) + + os.environ["S3_CONFIG_FILE"] = "" + + sqs_continue_queue = _sqs_create_queue(self.sqs_client, _time_based_id(suffix="continuing")) + sqs_replay_queue = _sqs_create_queue(self.sqs_client, _time_based_id(suffix="replay")) + os.environ["SQS_CONTINUE_URL"] = sqs_continue_queue["QueueUrl"] + os.environ["SQS_REPLAY_URL"] = sqs_replay_queue["QueueUrl"] + + self.sqs_continue_queue_arn = sqs_continue_queue["QueueArn"] + self.sqs_replay_queue_arn = sqs_replay_queue["QueueArn"] + + self.default_tags: str = """ + - "tag1" + - "tag2" + - "tag3" + """ + + self.default_outputs: str = f""" + - type: "elasticsearch" + args: + elasticsearch_url: "{self.elasticsearch.get_url()}" + ssl_assert_fingerprint: {self.elasticsearch.ssl_assert_fingerprint} + username: "{self.secret_arn}:username" + password: "{self.secret_arn}:password" + - type: "logstash" + args: + logstash_url: "{self.logstash.get_url()}" + ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} + username: "{self.logstash.logstash_user}" + password: "{self.logstash.logstash_password}" + """ + + def tearDown(self) -> None: + assert isinstance(self.elasticsearch, ElasticsearchContainer) + assert isinstance(self.logstash, LogstashContainer) + + self.logstash.reset() + self.elasticsearch.reset() + + os.environ["S3_CONFIG_FILE"] = "" + os.environ["SQS_CONTINUE_URL"] = "" + os.environ["SQS_REPLAY_URL"] = "" + + def test_ls_es_output(self) -> None: + assert isinstance(self.elasticsearch, ElasticsearchContainer) + assert isinstance(self.logstash, LogstashContainer) + assert isinstance(self.localstack, LocalStackContainer) + + s3_sqs_queue_name = _time_based_id(suffix="source-s3-sqs") + + s3_sqs_queue = _sqs_create_queue(self.sqs_client, s3_sqs_queue_name, self.localstack.get_url()) + + s3_sqs_queue_arn = s3_sqs_queue["QueueArn"] + s3_sqs_queue_url = s3_sqs_queue["QueueUrl"] + + config_yaml: str = f""" + inputs: + - type: s3-sqs + id: "{s3_sqs_queue_arn}" + tags: {self.default_tags} + outputs: {self.default_outputs} + """ + + config_file_path = "config.yaml" + config_bucket_name = _time_based_id(suffix="config-bucket") + _s3_upload_content_to_bucket( + client=self.s3_client, + content=config_yaml.encode("utf-8"), + content_type="text/plain", + bucket_name=config_bucket_name, + key=config_file_path, + ) + + os.environ["S3_CONFIG_FILE"] = 
f"s3://{config_bucket_name}/{config_file_path}" + fixtures = [ + _load_file_fixture("cloudwatch-log-1.json"), + _load_file_fixture("cloudwatch-log-2.json"), + ] + + cloudtrail_filename_digest = ( + "AWSLogs/aws-account-id/CloudTrail-Digest/region/yyyy/mm/dd/" + "aws-account-id_CloudTrail-Digest_region_end-time_random-string.log.gz" + ) + cloudtrail_filename_non_digest = ( + "AWSLogs/aws-account-id/CloudTrail/region/yyyy/mm/dd/" + "aws-account-id_CloudTrail_region_end-time_random-string.log.gz" + ) + + s3_bucket_name = _time_based_id(suffix="test-bucket") + + _s3_upload_content_to_bucket( + client=self.s3_client, + content=gzip.compress(fixtures[0].encode("utf-8")), + content_type="application/x-gzip", + bucket_name=s3_bucket_name, + key=cloudtrail_filename_digest, + ) + + _s3_upload_content_to_bucket( + client=self.s3_client, + content=gzip.compress(fixtures[1].encode("utf-8")), + content_type="application/x-gzip", + bucket_name=s3_bucket_name, + key=cloudtrail_filename_non_digest, + ) + + _sqs_send_s3_notifications( + self.sqs_client, + s3_sqs_queue_url, + s3_bucket_name, + [cloudtrail_filename_digest, cloudtrail_filename_non_digest], + ) + + event, _ = _sqs_get_messages(self.sqs_client, s3_sqs_queue_url, s3_sqs_queue_arn) + + ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) + first_call = handler(event, ctx) # type:ignore + + assert first_call == "completed" + + self.elasticsearch.refresh(index="logs-aws.cloudtrail-default") + assert self.elasticsearch.count(index="logs-aws.cloudtrail-default")["count"] == 2 + + res = self.elasticsearch.search(index="logs-aws.cloudtrail-default", sort="_seq_no") + assert res["hits"]["total"] == {"value": 2, "relation": "eq"} + + assert res["hits"]["hits"][0]["_source"]["message"] == fixtures[0].rstrip("\n") + assert res["hits"]["hits"][0]["_source"]["log"]["offset"] == 0 + assert ( + res["hits"]["hits"][0]["_source"]["log"]["file"]["path"] + == f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{cloudtrail_filename_digest}" + ) + assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name + assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" + assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["object"]["key"] == cloudtrail_filename_digest + assert res["hits"]["hits"][0]["_source"]["cloud"]["provider"] == "aws" + assert res["hits"]["hits"][0]["_source"]["cloud"]["region"] == "eu-central-1" + assert res["hits"]["hits"][0]["_source"]["cloud"]["account"]["id"] == "000000000000" + assert res["hits"]["hits"][0]["_source"]["tags"] == ["forwarded", "aws-cloudtrail", "tag1", "tag2", "tag3"] + + assert res["hits"]["hits"][1]["_source"]["message"] == fixtures[1].rstrip("\n") + assert res["hits"]["hits"][1]["_source"]["log"]["offset"] == 0 + assert ( + res["hits"]["hits"][1]["_source"]["log"]["file"]["path"] + == f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{cloudtrail_filename_non_digest}" + ) + assert res["hits"]["hits"][1]["_source"]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name + assert res["hits"]["hits"][1]["_source"]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" + assert res["hits"]["hits"][1]["_source"]["aws"]["s3"]["object"]["key"] == cloudtrail_filename_non_digest + assert res["hits"]["hits"][1]["_source"]["cloud"]["provider"] == "aws" + assert res["hits"]["hits"][1]["_source"]["cloud"]["region"] == "eu-central-1" + assert res["hits"]["hits"][1]["_source"]["cloud"]["account"]["id"] == "000000000000" + 
assert res["hits"]["hits"][1]["_source"]["tags"] == ["forwarded", "aws-cloudtrail", "tag1", "tag2", "tag3"] + + logstash_message = self.logstash.get_messages(expected=2) + assert len(logstash_message) == 2 + res["hits"]["hits"][0]["_source"]["tags"].remove("aws-cloudtrail") + res["hits"]["hits"][1]["_source"]["tags"].remove("aws-cloudtrail") + + assert res["hits"]["hits"][0]["_source"]["aws"] == logstash_message[0]["aws"] + assert res["hits"]["hits"][0]["_source"]["cloud"] == logstash_message[0]["cloud"] + assert res["hits"]["hits"][0]["_source"]["log"] == logstash_message[0]["log"] + assert res["hits"]["hits"][0]["_source"]["message"] == logstash_message[0]["message"] + assert res["hits"]["hits"][0]["_source"]["tags"] == logstash_message[0]["tags"] + + assert res["hits"]["hits"][1]["_source"]["aws"] == logstash_message[1]["aws"] + assert res["hits"]["hits"][1]["_source"]["cloud"] == logstash_message[1]["cloud"] + assert res["hits"]["hits"][1]["_source"]["log"] == logstash_message[1]["log"] + assert res["hits"]["hits"][1]["_source"]["message"] == logstash_message[1]["message"] + assert res["hits"]["hits"][1]["_source"]["tags"] == logstash_message[1]["tags"] + + self.elasticsearch.refresh(index="logs-stash.elasticsearch-output") + assert self.elasticsearch.count(index="logs-stash.elasticsearch-output")["count"] == 2 + + res = self.elasticsearch.search(index="logs-stash.elasticsearch-output", sort="_seq_no") + assert res["hits"]["total"] == {"value": 2, "relation": "eq"} + + assert res["hits"]["hits"][0]["_source"]["aws"] == logstash_message[0]["aws"] + assert res["hits"]["hits"][0]["_source"]["cloud"] == logstash_message[0]["cloud"] + assert res["hits"]["hits"][0]["_source"]["log"] == logstash_message[0]["log"] + assert res["hits"]["hits"][0]["_source"]["message"] == logstash_message[0]["message"] + assert res["hits"]["hits"][0]["_source"]["tags"] == logstash_message[0]["tags"] + + assert res["hits"]["hits"][1]["_source"]["aws"] == logstash_message[1]["aws"] + assert res["hits"]["hits"][1]["_source"]["cloud"] == logstash_message[1]["cloud"] + assert res["hits"]["hits"][1]["_source"]["log"] == logstash_message[1]["log"] + assert res["hits"]["hits"][1]["_source"]["message"] == logstash_message[1]["message"] + assert res["hits"]["hits"][1]["_source"]["tags"] == logstash_message[1]["tags"] + + def test_continuing(self) -> None: + assert isinstance(self.elasticsearch, ElasticsearchContainer) + assert isinstance(self.logstash, LogstashContainer) + assert isinstance(self.localstack, LocalStackContainer) + +# fixtures = [ +# _load_file_fixture("cloudwatch-log-1.json"), +# _load_file_fixture("cloudwatch-log-2.json"), +# ] +# +# s3_bucket_name = _time_based_id(suffix="test-bucket") +# first_filename = "exportedlog/uuid/yyyy-mm-dd-[$LATEST]hash/000000.gz" +# _s3_upload_content_to_bucket( +# client=self.s3_client, +# content=gzip.compress("".join(fixtures).encode("utf-8")), +# content_type="application/x-gzip", +# bucket_name=s3_bucket_name, +# key=first_filename, +# ) +# +# cloudwatch_group_name = _time_based_id(suffix="source-group") +# cloudwatch_group = _logs_create_cloudwatch_logs_group(self.logs_client, group_name=cloudwatch_group_name) +# +# cloudwatch_stream_name = _time_based_id(suffix="source-stream") +# _logs_create_cloudwatch_logs_stream( +# self.logs_client, group_name=cloudwatch_group_name, stream_name=cloudwatch_stream_name +# ) +# +# _logs_upload_event_to_cloudwatch_logs( +# self.logs_client, +# group_name=cloudwatch_group_name, +# stream_name=cloudwatch_stream_name, +# 
messages_body=["".join(fixtures)], +# ) +# +# cloudwatch_group_arn = cloudwatch_group["arn"] +# +# cloudwatch_group_name = cloudwatch_group_name +# cloudwatch_stream_name = cloudwatch_stream_name +# +# sqs_queue_name = _time_based_id(suffix="source-sqs") +# s3_sqs_queue_name = _time_based_id(suffix="source-s3-sqs") +# +# sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) +# s3_sqs_queue = _sqs_create_queue(self.sqs_client, s3_sqs_queue_name, self.localstack.get_url()) +# +# sqs_queue_arn = sqs_queue["QueueArn"] +# sqs_queue_url = sqs_queue["QueueUrl"] +# sqs_queue_url_path = sqs_queue["QueueUrlPath"] +# +# s3_sqs_queue_arn = s3_sqs_queue["QueueArn"] +# s3_sqs_queue_url = s3_sqs_queue["QueueUrl"] +# +# _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) +# _sqs_send_s3_notifications(self.sqs_client, s3_sqs_queue_url, s3_bucket_name, [first_filename]) +# +# kinesis_stream_name = _time_based_id(suffix="source-kinesis") +# kinesis_stream = _kinesis_create_stream(self.kinesis_client, kinesis_stream_name) +# kinesis_stream_arn = kinesis_stream["StreamDescription"]["StreamARN"] +# +# _kinesis_put_records(self.kinesis_client, kinesis_stream_name, ["".join(fixtures)]) +# +# config_yaml: str = f""" +# inputs: +# - type: "kinesis-data-stream" +# id: "{kinesis_stream_arn}" +# tags: {self.default_tags} +# outputs: {self.default_outputs} +# - type: "cloudwatch-logs" +# id: "{cloudwatch_group_arn}" +# tags: {self.default_tags} +# outputs: {self.default_outputs} +# - type: sqs +# id: "{sqs_queue_arn}" +# tags: {self.default_tags} +# outputs: {self.default_outputs} +# - type: s3-sqs +# id: "{s3_sqs_queue_arn}" +# tags: {self.default_tags} +# outputs: {self.default_outputs} +# """ +# +# config_file_path = "config.yaml" +# config_bucket_name = _time_based_id(suffix="config-bucket") +# _s3_upload_content_to_bucket( +# client=self.s3_client, +# content=config_yaml.encode("utf-8"), +# content_type="text/plain", +# bucket_name=config_bucket_name, +# key=config_file_path, +# ) +# +# os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" +# +# events_s3, _ = _sqs_get_messages(self.sqs_client, s3_sqs_queue_url, s3_sqs_queue_arn) +# +# events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) +# +# message_id = events_sqs["Records"][0]["messageId"] +# +# events_cloudwatch_logs, event_ids_cloudwatch_logs, _ = _logs_retrieve_event_from_cloudwatch_logs( +# self.logs_client, cloudwatch_group_name, cloudwatch_stream_name +# ) +# +# events_kinesis, _ = _kinesis_retrieve_event_from_kinesis_stream( +# self.kinesis_client, kinesis_stream_name, kinesis_stream_arn +# ) +# +# ctx = ContextMock() +# first_call = handler(events_s3, ctx) # type:ignore +# +# assert first_call == "continuing" +# +# self.elasticsearch.refresh(index="logs-generic-default") +# assert self.elasticsearch.count(index="logs-generic-default")["count"] == 1 +# +# res = self.elasticsearch.search(index="logs-generic-default", sort="_seq_no") +# assert res["hits"]["total"] == {"value": 1, "relation": "eq"} +# +# assert res["hits"]["hits"][0]["_source"]["message"] == fixtures[0].rstrip("\n") +# assert res["hits"]["hits"][0]["_source"]["log"]["offset"] == 0 +# assert ( +# res["hits"]["hits"][0]["_source"]["log"]["file"]["path"] +# == f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{first_filename}" +# ) +# assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name +# assert 
res["hits"]["hits"][0]["_source"]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" +# assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["object"]["key"] == first_filename +# assert res["hits"]["hits"][0]["_source"]["cloud"]["provider"] == "aws" +# assert res["hits"]["hits"][0]["_source"]["cloud"]["region"] == "eu-central-1" +# assert res["hits"]["hits"][0]["_source"]["cloud"]["account"]["id"] == "000000000000" +# assert res["hits"]["hits"][0]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] +# +# logstash_message = self.logstash.get_messages(expected=1) +# assert len(logstash_message) == 1 +# res["hits"]["hits"][0]["_source"]["tags"].remove("generic") +# assert res["hits"]["hits"][0]["_source"]["aws"] == logstash_message[0]["aws"] +# assert res["hits"]["hits"][0]["_source"]["cloud"] == logstash_message[0]["cloud"] +# assert res["hits"]["hits"][0]["_source"]["log"] == logstash_message[0]["log"] +# assert res["hits"]["hits"][0]["_source"]["message"] == logstash_message[0]["message"] +# assert res["hits"]["hits"][0]["_source"]["tags"] == logstash_message[0]["tags"] +# +# second_call = handler(events_sqs, ctx) # type:ignore +# +# assert second_call == "continuing" +# +# self.elasticsearch.refresh(index="logs-generic-default") +# assert self.elasticsearch.count(index="logs-generic-default")["count"] == 2 +# +# res = self.elasticsearch.search(index="logs-generic-default", sort="_seq_no") +# assert res["hits"]["total"] == {"value": 2, "relation": "eq"} +# +# assert res["hits"]["hits"][1]["_source"]["message"] == fixtures[0].rstrip("\n") +# assert res["hits"]["hits"][1]["_source"]["log"]["offset"] == 0 +# assert res["hits"]["hits"][1]["_source"]["log"]["file"]["path"] == sqs_queue_url_path +# assert res["hits"]["hits"][1]["_source"]["aws"]["sqs"]["name"] == sqs_queue_name +# assert res["hits"]["hits"][1]["_source"]["aws"]["sqs"]["message_id"] == message_id +# assert res["hits"]["hits"][1]["_source"]["cloud"]["provider"] == "aws" +# assert res["hits"]["hits"][1]["_source"]["cloud"]["region"] == "us-east-1" +# assert res["hits"]["hits"][1]["_source"]["cloud"]["account"]["id"] == "000000000000" +# assert res["hits"]["hits"][1]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] +# +# logstash_message = self.logstash.get_messages(expected=2) +# assert len(logstash_message) == 2 +# res["hits"]["hits"][1]["_source"]["tags"].remove("generic") +# assert res["hits"]["hits"][1]["_source"]["aws"] == logstash_message[1]["aws"] +# assert res["hits"]["hits"][1]["_source"]["cloud"] == logstash_message[1]["cloud"] +# assert res["hits"]["hits"][1]["_source"]["log"] == logstash_message[1]["log"] +# assert res["hits"]["hits"][1]["_source"]["message"] == logstash_message[1]["message"] +# assert res["hits"]["hits"][1]["_source"]["tags"] == logstash_message[1]["tags"] +# +# third_call = handler(events_cloudwatch_logs, ctx) # type:ignore +# +# assert third_call == "continuing" +# +# self.elasticsearch.refresh(index="logs-generic-default") +# assert self.elasticsearch.count(index="logs-generic-default")["count"] == 3 +# +# res = self.elasticsearch.search(index="logs-generic-default", sort="_seq_no") +# assert res["hits"]["total"] == {"value": 3, "relation": "eq"} +# +# assert res["hits"]["hits"][2]["_source"]["message"] == fixtures[0].rstrip("\n") +# assert res["hits"]["hits"][2]["_source"]["log"]["offset"] == 0 +# assert ( +# res["hits"]["hits"][2]["_source"]["log"]["file"]["path"] +# == f"{cloudwatch_group_name}/{cloudwatch_stream_name}" +# ) +# assert 
res["hits"]["hits"][2]["_source"]["aws"]["cloudwatch"]["log_group"] == cloudwatch_group_name +# assert res["hits"]["hits"][2]["_source"]["aws"]["cloudwatch"]["log_stream"] == cloudwatch_stream_name +# assert res["hits"]["hits"][2]["_source"]["aws"]["cloudwatch"]["event_id"] == event_ids_cloudwatch_logs[0] +# assert res["hits"]["hits"][2]["_source"]["cloud"]["provider"] == "aws" +# assert res["hits"]["hits"][2]["_source"]["cloud"]["region"] == "us-east-1" +# assert res["hits"]["hits"][2]["_source"]["cloud"]["account"]["id"] == "000000000000" +# assert res["hits"]["hits"][2]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] +# +# logstash_message = self.logstash.get_messages(expected=3) +# assert len(logstash_message) == 3 +# res["hits"]["hits"][2]["_source"]["tags"].remove("generic") +# assert res["hits"]["hits"][2]["_source"]["aws"] == logstash_message[2]["aws"] +# assert res["hits"]["hits"][2]["_source"]["cloud"] == logstash_message[2]["cloud"] +# assert res["hits"]["hits"][2]["_source"]["log"] == logstash_message[2]["log"] +# assert res["hits"]["hits"][2]["_source"]["message"] == logstash_message[2]["message"] +# assert res["hits"]["hits"][2]["_source"]["tags"] == logstash_message[2]["tags"] +# +# fourth_call = handler(events_kinesis, ctx) # type:ignore +# +# assert fourth_call == "continuing" +# +# self.elasticsearch.refresh(index="logs-generic-default") +# assert self.elasticsearch.count(index="logs-generic-default")["count"] == 4 +# +# res = self.elasticsearch.search(index="logs-generic-default", sort="_seq_no") +# assert res["hits"]["total"] == {"value": 4, "relation": "eq"} +# +# assert res["hits"]["hits"][3]["_source"]["message"] == fixtures[0].rstrip("\n") +# assert res["hits"]["hits"][3]["_source"]["log"]["offset"] == 0 +# assert res["hits"]["hits"][3]["_source"]["log"]["file"]["path"] == kinesis_stream_arn +# assert res["hits"]["hits"][3]["_source"]["aws"]["kinesis"]["type"] == "stream" +# assert res["hits"]["hits"][3]["_source"]["aws"]["kinesis"]["partition_key"] == "PartitionKey" +# assert res["hits"]["hits"][3]["_source"]["aws"]["kinesis"]["name"] == kinesis_stream_name +# assert ( +# res["hits"]["hits"][3]["_source"]["aws"]["kinesis"]["sequence_number"] +# == events_kinesis["Records"][0]["kinesis"]["sequenceNumber"] +# ) +# assert res["hits"]["hits"][3]["_source"]["cloud"]["provider"] == "aws" +# assert res["hits"]["hits"][3]["_source"]["cloud"]["region"] == "us-east-1" +# assert res["hits"]["hits"][3]["_source"]["cloud"]["account"]["id"] == "000000000000" +# assert res["hits"]["hits"][3]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] +# +# logstash_message = self.logstash.get_messages(expected=4) +# assert len(logstash_message) == 4 +# res["hits"]["hits"][3]["_source"]["tags"].remove("generic") +# assert res["hits"]["hits"][3]["_source"]["aws"] == logstash_message[3]["aws"] +# assert res["hits"]["hits"][3]["_source"]["cloud"] == logstash_message[3]["cloud"] +# assert res["hits"]["hits"][3]["_source"]["log"] == logstash_message[3]["log"] +# assert res["hits"]["hits"][3]["_source"]["message"] == logstash_message[3]["message"] +# assert res["hits"]["hits"][3]["_source"]["tags"] == logstash_message[3]["tags"] +# +# continued_events, _ = _sqs_get_messages( +# self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn +# ) +# +# fifth_call = handler(continued_events, ctx) # type:ignore +# +# assert fifth_call == "continuing" +# +# self.elasticsearch.refresh(index="logs-generic-default") +# assert 
self.elasticsearch.count(index="logs-generic-default")["count"] == 5 +# +# res = self.elasticsearch.search(index="logs-generic-default", sort="_seq_no") +# assert res["hits"]["total"] == {"value": 5, "relation": "eq"} +# +# assert res["hits"]["hits"][4]["_source"]["message"] == fixtures[1].rstrip("\n") +# assert res["hits"]["hits"][4]["_source"]["log"]["offset"] == 94 +# assert ( +# res["hits"]["hits"][4]["_source"]["log"]["file"]["path"] +# == f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{first_filename}" +# ) +# assert res["hits"]["hits"][4]["_source"]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name +# assert res["hits"]["hits"][4]["_source"]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" +# assert res["hits"]["hits"][4]["_source"]["aws"]["s3"]["object"]["key"] == first_filename +# assert res["hits"]["hits"][4]["_source"]["cloud"]["provider"] == "aws" +# assert res["hits"]["hits"][4]["_source"]["cloud"]["region"] == "eu-central-1" +# assert res["hits"]["hits"][4]["_source"]["cloud"]["account"]["id"] == "000000000000" +# assert res["hits"]["hits"][4]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] +# +# logstash_message = self.logstash.get_messages(expected=5) +# assert len(logstash_message) == 5 +# res["hits"]["hits"][4]["_source"]["tags"].remove("generic") +# assert res["hits"]["hits"][4]["_source"]["aws"] == logstash_message[4]["aws"] +# assert res["hits"]["hits"][4]["_source"]["cloud"] == logstash_message[4]["cloud"] +# assert res["hits"]["hits"][4]["_source"]["log"] == logstash_message[4]["log"] +# assert res["hits"]["hits"][4]["_source"]["message"] == logstash_message[4]["message"] +# assert res["hits"]["hits"][4]["_source"]["tags"] == logstash_message[4]["tags"] +# +# ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) +# +# continued_events, _ = _sqs_get_messages( +# self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn +# ) +# sixth_call = handler(continued_events, ctx) # type:ignore +# +# assert sixth_call == "completed" +# +# self.elasticsearch.refresh(index="logs-generic-default") +# assert self.elasticsearch.count(index="logs-generic-default")["count"] == 8 +# +# res = self.elasticsearch.search(index="logs-generic-default", sort="_seq_no") +# assert res["hits"]["total"] == {"value": 8, "relation": "eq"} +# +# assert res["hits"]["hits"][5]["_source"]["message"] == fixtures[1].rstrip("\n") +# assert res["hits"]["hits"][5]["_source"]["log"]["offset"] == 94 +# assert res["hits"]["hits"][5]["_source"]["log"]["file"]["path"] == sqs_queue_url_path +# assert res["hits"]["hits"][5]["_source"]["aws"]["sqs"]["name"] == sqs_queue_name +# assert res["hits"]["hits"][5]["_source"]["aws"]["sqs"]["message_id"] == message_id +# assert res["hits"]["hits"][5]["_source"]["cloud"]["provider"] == "aws" +# assert res["hits"]["hits"][5]["_source"]["cloud"]["region"] == "us-east-1" +# assert res["hits"]["hits"][5]["_source"]["cloud"]["account"]["id"] == "000000000000" +# assert res["hits"]["hits"][5]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] +# +# assert res["hits"]["hits"][6]["_source"]["message"] == fixtures[1].rstrip("\n") +# assert res["hits"]["hits"][6]["_source"]["log"]["offset"] == 94 +# assert ( +# res["hits"]["hits"][6]["_source"]["log"]["file"]["path"] +# == f"{cloudwatch_group_name}/{cloudwatch_stream_name}" +# ) +# assert res["hits"]["hits"][6]["_source"]["aws"]["cloudwatch"]["log_group"] == cloudwatch_group_name +# assert 
res["hits"]["hits"][6]["_source"]["aws"]["cloudwatch"]["log_stream"] == cloudwatch_stream_name +# assert res["hits"]["hits"][6]["_source"]["aws"]["cloudwatch"]["event_id"] == event_ids_cloudwatch_logs[0] +# assert res["hits"]["hits"][6]["_source"]["cloud"]["provider"] == "aws" +# assert res["hits"]["hits"][6]["_source"]["cloud"]["region"] == "us-east-1" +# assert res["hits"]["hits"][6]["_source"]["cloud"]["account"]["id"] == "000000000000" +# assert res["hits"]["hits"][6]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] +# +# assert res["hits"]["hits"][7]["_source"]["message"] == fixtures[1].rstrip("\n") +# assert res["hits"]["hits"][7]["_source"]["log"]["offset"] == 94 +# assert res["hits"]["hits"][7]["_source"]["log"]["file"]["path"] == kinesis_stream_arn +# assert res["hits"]["hits"][7]["_source"]["aws"]["kinesis"]["type"] == "stream" +# assert res["hits"]["hits"][7]["_source"]["aws"]["kinesis"]["partition_key"] == "PartitionKey" +# assert res["hits"]["hits"][7]["_source"]["aws"]["kinesis"]["name"] == kinesis_stream_name +# assert ( +# res["hits"]["hits"][7]["_source"]["aws"]["kinesis"]["sequence_number"] +# == events_kinesis["Records"][0]["kinesis"]["sequenceNumber"] +# ) +# assert res["hits"]["hits"][7]["_source"]["cloud"]["provider"] == "aws" +# assert res["hits"]["hits"][7]["_source"]["cloud"]["region"] == "us-east-1" +# assert res["hits"]["hits"][7]["_source"]["cloud"]["account"]["id"] == "000000000000" +# assert res["hits"]["hits"][7]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] +# +# logstash_message = self.logstash.get_messages(expected=8) +# assert len(logstash_message) == 8 +# res["hits"]["hits"][5]["_source"]["tags"].remove("generic") +# res["hits"]["hits"][6]["_source"]["tags"].remove("generic") +# res["hits"]["hits"][7]["_source"]["tags"].remove("generic") +# +# assert res["hits"]["hits"][5]["_source"]["aws"] == logstash_message[5]["aws"] +# assert res["hits"]["hits"][5]["_source"]["cloud"] == logstash_message[5]["cloud"] +# assert res["hits"]["hits"][5]["_source"]["log"] == logstash_message[5]["log"] +# assert res["hits"]["hits"][5]["_source"]["message"] == logstash_message[5]["message"] +# assert res["hits"]["hits"][5]["_source"]["tags"] == logstash_message[5]["tags"] +# +# assert res["hits"]["hits"][6]["_source"]["aws"] == logstash_message[6]["aws"] +# assert res["hits"]["hits"][6]["_source"]["cloud"] == logstash_message[6]["cloud"] +# assert res["hits"]["hits"][6]["_source"]["log"] == logstash_message[6]["log"] +# assert res["hits"]["hits"][6]["_source"]["message"] == logstash_message[6]["message"] +# assert res["hits"]["hits"][6]["_source"]["tags"] == logstash_message[6]["tags"] +# +# assert res["hits"]["hits"][7]["_source"]["aws"] == logstash_message[7]["aws"] +# assert res["hits"]["hits"][7]["_source"]["cloud"] == logstash_message[7]["cloud"] +# assert res["hits"]["hits"][7]["_source"]["log"] == logstash_message[7]["log"] +# assert res["hits"]["hits"][7]["_source"]["message"] == logstash_message[7]["message"] +# assert res["hits"]["hits"][7]["_source"]["tags"] == logstash_message[7]["tags"] +# +# From 2d1a0b1e248d35372355ef3b341fb5b2d5b09704 Mon Sep 17 00:00:00 2001 From: constanca Date: Wed, 17 Apr 2024 12:50:21 +0200 Subject: [PATCH 18/26] . 
--- tests/handlers/aws/test_integrations.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/handlers/aws/test_integrations.py b/tests/handlers/aws/test_integrations.py index 094b61fe..cb8f5ac0 100644 --- a/tests/handlers/aws/test_integrations.py +++ b/tests/handlers/aws/test_integrations.py @@ -321,8 +321,8 @@ def test_ls_es_output(self) -> None: def test_continuing(self) -> None: assert isinstance(self.elasticsearch, ElasticsearchContainer) - assert isinstance(self.logstash, LogstashContainer) - assert isinstance(self.localstack, LocalStackContainer) + #assert isinstance(self.logstash, LogstashContainer) + #assert isinstance(self.localstack, LocalStackContainer) # fixtures = [ # _load_file_fixture("cloudwatch-log-1.json"), From a9431923f2116c07a30d748cf582808ae0c22dd9 Mon Sep 17 00:00:00 2001 From: constanca Date: Wed, 17 Apr 2024 14:19:27 +0200 Subject: [PATCH 19/26] . --- requirements-tests.txt | 2 +- tests/handlers/aws/test_integrations.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements-tests.txt b/requirements-tests.txt index 9f3a87cd..c0e92d6e 100644 --- a/requirements-tests.txt +++ b/requirements-tests.txt @@ -9,5 +9,5 @@ pysimdjson==5.0.2 python-rapidjson==1.14 cysimdjson==23.8 responses==0.24.1 -testcontainers==3.7.1 +testcontainers==4.0.0 pyOpenSSL==24.0.0 diff --git a/tests/handlers/aws/test_integrations.py b/tests/handlers/aws/test_integrations.py index cb8f5ac0..094b61fe 100644 --- a/tests/handlers/aws/test_integrations.py +++ b/tests/handlers/aws/test_integrations.py @@ -321,8 +321,8 @@ def test_ls_es_output(self) -> None: def test_continuing(self) -> None: assert isinstance(self.elasticsearch, ElasticsearchContainer) - #assert isinstance(self.logstash, LogstashContainer) - #assert isinstance(self.localstack, LocalStackContainer) + assert isinstance(self.logstash, LogstashContainer) + assert isinstance(self.localstack, LocalStackContainer) # fixtures = [ # _load_file_fixture("cloudwatch-log-1.json"), From 767c24b33389f5cb739285d1ab44383300cbd155 Mon Sep 17 00:00:00 2001 From: constanca Date: Wed, 17 Apr 2024 14:23:09 +0200 Subject: [PATCH 20/26] . --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 57efb0b8..33b5317f 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -36,7 +36,7 @@ jobs: - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 with: python-version: '3.9' # As defined in tests/scripts/docker/run_tests.sh cache: 'pip' # caching pip dependencies From e8719c59d5a95bbd4e1a357544813c7d1dd914e9 Mon Sep 17 00:00:00 2001 From: constanca Date: Wed, 17 Apr 2024 15:07:17 +0200 Subject: [PATCH 21/26] . 
--- .github/workflows/test.yml | 2 +- requirements-tests.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 33b5317f..57efb0b8 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -36,7 +36,7 @@ jobs: - uses: actions/checkout@v3 - - uses: actions/setup-python@v5 + - uses: actions/setup-python@v4 with: python-version: '3.9' # As defined in tests/scripts/docker/run_tests.sh cache: 'pip' # caching pip dependencies diff --git a/requirements-tests.txt b/requirements-tests.txt index c0e92d6e..5340ee56 100644 --- a/requirements-tests.txt +++ b/requirements-tests.txt @@ -1,5 +1,5 @@ mock==5.1.0 -pytest==7.4.4 +pytest==8.1.1 pytest-cov==4.1.0 pytest-benchmark==4.0.0 coverage==7.4.1 From 909b30f0a2ceb739ee109c0c2e7ffa10506ce41c Mon Sep 17 00:00:00 2001 From: constanca Date: Wed, 17 Apr 2024 15:13:54 +0200 Subject: [PATCH 22/26] . --- tests/testcontainers/logstash.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testcontainers/logstash.py b/tests/testcontainers/logstash.py index dba41a00..a7b466ed 100644 --- a/tests/testcontainers/logstash.py +++ b/tests/testcontainers/logstash.py @@ -201,8 +201,8 @@ def reset(self) -> None: self._last_reset_message_count = self._previous_message_count def start(self) -> LogstashContainer: - self._configure() super().start() + self._configure() self._connect() return self From b83c882785029ad362bf7e2e4400d8a298f0e834 Mon Sep 17 00:00:00 2001 From: constanca Date: Wed, 17 Apr 2024 15:17:38 +0200 Subject: [PATCH 23/26] . --- tests/testcontainers/logstash.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testcontainers/logstash.py b/tests/testcontainers/logstash.py index a7b466ed..dba41a00 100644 --- a/tests/testcontainers/logstash.py +++ b/tests/testcontainers/logstash.py @@ -201,8 +201,8 @@ def reset(self) -> None: self._last_reset_message_count = self._previous_message_count def start(self) -> LogstashContainer: - super().start() self._configure() + super().start() self._connect() return self From 15956a9933e2c90535b457c08b27636f043dfddc Mon Sep 17 00:00:00 2001 From: constanca Date: Wed, 17 Apr 2024 15:18:57 +0200 Subject: [PATCH 24/26] . --- main_aws.py | 1 - requirements-tests.txt | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/main_aws.py b/main_aws.py index afbad7b6..729e3269 100644 --- a/main_aws.py +++ b/main_aws.py @@ -9,7 +9,6 @@ from handlers.aws import lambda_handler - def handler(lambda_event: dict[str, Any], lambda_context: context_.Context) -> Any: """ AWS Lambda handler as main entrypoint diff --git a/requirements-tests.txt b/requirements-tests.txt index 5340ee56..9f3a87cd 100644 --- a/requirements-tests.txt +++ b/requirements-tests.txt @@ -1,5 +1,5 @@ mock==5.1.0 -pytest==8.1.1 +pytest==7.4.4 pytest-cov==4.1.0 pytest-benchmark==4.0.0 coverage==7.4.1 @@ -9,5 +9,5 @@ pysimdjson==5.0.2 python-rapidjson==1.14 cysimdjson==23.8 responses==0.24.1 -testcontainers==4.0.0 +testcontainers==3.7.1 pyOpenSSL==24.0.0 From 513489bd8fbd80485514aec920bd2e43852ab4e7 Mon Sep 17 00:00:00 2001 From: constanca Date: Wed, 17 Apr 2024 15:49:18 +0200 Subject: [PATCH 25/26] . 
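Align the Elasticsearch and Logstash test containers on the same 7.17.20 stack release, so the integration tests exercise matching versions of both products.

A minimal sketch (illustrative only, not part of this patch) of how that alignment could be checked, assuming the `_DEFAULT_IMAGE` and `_DEFAULT_VERSION` class attributes shown in the diff below are still combined by the container classes into an `<image>:<version>` reference:

    # Illustrative check only: both test containers should default to the same stack version.
    from tests.testcontainers.es import ElasticsearchContainer
    from tests.testcontainers.logstash import LogstashContainer

    def default_image(container_cls: type) -> str:
        # Assumption: the containers build their image reference as "<image>:<version>".
        return f"{container_cls._DEFAULT_IMAGE}:{container_cls._DEFAULT_VERSION}"

    assert default_image(ElasticsearchContainer).endswith(":7.17.20")
    assert default_image(LogstashContainer).endswith(":7.17.20")
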
--- tests/testcontainers/es.py | 2 +- tests/testcontainers/logstash.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testcontainers/es.py b/tests/testcontainers/es.py index 86443440..50a8abba 100644 --- a/tests/testcontainers/es.py +++ b/tests/testcontainers/es.py @@ -31,7 +31,7 @@ class ElasticsearchContainer(DockerContainer): # type: ignore """ _DEFAULT_IMAGE = "docker.elastic.co/elasticsearch/elasticsearch" - _DEFAULT_VERSION = "7.17.9" + _DEFAULT_VERSION = "7.17.20" _DEFAULT_PORT = 9200 _DEFAULT_USERNAME = DEFAULT_USERNAME _DEFAULT_PASSWORD = DEFAULT_PASSWORD diff --git a/tests/testcontainers/logstash.py b/tests/testcontainers/logstash.py index dba41a00..b00824a5 100644 --- a/tests/testcontainers/logstash.py +++ b/tests/testcontainers/logstash.py @@ -38,7 +38,7 @@ class LogstashContainer(DockerContainer): # type: ignore """ _DEFAULT_IMAGE = "docker.elastic.co/logstash/logstash" - _DEFAULT_VERSION = "7.17.0" + _DEFAULT_VERSION = "7.17.20" _DEFAULT_PORT = 5044 _DEFAULT_API_PORT = 9600 _DEFAULT_USERNAME = "USERNAME" From cae5b1544632b22270aa0307e1545fbf0b26d299 Mon Sep 17 00:00:00 2001 From: constanca Date: Wed, 17 Apr 2024 15:53:14 +0200 Subject: [PATCH 26/26] Revert changes --- tests/handlers/aws/test_handler.py | 1140 ++++++ tests/handlers/aws/test_integrations.py | 4027 +++++++++++++++++++-- tests/handlers/aws/test_replay_trigger.py | 87 + tests/handlers/aws/test_utils.py | 336 ++ 4 files changed, 5241 insertions(+), 349 deletions(-) create mode 100644 tests/handlers/aws/test_handler.py create mode 100644 tests/handlers/aws/test_replay_trigger.py create mode 100644 tests/handlers/aws/test_utils.py diff --git a/tests/handlers/aws/test_handler.py b/tests/handlers/aws/test_handler.py new file mode 100644 index 00000000..6858d0bd --- /dev/null +++ b/tests/handlers/aws/test_handler.py @@ -0,0 +1,1140 @@ +# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +# or more contributor license agreements. Licensed under the Elastic License 2.0; +# you may not use this file except in compliance with the Elastic License 2.0. 
+ +import base64 +import datetime +import importlib +import os +import sys +from copy import deepcopy +from io import BytesIO +from typing import Any, Optional, Union +from unittest import TestCase + +import mock +import pytest +from botocore.exceptions import ClientError +from botocore.response import StreamingBody + +from handlers.aws.exceptions import ( + ConfigFileException, + InputConfigException, + OutputConfigException, + TriggerTypeException, +) +from main_aws import handler +from share import json_dumper, json_parser + +from .utils import ContextMock + + +class MockContent: + SECRETS_MANAGER_MOCK_DATA: dict[str, dict[str, str]] = { + "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets": { + "type": "SecretString", + "data": json_dumper( + { + "url": "mock_elastic_url", + "username": "mock_elastic_username", + "password": "mock_elastic_password", + "empty": "", + } + ), + }, + "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret": { + "type": "SecretString", + "data": "mock_plain_text_sqs_arn", + }, + "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret_not_str_byte": { + "type": "SecretString", + "data": b"i am not a string", # type:ignore + }, + "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret_not_str_int": { + "type": "SecretString", + "data": 2021, # type:ignore + }, + "arn:aws:secretsmanager:eu-central-1:123456789:secret:binary_secret": { + "type": "SecretBinary", + "data": "bW9ja19uZ2lueC5sb2c=", + }, + "arn:aws:secretsmanager:eu-central-1:123456789:secret:empty_secret": {"type": "SecretString", "data": ""}, + } + + @staticmethod + def _get_aws_sm_client(region_name: str) -> mock.MagicMock: + client = mock.Mock() + client.get_secret_value = MockContent.get_secret_value + return client + + @staticmethod + def get_secret_value(SecretId: str) -> Optional[dict[str, Union[bytes, str]]]: + secrets = MockContent.SECRETS_MANAGER_MOCK_DATA.get(SecretId) + + if secrets is None: + raise ClientError( + { + "Error": { + "Message": "Secrets Manager can't find the specified secret.", + "Code": "ResourceNotFoundException", + } + }, + "GetSecretValue", + ) + + if secrets["type"] == "SecretBinary": + return {"SecretBinary": base64.b64decode(secrets["data"])} + elif secrets["type"] == "SecretString": + return {"SecretString": secrets["data"]} + + return None + + +_now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.%fZ") +_dummy_lambda_event: dict[str, Any] = { + "Records": [ + { + "messageId": "dummy_message_id", + "receiptHandle": "dummy_receipt_handle", + "body": json_dumper( + { + "Records": [ + { + "eventVersion": "2.1", + "eventSource": "aws:s3", + "awsRegion": "eu-central-1", + "eventTime": _now, + "eventName": "ObjectCreated:Put", + "userIdentity": {"principalId": "dummy_principal_id"}, + "requestParameters": {"sourceIPAddress": "dummy_source_ip_address"}, + "responseElements": { + "x-amz-request-id": "dummy_request_id", + "x-amz-id-2": "dummy_request_id_2", + }, + "s3": { + "s3SchemaVersion": "1.0", + "configurationId": "sqs_event", + "bucket": { + "name": "dummy_bucket_name", + "ownerIdentity": {"principalId": "dummy_principal_id"}, + "arn": "arn:aws:s3:::dummy_bucket_name", + }, + "object": { + "key": "file.log", + "size": 27, + "eTag": "", + "sequencer": "", + }, + }, + } + ] + } + ), + "attributes": { + "ApproximateReceiveCount": "1", + "SentTimestamp": _now, + "SenderId": "dummy_sender_id", + "ApproximateFirstReceiveTimestamp": _now, + }, + "messageAttributes": { + "config": { + "stringValue": "inputs:\n - type: 
s3-sqs" + "\n id: arn:aws:sqs:eu-central-1:123456789:sqs-queue\n outputs:" + "\n - type: elasticsearch\n args:" + "\n cloud_id: cloud_id:bG9jYWxob3N0OjkyMDAkMA==\n api_key: api_key\n" + }, + "originalEventSourceARN": {"stringValue": "arn:aws:sqs:eu-central-1:123456789:sqs-queue"}, + "originalLastEndingOffset": {"stringValue": "32"}, + }, + "md5OfBody": "dummy_hash", + "eventSource": "aws:sqs", + "eventSourceARN": "arn:aws:sqs:eu-central-1:123456789:s3-sqs-queue", + "awsRegion": "eu-central-1", + } + ] +} + + +def _get_queue_url_mock(QueueName: str, QueueOwnerAWSAccountId: str) -> dict[str, Any]: + return {"QueueUrl": ""} + + +def _send_message(QueueUrl: str, MessageBody: str, MessageAttributes: dict[str, Any]) -> None: + pass + + +def _describe_regions(AllRegions: bool) -> dict[str, Any]: + return { + "Regions": [ + { + "RegionName": "af-south-1", + }, + { + "RegionName": "ap-east-1", + }, + { + "RegionName": "ap-northeast-1", + }, + { + "RegionName": "ap-northeast-2", + }, + { + "RegionName": "ap-northeast-3", + }, + { + "RegionName": "ap-south-1", + }, + { + "RegionName": "ap-south-2", + }, + { + "RegionName": "ap-southeast-1", + }, + { + "RegionName": "ap-southeast-2", + }, + { + "RegionName": "ap-southeast-3", + }, + { + "RegionName": "ap-southeast-4", + }, + { + "RegionName": "ca-central-1", + }, + { + "RegionName": "eu-central-1", + }, + { + "RegionName": "eu-central-2", + }, + { + "RegionName": "eu-north-1", + }, + { + "RegionName": "eu-south-1", + }, + { + "RegionName": "eu-south-2", + }, + { + "RegionName": "eu-west-1", + }, + { + "RegionName": "eu-west-2", + }, + { + "RegionName": "eu-west-3", + }, + { + "RegionName": "me-central-1", + }, + { + "RegionName": "me-south-1", + }, + { + "RegionName": "sa-east-1", + }, + { + "RegionName": "us-east-1", + }, + { + "RegionName": "us-east-2", + }, + { + "RegionName": "us-gov-east-1", + }, + { + "RegionName": "us-gov-west-1", + }, + { + "RegionName": "us-west-1", + }, + { + "RegionName": "us-west-2", + }, + ] + } + + +_ec2_client_mock = mock.MagicMock() +_ec2_client_mock.describe_regions = _describe_regions + +_sqs_client_mock = mock.MagicMock() +_sqs_client_mock.get_queue_url = _get_queue_url_mock +_sqs_client_mock.send_message = _send_message + + +_s3_client_mock = mock.MagicMock() + + +_s3_client_mock.config_content = ( + b"inputs:\n" + b" - type: s3-sqs\n" + b" id: arn:aws:sqs:eu-central-1:123456789:s3-sqs-queue\n" + b" outputs:\n" + b" - type: elasticsearch\n" + b" args:\n" + b" cloud_id: cloud_id:bG9jYWxob3N0OjkyMDAkMA==\n" + b" api_key: api_key\n" + b" - type: logstash\n" + b" args:\n" + b" logstash_url: logstash_url\n" + b" - type: cloudwatch-logs\n" + b" id: arn:aws:logs:eu-central-1:123456789:log-group:logGroup:log-stream:logStream\n" + b" outputs:\n" + b" - type: elasticsearch\n" + b" args:\n" + b" cloud_id: cloud_id:bG9jYWxob3N0OjkyMDAkMA==\n" + b" api_key: api_key\n" + b" - type: logstash\n" + b" args:\n" + b" logstash_url: logstash_url\n" + b" - type: sqs\n" + b" id: arn:aws:sqs:eu-central-1:123456789:sqs-queue\n" + b" outputs:\n" + b" - type: elasticsearch\n" + b" args:\n" + b" cloud_id: cloud_id:bG9jYWxob3N0OjkyMDAkMA==\n" + b" api_key: api_key\n" + b" - type: logstash\n" + b" args:\n" + b" logstash_url: logstash_url\n" + b" - type: dummy\n" + b" id: arn:aws:dummy:eu-central-1:123456789:input\n" + b" outputs:\n" + b" - type: elasticsearch\n" + b" args:\n" + b" cloud_id: cloud_id:bG9jYWxob3N0OjkyMDAkMA==\n" + b" api_key: api_key\n" + b" - type: logstash\n" + b" args:\n" + b" logstash_url: logstash_url\n" + b" - type: 
s3-sqs\n" + b" id: arn:aws:sqs:eu-central-1:123456789:s3-sqs-queue-with-dummy-output\n" + b" outputs:\n" + b" - type: output_type\n" + b" args:\n" + b" output_arg: output_arg" +) + + +def _head_object(Bucket: str, Key: str) -> dict[str, Any]: + return {"ContentType": "ContentType", "ContentLength": 0} + + +def _get_object(Bucket: str, Key: str, Range: str) -> dict[str, Any]: + content = _s3_client_mock.config_content + content_body = BytesIO(content) + content_length = len(content) + return {"Body": StreamingBody(content_body, content_length), "ContentLength": content_length} + + +def _download_fileobj(Bucket: str, Key: str, Fileobj: BytesIO) -> None: + if Key == "please raise": + raise Exception("raised") + + +_s3_client_mock.head_object = _head_object +_s3_client_mock.download_fileobj = _download_fileobj +_s3_client_mock.get_object = _get_object + + +def _apm_capture_serverless() -> Any: + def wrapper(func: Any) -> Any: + def decorated(*args: Any, **kwds: Any) -> Any: + return func(*args, **kwds) + + return decorated + + return wrapper + + +def reload_handlers_aws_handler() -> None: + os.environ["ELASTIC_APM_ACTIVE"] = "ELASTIC_APM_ACTIVE" + os.environ["AWS_LAMBDA_FUNCTION_NAME"] = "AWS_LAMBDA_FUNCTION_NAME" + + from handlers.aws.utils import get_ec2_client, get_sqs_client + + os.environ["AWS_DEFAULT_REGION"] = "us-east-1" + _ = get_sqs_client() + _ = get_ec2_client() + + mock.patch("handlers.aws.utils.get_sqs_client", lambda: _sqs_client_mock).start() + mock.patch("handlers.aws.utils.get_ec2_client", lambda: _ec2_client_mock).start() + + handlers_aws_handler = sys.modules["handlers.aws.handler"] + importlib.reload(handlers_aws_handler) + + +def revert_handlers_aws_handler() -> None: + if "AWS_DEFAULT_REGION" in os.environ: + del os.environ["AWS_DEFAULT_REGION"] + + if "ELASTIC_APM_ACTIVE" in os.environ: + del os.environ["ELASTIC_APM_ACTIVE"] + + if "AWS_LAMBDA_FUNCTION_NAME" in os.environ: + del os.environ["AWS_LAMBDA_FUNCTION_NAME"] + + handlers_aws_handler = sys.modules["handlers.aws.handler"] + importlib.reload(handlers_aws_handler) + + +@pytest.mark.unit +class TestLambdaHandlerNoop(TestCase): + @mock.patch("share.config._available_output_types", new=["elasticsearch", "logstash", "output_type"]) + @mock.patch( + "share.config._available_input_types", new=["cloudwatch-logs", "s3-sqs", "sqs", "kinesis-data-stream", "dummy"] + ) + @mock.patch("storage.S3Storage._s3_client", _s3_client_mock) + @mock.patch("handlers.aws.utils.apm_capture_serverless", _apm_capture_serverless) + @mock.patch( + "handlers.aws.utils._available_triggers", + new={"aws:s3": "s3-sqs", "aws:sqs": "sqs", "aws:kinesis": "kinesis-data-stream", "dummy": "s3-sqs"}, + ) + def test_lambda_handler_noop(self) -> None: + reload_handlers_aws_handler() + + with self.subTest("no originalEventSourceARN in messageAttributes"): + ctx = ContextMock() + os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" + lambda_event = deepcopy(_dummy_lambda_event) + del lambda_event["Records"][0]["messageAttributes"]["originalEventSourceARN"] + assert handler(lambda_event, ctx) == "completed" # type:ignore + + with self.subTest("no input defined for cloudwatch_logs"): + ctx = ContextMock() + os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" + lambda_event = { + "awslogs": { + "data": json_dumper( + {"logGroup": "logGroup", "logStream": "logStream", "owner": "123456789", "logEvents": []} + ) + } + } + assert handler(lambda_event, ctx) == "completed" # type:ignore + + with 
self.subTest("output not elasticsearch from payload config"): + with mock.patch( + "handlers.aws.handler.get_shipper_for_replay_event", + lambda config, output_type, output_args, event_input_id, replay_handler: None, + ): + ctx = ContextMock() + event = { + "Records": [ + { + "eventSourceARN": "arn:aws:sqs:eu-central-1:123456789:replay-queue", + "receiptHandle": "receiptHandle", + "body": '{"output_type": "output_type", "output_args": {},' + '"event_input_id": "arn:aws:sqs:eu-central-1:123456789:s3-sqs-queue", ' + '"event_payload": {"_id": "_id"}}', + } + ] + } + assert handler(event, ctx) == "replayed" # type:ignore + + with self.subTest("no input defined for cloudwatch_logs in continuing queue"): + ctx = ContextMock() + os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" + lambda_event = deepcopy(_dummy_lambda_event) + lambda_event["Records"][0]["messageAttributes"]["originalEventSourceARN"] = { + "stringValue": "arn:aws:logs:eu-central-1:123456789:log-group:test-not-existing-esf-loggroup:*" + } + assert handler(lambda_event, ctx) == "completed" # type:ignore + + with self.subTest("no output type elasticsearch in continuing queue"): + ctx = ContextMock() + os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" + lambda_event = deepcopy(_dummy_lambda_event) + lambda_event["Records"][0][ + "eventSourceARN" + ] = "arn:aws:sqs:eu-central-1:123456789:s3-sqs-queue-with-dummy-output" + del lambda_event["Records"][0]["messageAttributes"]["originalEventSourceARN"] + assert handler(lambda_event, ctx) == "completed" # type:ignore + + with self.subTest("no input type for output type elasticsearch in continuing queue"): + ctx = ContextMock() + os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" + lambda_event = deepcopy(_dummy_lambda_event) + lambda_event["Records"][0]["eventSource"] = "dummy" + lambda_event["Records"][0]["eventSourceARN"] = "arn:aws:dummy:eu-central-1:123456789:input" + del lambda_event["Records"][0]["messageAttributes"]["originalEventSourceARN"] + assert handler(lambda_event, ctx) == "completed" # type:ignore + + with self.subTest("no input defined for kinesis-data-stream"): + ctx = ContextMock() + os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" + lambda_event = { + "Records": [ + { + "eventSource": "aws:kinesis", + "kinesis": {"data": ""}, + "eventSourceARN": "arn:aws:kinesis:eu-central-1:123456789:stream/test-esf-kinesis-stream", + } + ] + } + assert handler(lambda_event, ctx) == "completed" # type:ignore + + with self.subTest("body is neither replay queue nor s3-sqs"): + ctx = ContextMock() + os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" + os.environ["SQS_REPLAY_URL"] = "https://sqs.us-east-2.amazonaws.com/123456789012/replay_queue" + os.environ["SQS_CONTINUE_URL"] = "https://sqs.us-east-2.amazonaws.com/123456789012/continue_queue" + lambda_event = deepcopy(_dummy_lambda_event) + lambda_event["Records"][0]["body"] = json_dumper({"Records": [{"key": "value"}]}) + lambda_event["Records"][0]["eventSourceARN"] = "arn:aws:sqs:eu-central-1:123456789:sqs-queue" + del lambda_event["Records"][0]["messageAttributes"]["originalEventSourceARN"] + assert handler(lambda_event, ctx) == "completed" # type:ignore + + with self.subTest("raising cannot find cloudwatch_logs ARN"): + ctx = ContextMock() + os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" + lambda_event = { + "awslogs": { + 
"data": json_dumper( + {"logGroup": "logGroup", "logStream": "logStreamNotMatching", "owner": "owner", "logEvents": []} + ) + } + } + + assert handler(lambda_event, ctx) == "completed" # type:ignore + + with self.subTest("raising unexpected exception"): + ctx = ContextMock() + lambda_event = deepcopy(_dummy_lambda_event) + lambda_event_body = json_parser(lambda_event["Records"][0]["body"]) + lambda_event_body["Records"][0]["s3"]["object"]["key"] = "please raise" + + lambda_event["Records"][0]["body"] = json_dumper(lambda_event_body) + + assert handler(lambda_event, ctx) == "exception raised: Exception('raised')" # type:ignore + + with self.subTest("raising unexpected exception apm client not None"): + with mock.patch("handlers.aws.utils.get_apm_client", lambda: mock.MagicMock()): + ctx = ContextMock() + lambda_event = deepcopy(_dummy_lambda_event) + lambda_event_body = json_parser(lambda_event["Records"][0]["body"]) + lambda_event_body["Records"][0]["s3"]["object"]["key"] = "please raise" + + lambda_event["Records"][0]["body"] = json_dumper(lambda_event_body) + + assert handler(lambda_event, ctx) == "exception raised: Exception('raised')" # type:ignore + + +@pytest.mark.unit +class TestLambdaHandlerFailure(TestCase): + def setUp(self) -> None: + revert_handlers_aws_handler() + + @mock.patch("share.config._available_output_types", new=["elasticsearch", "logstash", "output_type"]) + @mock.patch( + "share.config._available_input_types", new=["cloudwatch-logs", "s3-sqs", "sqs", "kinesis-data-stream", "dummy"] + ) + @mock.patch("share.secretsmanager._get_aws_sm_client", new=MockContent._get_aws_sm_client) + @mock.patch("handlers.aws.utils.get_ec2_client", lambda: _ec2_client_mock) + @mock.patch("handlers.aws.handler.get_sqs_client", lambda: _sqs_client_mock) + @mock.patch("storage.S3Storage._s3_client", _s3_client_mock) + def test_lambda_handler_failure(self) -> None: + dummy_event: dict[str, Any] = { + "Records": [ + { + "eventSource": "aws:sqs", + "eventSourceARN": "arn:aws:sqs", + }, + ] + } + + with self.subTest("output not in config from replay payload body"): + os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" + event = { + "Records": [ + { + "eventSourceARN": "arn:aws:sqs:eu-central-1:123456789:replay-queue", + "receiptHandle": "receiptHandle", + "body": '{"output_type": "output_type", "output_args": {},' + '"event_input_id": "arn:aws:dummy:eu-central-1:123456789:input", ' + '"event_payload": {"_id": "_id"}}', + } + ] + } + with self.assertRaisesRegex(OutputConfigException, "Cannot load output of type output_type"): + ctx = ContextMock() + + handler(event, ctx) # type:ignore + + with self.subTest("input not in config from replay payload body"): + os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" + event = { + "Records": [ + { + "eventSourceARN": "arn:aws:sqs:eu-central-1:123456789:replay-queue", + "receiptHandle": "receiptHandle", + "body": '{"output_type": "output_type", "output_args": {},' + '"event_input_id": "arn:aws:dummy:eu-central-1:123456789:not-existing-input", ' + '"event_payload": {"_id": "_id"}}', + } + ] + } + with self.assertRaisesRegex( + InputConfigException, + "Cannot load input for input id arn:aws:dummy:eu-central-1:123456789:not-existing-input", + ): + ctx = ContextMock() + + handler(event, ctx) # type:ignore + + with self.subTest("empty config"): + os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" + with self.assertRaisesRegex(ConfigFileException, "Empty 
config"): + ctx = ContextMock() + _s3_client_mock.config_content = b"" + + handler(dummy_event, ctx) # type:ignore + + with self.subTest("Invalid s3 uri apm client not None"): + with mock.patch("handlers.aws.utils.get_apm_client", lambda: mock.MagicMock()): + with self.assertRaisesRegex(ConfigFileException, "Invalid s3 uri provided: ``"): + os.environ["S3_CONFIG_FILE"] = "" + ctx = ContextMock() + + handler(dummy_event, ctx) # type:ignore + + with self.subTest("Invalid s3 uri"): + with self.assertRaisesRegex(ConfigFileException, "Invalid s3 uri provided: ``"): + os.environ["S3_CONFIG_FILE"] = "" + ctx = ContextMock() + + handler(dummy_event, ctx) # type:ignore + + with self.subTest("Invalid s3 uri no bucket and key"): + with self.assertRaisesRegex(ConfigFileException, "Invalid s3 uri provided: `s3://`"): + os.environ["S3_CONFIG_FILE"] = "s3://" + ctx = ContextMock() + + handler(dummy_event, ctx) # type:ignore + + with self.subTest("Invalid s3 uri no key"): + with self.assertRaisesRegex(ConfigFileException, "Invalid s3 uri provided: `s3://bucket`"): + os.environ["S3_CONFIG_FILE"] = "s3://bucket" + ctx = ContextMock() + + handler(dummy_event, ctx) # type:ignore + + with self.subTest("no Records in event"): + with self.assertRaisesRegex(TriggerTypeException, "Not supported trigger"): + ctx = ContextMock() + event = {} + + handler(event, ctx) # type:ignore + + with self.subTest("empty Records in event"): + with self.assertRaisesRegex(TriggerTypeException, "Not supported trigger"): + ctx = ContextMock() + event = {"Records": []} + + handler(event, ctx) # type:ignore + + with self.subTest("no eventSource in Records in event"): + with self.assertRaisesRegex(TriggerTypeException, "Not supported trigger"): + ctx = ContextMock() + event = {"Records": [{}]} + + handler(event, ctx) # type:ignore + + with self.subTest("no valid eventSource in Records in event"): + with self.assertRaisesRegex(TriggerTypeException, "Not supported trigger"): + ctx = ContextMock() + event = {"Records": [{"eventSource": "invalid"}]} + + handler(event, ctx) # type:ignore + + with self.subTest("no eventSource in body Records in event"): + with self.assertRaisesRegex(TriggerTypeException, "Not supported trigger"): + ctx = ContextMock() + event = {"Records": [{"body": ""}]} + + handler(event, ctx) # type:ignore + + with self.subTest("no valid eventSource in body Records in event"): + with self.assertRaisesRegex(TriggerTypeException, "Not supported trigger"): + ctx = ContextMock() + event = {"Records": [{"body": "", "eventSource": "invalid"}]} + + handler(event, ctx) # type:ignore + + with self.subTest("replay event loads config from s3"): + with self.assertRaisesRegex(ConfigFileException, "Invalid s3 uri provided: `s3://bucket`"): + ctx = ContextMock() + event = { + "Records": [ + { + "body": '{"output_type": "", "output_args": "", "event_payload": ""}', + } + ] + } + handler(event, ctx) # type:ignore + + with self.subTest("invalid secretsmanager: arn format too long"): + os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" + with self.assertRaisesRegex( + ConfigFileException, + "Invalid arn format: " + "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret:THIS:IS:INVALID", + ): + ctx = ContextMock() + _s3_client_mock.config_content = b""" + inputs: + - type: "s3-sqs" + id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret:THIS:IS:INVALID" + outputs: + - type: "elasticsearch" + args: + elasticsearch_url: 
"arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" + username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" + password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" + es_datastream_name: "logs-redis.log-default" + """ + + event = deepcopy(dummy_event) + + handler(event, ctx) # type:ignore + + with self.subTest("invalid secretsmanager: empty region"): + os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" + with self.assertRaisesRegex( + ConfigFileException, + "Must be provided region in arn: " "arn:aws:secretsmanager::123456789:secret:plain_secret", + ): + ctx = ContextMock() + # BEWARE region is empty at id + _s3_client_mock.config_content = b""" + inputs: + - type: "s3-sqs" + id: "arn:aws:secretsmanager::123456789:secret:plain_secret" + outputs: + - type: "elasticsearch" + args: + elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets" + username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" + password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" + es_datastream_name: "logs-redis.log-default" + """ + + event = deepcopy(dummy_event) + + handler(event, ctx) # type:ignore + + with self.subTest("invalid secretsmanager: empty secrets manager name"): + os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" + with self.assertRaisesRegex( + ConfigFileException, + "Must be provided secrets manager name in arn: " + "arn:aws:secretsmanager:eu-central-1:123456789:secret:", + ): + ctx = ContextMock() + # BEWARE empty secrets manager name at id + _s3_client_mock.config_content = b""" + inputs: + - type: "s3-sqs" + id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:" + outputs: + - type: "elasticsearch" + args: + elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets" + username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" + password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" + es_datastream_name: "logs-redis.log-default" + """ + + event = deepcopy(dummy_event) + + handler(event, ctx) # type:ignore + + with self.subTest("invalid secretsmanager: cannot use both plain text and key/value pairs"): + os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" + with self.assertRaisesRegex( + ConfigFileException, + "You cannot have both plain text and json key for the same " + "secret: arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username", + ): + ctx = ContextMock() + # BEWARE using es_secrets plain text for elasticsearch_url and es_secrets:username for username + _s3_client_mock.config_content = b""" + inputs: + - type: "s3-sqs" + id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secrets" + outputs: + - type: "elasticsearch" + args: + elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets" + username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" + password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" + es_datastream_name: "logs-redis.log-default" + """ + + event = deepcopy(dummy_event) + + handler(event, ctx) # type:ignore + + with self.subTest("invalid secretsmanager: empty secret key"): + os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" + with self.assertRaisesRegex( + ConfigFileException, + 
"Error for secret " + "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:: key must " + "not be empty", + ): + ctx = ContextMock() + # BEWARE empty key at elasticsearch_url + _s3_client_mock.config_content = b""" + inputs: + - type: "s3-sqs" + id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret" + outputs: + - type: "elasticsearch" + args: + elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:" + username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" + password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" + es_datastream_name: "logs-redis.log-default" + """ + + event = deepcopy(dummy_event) + + handler(event, ctx) # type:ignore + + with self.subTest("invalid secretsmanager: secret does not exist"): + os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" + with self.assertRaisesRegex( + ConfigFileException, + r"An error occurred \(ResourceNotFoundException\) when calling " + "the GetSecretValue operation: Secrets Manager can't find the specified secret.", + ): + ctx = ContextMock() + _s3_client_mock.config_content = b""" + inputs: + - type: "s3-sqs" + id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:DOES_NOT_EXIST" + outputs: + - type: "elasticsearch" + args: + elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" + username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" + password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" + es_datastream_name: "logs-redis.log-default" + """ + + event = deepcopy(dummy_event) + + handler(event, ctx) # type:ignore + + with self.subTest("invalid secretsmanager: empty plain secret value"): + os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" + with self.assertRaisesRegex( + ConfigFileException, + "Error for secret " + "arn:aws:secretsmanager:eu-central-1:123456789:secret:empty_secret: must " + "not be empty", + ): + ctx = ContextMock() + _s3_client_mock.config_content = b""" + inputs: + - type: "s3-sqs" + id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:empty_secret" + outputs: + - type: "elasticsearch" + args: + elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" + username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" + password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" + es_datastream_name: "logs-redis.log-default" + """ + + event = deepcopy(dummy_event) + + handler(event, ctx) # type:ignore + + with self.subTest("invalid secretsmanager: empty key/value secret value"): + os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" + with self.assertRaisesRegex( + ConfigFileException, + "Error for secret " + "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:empty: must " + "not be empty", + ): + ctx = ContextMock() + _s3_client_mock.config_content = b""" + inputs: + - type: "s3-sqs" + id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:empty" + outputs: + - type: "elasticsearch" + args: + elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" + username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" + password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" + es_datastream_name: "logs-redis.log-default" + """ + 
+ event = deepcopy(dummy_event) + + handler(event, ctx) # type:ignore + + with self.subTest("invalid secretsmanager: plain text used as key/value"): + os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" + with self.assertRaisesRegex( + ConfigFileException, + "Error for secret " + "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret:SHOULD_NOT_HAVE_A_KEY: " + "expected to be keys/values pair", + ): + ctx = ContextMock() + _s3_client_mock.config_content = b""" + inputs: + - type: "s3-sqs" + id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret:SHOULD_NOT_HAVE_A_KEY" + outputs: + - type: "elasticsearch" + args: + elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" + username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" + password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" + es_datastream_name: "logs-redis.log-default" + """ + + event = deepcopy(dummy_event) + + handler(event, ctx) # type:ignore + + with self.subTest("invalid secretsmanager: key does not exist in secret manager"): + os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" + with self.assertRaisesRegex( + ConfigFileException, + "Error for secret " + "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:I_DO_NOT_EXIST: " + "key not found", + ): + ctx = ContextMock() + _s3_client_mock.config_content = b""" + inputs: + - type: "s3-sqs" + id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:I_DO_NOT_EXIST" + outputs: + - type: "elasticsearch" + args: + elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" + username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" + password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" + es_datastream_name: "logs-redis.log-default" + """ + + event = deepcopy(dummy_event) + + handler(event, ctx) # type:ignore + + with self.subTest("invalid secretsmanager: plain text secret not str"): + os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" + with self.assertRaisesRegex( + ConfigFileException, + "Error for secret " + "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret_not_str_byte: " + "expected to be a string", + ): + ctx = ContextMock() + _s3_client_mock.config_content = b""" + inputs: + - type: "s3-sqs" + id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret_not_str_byte" + outputs: + - type: "elasticsearch" + args: + elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" + username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" + password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" + es_datastream_name: "logs-redis.log-default" + """ + + event = deepcopy(dummy_event) + + handler(event, ctx) # type:ignore + + with self.subTest("invalid secretsmanager: json TypeError raised"): + os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" + with self.assertRaisesRegex( + ConfigFileException, + "Error for secret " + "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret_not_str_int: " + "expected to be a string", + ): + ctx = ContextMock() + _s3_client_mock.config_content = b""" + inputs: + - type: "s3-sqs" + id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret_not_str_int" + 
outputs: + - type: "elasticsearch" + args: + elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" + username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" + password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" + es_datastream_name: "logs-redis.log-default" + """ + + event = deepcopy(dummy_event) + + handler(event, ctx) # type:ignore + + with self.subTest("tags not list"): + os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" + with self.assertRaisesRegex( + ConfigFileException, "`tags` must be provided as list for input mock_plain_text_sqs_arn" + ): + ctx = ContextMock() + _s3_client_mock.config_content = b""" + inputs: + - type: "s3-sqs" + id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret" + tags: "tag1" + outputs: + - type: "elasticsearch" + args: + elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" + username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" + password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" + es_datastream_name: "logs-redis.log-default" + """ + + event = deepcopy(dummy_event) + + handler(event, ctx) # type:ignore + + with self.subTest("each tag must be of type str"): + os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" + with self.assertRaisesRegex( + ConfigFileException, + r"Each tag in `tags` must be provided as string for input " + r"mock_plain_text_sqs_arn, given: \['tag1', 2, 'tag3'\]", + ): + ctx = ContextMock() + _s3_client_mock.config_content = b""" + inputs: + - type: "s3-sqs" + id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret" + tags: + - "tag1" + - 2 + - "tag3" + outputs: + - type: "elasticsearch" + args: + elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" + username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" + password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" + es_datastream_name: "logs-redis.log-default" + """ + + event = deepcopy(dummy_event) + + handler(event, ctx) # type:ignore + + with self.subTest("expand_event_list_from_field not str"): + os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" + with self.assertRaisesRegex( + ConfigFileException, + "`expand_event_list_from_field` must be provided as string for input mock_plain_text_sqs_arn", + ): + ctx = ContextMock() + _s3_client_mock.config_content = b""" + inputs: + - type: "s3-sqs" + id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret" + expand_event_list_from_field: 0 + outputs: + - type: "elasticsearch" + args: + elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" + username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" + password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" + es_datastream_name: "logs-redis.log-default" + """ + + event = deepcopy(dummy_event) + + handler(event, ctx) # type:ignore + + with self.subTest("root_fields_to_add_to_expanded_event not `all` when string"): + os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" + with self.assertRaisesRegex( + ConfigFileException, + "`root_fields_to_add_to_expanded_event` must be provided as `all` or a list of strings", + ): + ctx = ContextMock() + 
_s3_client_mock.config_content = b""" + inputs: + - type: "s3-sqs" + id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret" + root_fields_to_add_to_expanded_event: not_all + outputs: + - type: "elasticsearch" + args: + elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" + username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" + password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" + es_datastream_name: "logs-redis.log-default" + """ + + event = deepcopy(dummy_event) + + handler(event, ctx) # type:ignore + + with self.subTest("root_fields_to_add_to_expanded_event not `all` neither list of strings"): + os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" + with self.assertRaisesRegex( + ConfigFileException, + "`root_fields_to_add_to_expanded_event` must be provided as `all` or a list of strings", + ): + ctx = ContextMock() + _s3_client_mock.config_content = b""" + inputs: + - type: "s3-sqs" + id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret" + root_fields_to_add_to_expanded_event: 0 + outputs: + - type: "elasticsearch" + args: + elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" + username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" + password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" + es_datastream_name: "logs-redis.log-default" + """ + + event = deepcopy(dummy_event) + + handler(event, ctx) # type:ignore + + with self.subTest("json_content_type not valid"): + os.environ["S3_CONFIG_FILE"] = "s3://s3_config_file_bucket/s3_config_file_object_key" + with self.assertRaisesRegex( + ConfigFileException, + "`json_content_type` must be one of ndjson,single,disabled " + "for input mock_plain_text_sqs_arn: whatever given", + ): + ctx = ContextMock() + _s3_client_mock.config_content = b""" + inputs: + - type: "s3-sqs" + id: "arn:aws:secretsmanager:eu-central-1:123456789:secret:plain_secret" + json_content_type: whatever + outputs: + - type: "elasticsearch" + args: + elasticsearch_url: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:url" + username: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:username" + password: "arn:aws:secretsmanager:eu-central-1:123456789:secret:es_secrets:password" + es_datastream_name: "logs-redis.log-default" + """ + + event = deepcopy(dummy_event) + + handler(event, ctx) # type:ignore diff --git a/tests/handlers/aws/test_integrations.py b/tests/handlers/aws/test_integrations.py index 094b61fe..bf2a632c 100644 --- a/tests/handlers/aws/test_integrations.py +++ b/tests/handlers/aws/test_integrations.py @@ -324,352 +324,3681 @@ def test_continuing(self) -> None: assert isinstance(self.logstash, LogstashContainer) assert isinstance(self.localstack, LocalStackContainer) -# fixtures = [ -# _load_file_fixture("cloudwatch-log-1.json"), -# _load_file_fixture("cloudwatch-log-2.json"), -# ] -# -# s3_bucket_name = _time_based_id(suffix="test-bucket") -# first_filename = "exportedlog/uuid/yyyy-mm-dd-[$LATEST]hash/000000.gz" -# _s3_upload_content_to_bucket( -# client=self.s3_client, -# content=gzip.compress("".join(fixtures).encode("utf-8")), -# content_type="application/x-gzip", -# bucket_name=s3_bucket_name, -# key=first_filename, -# ) -# -# cloudwatch_group_name = _time_based_id(suffix="source-group") -# cloudwatch_group = _logs_create_cloudwatch_logs_group(self.logs_client, 
group_name=cloudwatch_group_name) -# -# cloudwatch_stream_name = _time_based_id(suffix="source-stream") -# _logs_create_cloudwatch_logs_stream( -# self.logs_client, group_name=cloudwatch_group_name, stream_name=cloudwatch_stream_name -# ) -# -# _logs_upload_event_to_cloudwatch_logs( -# self.logs_client, -# group_name=cloudwatch_group_name, -# stream_name=cloudwatch_stream_name, -# messages_body=["".join(fixtures)], -# ) -# -# cloudwatch_group_arn = cloudwatch_group["arn"] -# -# cloudwatch_group_name = cloudwatch_group_name -# cloudwatch_stream_name = cloudwatch_stream_name -# -# sqs_queue_name = _time_based_id(suffix="source-sqs") -# s3_sqs_queue_name = _time_based_id(suffix="source-s3-sqs") -# -# sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) -# s3_sqs_queue = _sqs_create_queue(self.sqs_client, s3_sqs_queue_name, self.localstack.get_url()) -# -# sqs_queue_arn = sqs_queue["QueueArn"] -# sqs_queue_url = sqs_queue["QueueUrl"] -# sqs_queue_url_path = sqs_queue["QueueUrlPath"] -# -# s3_sqs_queue_arn = s3_sqs_queue["QueueArn"] -# s3_sqs_queue_url = s3_sqs_queue["QueueUrl"] -# -# _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) -# _sqs_send_s3_notifications(self.sqs_client, s3_sqs_queue_url, s3_bucket_name, [first_filename]) -# -# kinesis_stream_name = _time_based_id(suffix="source-kinesis") -# kinesis_stream = _kinesis_create_stream(self.kinesis_client, kinesis_stream_name) -# kinesis_stream_arn = kinesis_stream["StreamDescription"]["StreamARN"] -# -# _kinesis_put_records(self.kinesis_client, kinesis_stream_name, ["".join(fixtures)]) -# -# config_yaml: str = f""" -# inputs: -# - type: "kinesis-data-stream" -# id: "{kinesis_stream_arn}" -# tags: {self.default_tags} -# outputs: {self.default_outputs} -# - type: "cloudwatch-logs" -# id: "{cloudwatch_group_arn}" -# tags: {self.default_tags} -# outputs: {self.default_outputs} -# - type: sqs -# id: "{sqs_queue_arn}" -# tags: {self.default_tags} -# outputs: {self.default_outputs} -# - type: s3-sqs -# id: "{s3_sqs_queue_arn}" -# tags: {self.default_tags} -# outputs: {self.default_outputs} -# """ -# -# config_file_path = "config.yaml" -# config_bucket_name = _time_based_id(suffix="config-bucket") -# _s3_upload_content_to_bucket( -# client=self.s3_client, -# content=config_yaml.encode("utf-8"), -# content_type="text/plain", -# bucket_name=config_bucket_name, -# key=config_file_path, -# ) -# -# os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" -# -# events_s3, _ = _sqs_get_messages(self.sqs_client, s3_sqs_queue_url, s3_sqs_queue_arn) -# -# events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) -# -# message_id = events_sqs["Records"][0]["messageId"] -# -# events_cloudwatch_logs, event_ids_cloudwatch_logs, _ = _logs_retrieve_event_from_cloudwatch_logs( -# self.logs_client, cloudwatch_group_name, cloudwatch_stream_name -# ) -# -# events_kinesis, _ = _kinesis_retrieve_event_from_kinesis_stream( -# self.kinesis_client, kinesis_stream_name, kinesis_stream_arn -# ) -# -# ctx = ContextMock() -# first_call = handler(events_s3, ctx) # type:ignore -# -# assert first_call == "continuing" -# -# self.elasticsearch.refresh(index="logs-generic-default") -# assert self.elasticsearch.count(index="logs-generic-default")["count"] == 1 -# -# res = self.elasticsearch.search(index="logs-generic-default", sort="_seq_no") -# assert res["hits"]["total"] == {"value": 1, "relation": "eq"} -# -# assert res["hits"]["hits"][0]["_source"]["message"] == 
fixtures[0].rstrip("\n") -# assert res["hits"]["hits"][0]["_source"]["log"]["offset"] == 0 -# assert ( -# res["hits"]["hits"][0]["_source"]["log"]["file"]["path"] -# == f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{first_filename}" -# ) -# assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name -# assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" -# assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["object"]["key"] == first_filename -# assert res["hits"]["hits"][0]["_source"]["cloud"]["provider"] == "aws" -# assert res["hits"]["hits"][0]["_source"]["cloud"]["region"] == "eu-central-1" -# assert res["hits"]["hits"][0]["_source"]["cloud"]["account"]["id"] == "000000000000" -# assert res["hits"]["hits"][0]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] -# -# logstash_message = self.logstash.get_messages(expected=1) -# assert len(logstash_message) == 1 -# res["hits"]["hits"][0]["_source"]["tags"].remove("generic") -# assert res["hits"]["hits"][0]["_source"]["aws"] == logstash_message[0]["aws"] -# assert res["hits"]["hits"][0]["_source"]["cloud"] == logstash_message[0]["cloud"] -# assert res["hits"]["hits"][0]["_source"]["log"] == logstash_message[0]["log"] -# assert res["hits"]["hits"][0]["_source"]["message"] == logstash_message[0]["message"] -# assert res["hits"]["hits"][0]["_source"]["tags"] == logstash_message[0]["tags"] -# -# second_call = handler(events_sqs, ctx) # type:ignore -# -# assert second_call == "continuing" -# -# self.elasticsearch.refresh(index="logs-generic-default") -# assert self.elasticsearch.count(index="logs-generic-default")["count"] == 2 -# -# res = self.elasticsearch.search(index="logs-generic-default", sort="_seq_no") -# assert res["hits"]["total"] == {"value": 2, "relation": "eq"} -# -# assert res["hits"]["hits"][1]["_source"]["message"] == fixtures[0].rstrip("\n") -# assert res["hits"]["hits"][1]["_source"]["log"]["offset"] == 0 -# assert res["hits"]["hits"][1]["_source"]["log"]["file"]["path"] == sqs_queue_url_path -# assert res["hits"]["hits"][1]["_source"]["aws"]["sqs"]["name"] == sqs_queue_name -# assert res["hits"]["hits"][1]["_source"]["aws"]["sqs"]["message_id"] == message_id -# assert res["hits"]["hits"][1]["_source"]["cloud"]["provider"] == "aws" -# assert res["hits"]["hits"][1]["_source"]["cloud"]["region"] == "us-east-1" -# assert res["hits"]["hits"][1]["_source"]["cloud"]["account"]["id"] == "000000000000" -# assert res["hits"]["hits"][1]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] -# -# logstash_message = self.logstash.get_messages(expected=2) -# assert len(logstash_message) == 2 -# res["hits"]["hits"][1]["_source"]["tags"].remove("generic") -# assert res["hits"]["hits"][1]["_source"]["aws"] == logstash_message[1]["aws"] -# assert res["hits"]["hits"][1]["_source"]["cloud"] == logstash_message[1]["cloud"] -# assert res["hits"]["hits"][1]["_source"]["log"] == logstash_message[1]["log"] -# assert res["hits"]["hits"][1]["_source"]["message"] == logstash_message[1]["message"] -# assert res["hits"]["hits"][1]["_source"]["tags"] == logstash_message[1]["tags"] -# -# third_call = handler(events_cloudwatch_logs, ctx) # type:ignore -# -# assert third_call == "continuing" -# -# self.elasticsearch.refresh(index="logs-generic-default") -# assert self.elasticsearch.count(index="logs-generic-default")["count"] == 3 -# -# res = self.elasticsearch.search(index="logs-generic-default", sort="_seq_no") -# assert 
res["hits"]["total"] == {"value": 3, "relation": "eq"} -# -# assert res["hits"]["hits"][2]["_source"]["message"] == fixtures[0].rstrip("\n") -# assert res["hits"]["hits"][2]["_source"]["log"]["offset"] == 0 -# assert ( -# res["hits"]["hits"][2]["_source"]["log"]["file"]["path"] -# == f"{cloudwatch_group_name}/{cloudwatch_stream_name}" -# ) -# assert res["hits"]["hits"][2]["_source"]["aws"]["cloudwatch"]["log_group"] == cloudwatch_group_name -# assert res["hits"]["hits"][2]["_source"]["aws"]["cloudwatch"]["log_stream"] == cloudwatch_stream_name -# assert res["hits"]["hits"][2]["_source"]["aws"]["cloudwatch"]["event_id"] == event_ids_cloudwatch_logs[0] -# assert res["hits"]["hits"][2]["_source"]["cloud"]["provider"] == "aws" -# assert res["hits"]["hits"][2]["_source"]["cloud"]["region"] == "us-east-1" -# assert res["hits"]["hits"][2]["_source"]["cloud"]["account"]["id"] == "000000000000" -# assert res["hits"]["hits"][2]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] -# -# logstash_message = self.logstash.get_messages(expected=3) -# assert len(logstash_message) == 3 -# res["hits"]["hits"][2]["_source"]["tags"].remove("generic") -# assert res["hits"]["hits"][2]["_source"]["aws"] == logstash_message[2]["aws"] -# assert res["hits"]["hits"][2]["_source"]["cloud"] == logstash_message[2]["cloud"] -# assert res["hits"]["hits"][2]["_source"]["log"] == logstash_message[2]["log"] -# assert res["hits"]["hits"][2]["_source"]["message"] == logstash_message[2]["message"] -# assert res["hits"]["hits"][2]["_source"]["tags"] == logstash_message[2]["tags"] -# -# fourth_call = handler(events_kinesis, ctx) # type:ignore -# -# assert fourth_call == "continuing" -# -# self.elasticsearch.refresh(index="logs-generic-default") -# assert self.elasticsearch.count(index="logs-generic-default")["count"] == 4 -# -# res = self.elasticsearch.search(index="logs-generic-default", sort="_seq_no") -# assert res["hits"]["total"] == {"value": 4, "relation": "eq"} -# -# assert res["hits"]["hits"][3]["_source"]["message"] == fixtures[0].rstrip("\n") -# assert res["hits"]["hits"][3]["_source"]["log"]["offset"] == 0 -# assert res["hits"]["hits"][3]["_source"]["log"]["file"]["path"] == kinesis_stream_arn -# assert res["hits"]["hits"][3]["_source"]["aws"]["kinesis"]["type"] == "stream" -# assert res["hits"]["hits"][3]["_source"]["aws"]["kinesis"]["partition_key"] == "PartitionKey" -# assert res["hits"]["hits"][3]["_source"]["aws"]["kinesis"]["name"] == kinesis_stream_name -# assert ( -# res["hits"]["hits"][3]["_source"]["aws"]["kinesis"]["sequence_number"] -# == events_kinesis["Records"][0]["kinesis"]["sequenceNumber"] -# ) -# assert res["hits"]["hits"][3]["_source"]["cloud"]["provider"] == "aws" -# assert res["hits"]["hits"][3]["_source"]["cloud"]["region"] == "us-east-1" -# assert res["hits"]["hits"][3]["_source"]["cloud"]["account"]["id"] == "000000000000" -# assert res["hits"]["hits"][3]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] -# -# logstash_message = self.logstash.get_messages(expected=4) -# assert len(logstash_message) == 4 -# res["hits"]["hits"][3]["_source"]["tags"].remove("generic") -# assert res["hits"]["hits"][3]["_source"]["aws"] == logstash_message[3]["aws"] -# assert res["hits"]["hits"][3]["_source"]["cloud"] == logstash_message[3]["cloud"] -# assert res["hits"]["hits"][3]["_source"]["log"] == logstash_message[3]["log"] -# assert res["hits"]["hits"][3]["_source"]["message"] == logstash_message[3]["message"] -# assert res["hits"]["hits"][3]["_source"]["tags"] == 
logstash_message[3]["tags"] -# -# continued_events, _ = _sqs_get_messages( -# self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn -# ) -# -# fifth_call = handler(continued_events, ctx) # type:ignore -# -# assert fifth_call == "continuing" -# -# self.elasticsearch.refresh(index="logs-generic-default") -# assert self.elasticsearch.count(index="logs-generic-default")["count"] == 5 -# -# res = self.elasticsearch.search(index="logs-generic-default", sort="_seq_no") -# assert res["hits"]["total"] == {"value": 5, "relation": "eq"} -# -# assert res["hits"]["hits"][4]["_source"]["message"] == fixtures[1].rstrip("\n") -# assert res["hits"]["hits"][4]["_source"]["log"]["offset"] == 94 -# assert ( -# res["hits"]["hits"][4]["_source"]["log"]["file"]["path"] -# == f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{first_filename}" -# ) -# assert res["hits"]["hits"][4]["_source"]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name -# assert res["hits"]["hits"][4]["_source"]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" -# assert res["hits"]["hits"][4]["_source"]["aws"]["s3"]["object"]["key"] == first_filename -# assert res["hits"]["hits"][4]["_source"]["cloud"]["provider"] == "aws" -# assert res["hits"]["hits"][4]["_source"]["cloud"]["region"] == "eu-central-1" -# assert res["hits"]["hits"][4]["_source"]["cloud"]["account"]["id"] == "000000000000" -# assert res["hits"]["hits"][4]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] -# -# logstash_message = self.logstash.get_messages(expected=5) -# assert len(logstash_message) == 5 -# res["hits"]["hits"][4]["_source"]["tags"].remove("generic") -# assert res["hits"]["hits"][4]["_source"]["aws"] == logstash_message[4]["aws"] -# assert res["hits"]["hits"][4]["_source"]["cloud"] == logstash_message[4]["cloud"] -# assert res["hits"]["hits"][4]["_source"]["log"] == logstash_message[4]["log"] -# assert res["hits"]["hits"][4]["_source"]["message"] == logstash_message[4]["message"] -# assert res["hits"]["hits"][4]["_source"]["tags"] == logstash_message[4]["tags"] -# -# ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) -# -# continued_events, _ = _sqs_get_messages( -# self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn -# ) -# sixth_call = handler(continued_events, ctx) # type:ignore -# -# assert sixth_call == "completed" -# -# self.elasticsearch.refresh(index="logs-generic-default") -# assert self.elasticsearch.count(index="logs-generic-default")["count"] == 8 -# -# res = self.elasticsearch.search(index="logs-generic-default", sort="_seq_no") -# assert res["hits"]["total"] == {"value": 8, "relation": "eq"} -# -# assert res["hits"]["hits"][5]["_source"]["message"] == fixtures[1].rstrip("\n") -# assert res["hits"]["hits"][5]["_source"]["log"]["offset"] == 94 -# assert res["hits"]["hits"][5]["_source"]["log"]["file"]["path"] == sqs_queue_url_path -# assert res["hits"]["hits"][5]["_source"]["aws"]["sqs"]["name"] == sqs_queue_name -# assert res["hits"]["hits"][5]["_source"]["aws"]["sqs"]["message_id"] == message_id -# assert res["hits"]["hits"][5]["_source"]["cloud"]["provider"] == "aws" -# assert res["hits"]["hits"][5]["_source"]["cloud"]["region"] == "us-east-1" -# assert res["hits"]["hits"][5]["_source"]["cloud"]["account"]["id"] == "000000000000" -# assert res["hits"]["hits"][5]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] -# -# assert res["hits"]["hits"][6]["_source"]["message"] == fixtures[1].rstrip("\n") -# assert 
res["hits"]["hits"][6]["_source"]["log"]["offset"] == 94 -# assert ( -# res["hits"]["hits"][6]["_source"]["log"]["file"]["path"] -# == f"{cloudwatch_group_name}/{cloudwatch_stream_name}" -# ) -# assert res["hits"]["hits"][6]["_source"]["aws"]["cloudwatch"]["log_group"] == cloudwatch_group_name -# assert res["hits"]["hits"][6]["_source"]["aws"]["cloudwatch"]["log_stream"] == cloudwatch_stream_name -# assert res["hits"]["hits"][6]["_source"]["aws"]["cloudwatch"]["event_id"] == event_ids_cloudwatch_logs[0] -# assert res["hits"]["hits"][6]["_source"]["cloud"]["provider"] == "aws" -# assert res["hits"]["hits"][6]["_source"]["cloud"]["region"] == "us-east-1" -# assert res["hits"]["hits"][6]["_source"]["cloud"]["account"]["id"] == "000000000000" -# assert res["hits"]["hits"][6]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] -# -# assert res["hits"]["hits"][7]["_source"]["message"] == fixtures[1].rstrip("\n") -# assert res["hits"]["hits"][7]["_source"]["log"]["offset"] == 94 -# assert res["hits"]["hits"][7]["_source"]["log"]["file"]["path"] == kinesis_stream_arn -# assert res["hits"]["hits"][7]["_source"]["aws"]["kinesis"]["type"] == "stream" -# assert res["hits"]["hits"][7]["_source"]["aws"]["kinesis"]["partition_key"] == "PartitionKey" -# assert res["hits"]["hits"][7]["_source"]["aws"]["kinesis"]["name"] == kinesis_stream_name -# assert ( -# res["hits"]["hits"][7]["_source"]["aws"]["kinesis"]["sequence_number"] -# == events_kinesis["Records"][0]["kinesis"]["sequenceNumber"] -# ) -# assert res["hits"]["hits"][7]["_source"]["cloud"]["provider"] == "aws" -# assert res["hits"]["hits"][7]["_source"]["cloud"]["region"] == "us-east-1" -# assert res["hits"]["hits"][7]["_source"]["cloud"]["account"]["id"] == "000000000000" -# assert res["hits"]["hits"][7]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] -# -# logstash_message = self.logstash.get_messages(expected=8) -# assert len(logstash_message) == 8 -# res["hits"]["hits"][5]["_source"]["tags"].remove("generic") -# res["hits"]["hits"][6]["_source"]["tags"].remove("generic") -# res["hits"]["hits"][7]["_source"]["tags"].remove("generic") -# -# assert res["hits"]["hits"][5]["_source"]["aws"] == logstash_message[5]["aws"] -# assert res["hits"]["hits"][5]["_source"]["cloud"] == logstash_message[5]["cloud"] -# assert res["hits"]["hits"][5]["_source"]["log"] == logstash_message[5]["log"] -# assert res["hits"]["hits"][5]["_source"]["message"] == logstash_message[5]["message"] -# assert res["hits"]["hits"][5]["_source"]["tags"] == logstash_message[5]["tags"] -# -# assert res["hits"]["hits"][6]["_source"]["aws"] == logstash_message[6]["aws"] -# assert res["hits"]["hits"][6]["_source"]["cloud"] == logstash_message[6]["cloud"] -# assert res["hits"]["hits"][6]["_source"]["log"] == logstash_message[6]["log"] -# assert res["hits"]["hits"][6]["_source"]["message"] == logstash_message[6]["message"] -# assert res["hits"]["hits"][6]["_source"]["tags"] == logstash_message[6]["tags"] -# -# assert res["hits"]["hits"][7]["_source"]["aws"] == logstash_message[7]["aws"] -# assert res["hits"]["hits"][7]["_source"]["cloud"] == logstash_message[7]["cloud"] -# assert res["hits"]["hits"][7]["_source"]["log"] == logstash_message[7]["log"] -# assert res["hits"]["hits"][7]["_source"]["message"] == logstash_message[7]["message"] -# assert res["hits"]["hits"][7]["_source"]["tags"] == logstash_message[7]["tags"] -# -# + fixtures = [ + _load_file_fixture("cloudwatch-log-1.json"), + _load_file_fixture("cloudwatch-log-2.json"), + ] + + 
s3_bucket_name = _time_based_id(suffix="test-bucket") + first_filename = "exportedlog/uuid/yyyy-mm-dd-[$LATEST]hash/000000.gz" + _s3_upload_content_to_bucket( + client=self.s3_client, + content=gzip.compress("".join(fixtures).encode("utf-8")), + content_type="application/x-gzip", + bucket_name=s3_bucket_name, + key=first_filename, + ) + + cloudwatch_group_name = _time_based_id(suffix="source-group") + cloudwatch_group = _logs_create_cloudwatch_logs_group(self.logs_client, group_name=cloudwatch_group_name) + + cloudwatch_stream_name = _time_based_id(suffix="source-stream") + _logs_create_cloudwatch_logs_stream( + self.logs_client, group_name=cloudwatch_group_name, stream_name=cloudwatch_stream_name + ) + + _logs_upload_event_to_cloudwatch_logs( + self.logs_client, + group_name=cloudwatch_group_name, + stream_name=cloudwatch_stream_name, + messages_body=["".join(fixtures)], + ) + + cloudwatch_group_arn = cloudwatch_group["arn"] + + cloudwatch_group_name = cloudwatch_group_name + cloudwatch_stream_name = cloudwatch_stream_name + + sqs_queue_name = _time_based_id(suffix="source-sqs") + s3_sqs_queue_name = _time_based_id(suffix="source-s3-sqs") + + sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) + s3_sqs_queue = _sqs_create_queue(self.sqs_client, s3_sqs_queue_name, self.localstack.get_url()) + + sqs_queue_arn = sqs_queue["QueueArn"] + sqs_queue_url = sqs_queue["QueueUrl"] + sqs_queue_url_path = sqs_queue["QueueUrlPath"] + + s3_sqs_queue_arn = s3_sqs_queue["QueueArn"] + s3_sqs_queue_url = s3_sqs_queue["QueueUrl"] + + _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) + _sqs_send_s3_notifications(self.sqs_client, s3_sqs_queue_url, s3_bucket_name, [first_filename]) + + kinesis_stream_name = _time_based_id(suffix="source-kinesis") + kinesis_stream = _kinesis_create_stream(self.kinesis_client, kinesis_stream_name) + kinesis_stream_arn = kinesis_stream["StreamDescription"]["StreamARN"] + + _kinesis_put_records(self.kinesis_client, kinesis_stream_name, ["".join(fixtures)]) + + config_yaml: str = f""" + inputs: + - type: "kinesis-data-stream" + id: "{kinesis_stream_arn}" + tags: {self.default_tags} + outputs: {self.default_outputs} + - type: "cloudwatch-logs" + id: "{cloudwatch_group_arn}" + tags: {self.default_tags} + outputs: {self.default_outputs} + - type: sqs + id: "{sqs_queue_arn}" + tags: {self.default_tags} + outputs: {self.default_outputs} + - type: s3-sqs + id: "{s3_sqs_queue_arn}" + tags: {self.default_tags} + outputs: {self.default_outputs} + """ + + config_file_path = "config.yaml" + config_bucket_name = _time_based_id(suffix="config-bucket") + _s3_upload_content_to_bucket( + client=self.s3_client, + content=config_yaml.encode("utf-8"), + content_type="text/plain", + bucket_name=config_bucket_name, + key=config_file_path, + ) + + os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" + + events_s3, _ = _sqs_get_messages(self.sqs_client, s3_sqs_queue_url, s3_sqs_queue_arn) + + events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) + + message_id = events_sqs["Records"][0]["messageId"] + + events_cloudwatch_logs, event_ids_cloudwatch_logs, _ = _logs_retrieve_event_from_cloudwatch_logs( + self.logs_client, cloudwatch_group_name, cloudwatch_stream_name + ) + + events_kinesis, _ = _kinesis_retrieve_event_from_kinesis_stream( + self.kinesis_client, kinesis_stream_name, kinesis_stream_arn + ) + + ctx = ContextMock() + first_call = handler(events_s3, ctx) # type:ignore + + assert 
first_call == "continuing" + + self.elasticsearch.refresh(index="logs-generic-default") + assert self.elasticsearch.count(index="logs-generic-default")["count"] == 1 + + res = self.elasticsearch.search(index="logs-generic-default", sort="_seq_no") + assert res["hits"]["total"] == {"value": 1, "relation": "eq"} + + assert res["hits"]["hits"][0]["_source"]["message"] == fixtures[0].rstrip("\n") + assert res["hits"]["hits"][0]["_source"]["log"]["offset"] == 0 + assert ( + res["hits"]["hits"][0]["_source"]["log"]["file"]["path"] + == f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{first_filename}" + ) + assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name + assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" + assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["object"]["key"] == first_filename + assert res["hits"]["hits"][0]["_source"]["cloud"]["provider"] == "aws" + assert res["hits"]["hits"][0]["_source"]["cloud"]["region"] == "eu-central-1" + assert res["hits"]["hits"][0]["_source"]["cloud"]["account"]["id"] == "000000000000" + assert res["hits"]["hits"][0]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] + + logstash_message = self.logstash.get_messages(expected=1) + assert len(logstash_message) == 1 + res["hits"]["hits"][0]["_source"]["tags"].remove("generic") + assert res["hits"]["hits"][0]["_source"]["aws"] == logstash_message[0]["aws"] + assert res["hits"]["hits"][0]["_source"]["cloud"] == logstash_message[0]["cloud"] + assert res["hits"]["hits"][0]["_source"]["log"] == logstash_message[0]["log"] + assert res["hits"]["hits"][0]["_source"]["message"] == logstash_message[0]["message"] + assert res["hits"]["hits"][0]["_source"]["tags"] == logstash_message[0]["tags"] + + second_call = handler(events_sqs, ctx) # type:ignore + + assert second_call == "continuing" + + self.elasticsearch.refresh(index="logs-generic-default") + assert self.elasticsearch.count(index="logs-generic-default")["count"] == 2 + + res = self.elasticsearch.search(index="logs-generic-default", sort="_seq_no") + assert res["hits"]["total"] == {"value": 2, "relation": "eq"} + + assert res["hits"]["hits"][1]["_source"]["message"] == fixtures[0].rstrip("\n") + assert res["hits"]["hits"][1]["_source"]["log"]["offset"] == 0 + assert res["hits"]["hits"][1]["_source"]["log"]["file"]["path"] == sqs_queue_url_path + assert res["hits"]["hits"][1]["_source"]["aws"]["sqs"]["name"] == sqs_queue_name + assert res["hits"]["hits"][1]["_source"]["aws"]["sqs"]["message_id"] == message_id + assert res["hits"]["hits"][1]["_source"]["cloud"]["provider"] == "aws" + assert res["hits"]["hits"][1]["_source"]["cloud"]["region"] == "us-east-1" + assert res["hits"]["hits"][1]["_source"]["cloud"]["account"]["id"] == "000000000000" + assert res["hits"]["hits"][1]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] + + logstash_message = self.logstash.get_messages(expected=2) + assert len(logstash_message) == 2 + res["hits"]["hits"][1]["_source"]["tags"].remove("generic") + assert res["hits"]["hits"][1]["_source"]["aws"] == logstash_message[1]["aws"] + assert res["hits"]["hits"][1]["_source"]["cloud"] == logstash_message[1]["cloud"] + assert res["hits"]["hits"][1]["_source"]["log"] == logstash_message[1]["log"] + assert res["hits"]["hits"][1]["_source"]["message"] == logstash_message[1]["message"] + assert res["hits"]["hits"][1]["_source"]["tags"] == logstash_message[1]["tags"] + + third_call = 
handler(events_cloudwatch_logs, ctx) # type:ignore + + assert third_call == "continuing" + + self.elasticsearch.refresh(index="logs-generic-default") + assert self.elasticsearch.count(index="logs-generic-default")["count"] == 3 + + res = self.elasticsearch.search(index="logs-generic-default", sort="_seq_no") + assert res["hits"]["total"] == {"value": 3, "relation": "eq"} + + assert res["hits"]["hits"][2]["_source"]["message"] == fixtures[0].rstrip("\n") + assert res["hits"]["hits"][2]["_source"]["log"]["offset"] == 0 + assert ( + res["hits"]["hits"][2]["_source"]["log"]["file"]["path"] + == f"{cloudwatch_group_name}/{cloudwatch_stream_name}" + ) + assert res["hits"]["hits"][2]["_source"]["aws"]["cloudwatch"]["log_group"] == cloudwatch_group_name + assert res["hits"]["hits"][2]["_source"]["aws"]["cloudwatch"]["log_stream"] == cloudwatch_stream_name + assert res["hits"]["hits"][2]["_source"]["aws"]["cloudwatch"]["event_id"] == event_ids_cloudwatch_logs[0] + assert res["hits"]["hits"][2]["_source"]["cloud"]["provider"] == "aws" + assert res["hits"]["hits"][2]["_source"]["cloud"]["region"] == "us-east-1" + assert res["hits"]["hits"][2]["_source"]["cloud"]["account"]["id"] == "000000000000" + assert res["hits"]["hits"][2]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] + + logstash_message = self.logstash.get_messages(expected=3) + assert len(logstash_message) == 3 + res["hits"]["hits"][2]["_source"]["tags"].remove("generic") + assert res["hits"]["hits"][2]["_source"]["aws"] == logstash_message[2]["aws"] + assert res["hits"]["hits"][2]["_source"]["cloud"] == logstash_message[2]["cloud"] + assert res["hits"]["hits"][2]["_source"]["log"] == logstash_message[2]["log"] + assert res["hits"]["hits"][2]["_source"]["message"] == logstash_message[2]["message"] + assert res["hits"]["hits"][2]["_source"]["tags"] == logstash_message[2]["tags"] + + fourth_call = handler(events_kinesis, ctx) # type:ignore + + assert fourth_call == "continuing" + + self.elasticsearch.refresh(index="logs-generic-default") + assert self.elasticsearch.count(index="logs-generic-default")["count"] == 4 + + res = self.elasticsearch.search(index="logs-generic-default", sort="_seq_no") + assert res["hits"]["total"] == {"value": 4, "relation": "eq"} + + assert res["hits"]["hits"][3]["_source"]["message"] == fixtures[0].rstrip("\n") + assert res["hits"]["hits"][3]["_source"]["log"]["offset"] == 0 + assert res["hits"]["hits"][3]["_source"]["log"]["file"]["path"] == kinesis_stream_arn + assert res["hits"]["hits"][3]["_source"]["aws"]["kinesis"]["type"] == "stream" + assert res["hits"]["hits"][3]["_source"]["aws"]["kinesis"]["partition_key"] == "PartitionKey" + assert res["hits"]["hits"][3]["_source"]["aws"]["kinesis"]["name"] == kinesis_stream_name + assert ( + res["hits"]["hits"][3]["_source"]["aws"]["kinesis"]["sequence_number"] + == events_kinesis["Records"][0]["kinesis"]["sequenceNumber"] + ) + assert res["hits"]["hits"][3]["_source"]["cloud"]["provider"] == "aws" + assert res["hits"]["hits"][3]["_source"]["cloud"]["region"] == "us-east-1" + assert res["hits"]["hits"][3]["_source"]["cloud"]["account"]["id"] == "000000000000" + assert res["hits"]["hits"][3]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] + + logstash_message = self.logstash.get_messages(expected=4) + assert len(logstash_message) == 4 + res["hits"]["hits"][3]["_source"]["tags"].remove("generic") + assert res["hits"]["hits"][3]["_source"]["aws"] == logstash_message[3]["aws"] + assert 
res["hits"]["hits"][3]["_source"]["cloud"] == logstash_message[3]["cloud"] + assert res["hits"]["hits"][3]["_source"]["log"] == logstash_message[3]["log"] + assert res["hits"]["hits"][3]["_source"]["message"] == logstash_message[3]["message"] + assert res["hits"]["hits"][3]["_source"]["tags"] == logstash_message[3]["tags"] + + continued_events, _ = _sqs_get_messages( + self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn + ) + + fifth_call = handler(continued_events, ctx) # type:ignore + + assert fifth_call == "continuing" + + self.elasticsearch.refresh(index="logs-generic-default") + assert self.elasticsearch.count(index="logs-generic-default")["count"] == 5 + + res = self.elasticsearch.search(index="logs-generic-default", sort="_seq_no") + assert res["hits"]["total"] == {"value": 5, "relation": "eq"} + + assert res["hits"]["hits"][4]["_source"]["message"] == fixtures[1].rstrip("\n") + assert res["hits"]["hits"][4]["_source"]["log"]["offset"] == 94 + assert ( + res["hits"]["hits"][4]["_source"]["log"]["file"]["path"] + == f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{first_filename}" + ) + assert res["hits"]["hits"][4]["_source"]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name + assert res["hits"]["hits"][4]["_source"]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" + assert res["hits"]["hits"][4]["_source"]["aws"]["s3"]["object"]["key"] == first_filename + assert res["hits"]["hits"][4]["_source"]["cloud"]["provider"] == "aws" + assert res["hits"]["hits"][4]["_source"]["cloud"]["region"] == "eu-central-1" + assert res["hits"]["hits"][4]["_source"]["cloud"]["account"]["id"] == "000000000000" + assert res["hits"]["hits"][4]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] + + logstash_message = self.logstash.get_messages(expected=5) + assert len(logstash_message) == 5 + res["hits"]["hits"][4]["_source"]["tags"].remove("generic") + assert res["hits"]["hits"][4]["_source"]["aws"] == logstash_message[4]["aws"] + assert res["hits"]["hits"][4]["_source"]["cloud"] == logstash_message[4]["cloud"] + assert res["hits"]["hits"][4]["_source"]["log"] == logstash_message[4]["log"] + assert res["hits"]["hits"][4]["_source"]["message"] == logstash_message[4]["message"] + assert res["hits"]["hits"][4]["_source"]["tags"] == logstash_message[4]["tags"] + + ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) + + continued_events, _ = _sqs_get_messages( + self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn + ) + sixth_call = handler(continued_events, ctx) # type:ignore + + assert sixth_call == "completed" + + self.elasticsearch.refresh(index="logs-generic-default") + assert self.elasticsearch.count(index="logs-generic-default")["count"] == 8 + + res = self.elasticsearch.search(index="logs-generic-default", sort="_seq_no") + assert res["hits"]["total"] == {"value": 8, "relation": "eq"} + + assert res["hits"]["hits"][5]["_source"]["message"] == fixtures[1].rstrip("\n") + assert res["hits"]["hits"][5]["_source"]["log"]["offset"] == 94 + assert res["hits"]["hits"][5]["_source"]["log"]["file"]["path"] == sqs_queue_url_path + assert res["hits"]["hits"][5]["_source"]["aws"]["sqs"]["name"] == sqs_queue_name + assert res["hits"]["hits"][5]["_source"]["aws"]["sqs"]["message_id"] == message_id + assert res["hits"]["hits"][5]["_source"]["cloud"]["provider"] == "aws" + assert res["hits"]["hits"][5]["_source"]["cloud"]["region"] == "us-east-1" + assert 
res["hits"]["hits"][5]["_source"]["cloud"]["account"]["id"] == "000000000000" + assert res["hits"]["hits"][5]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] + + assert res["hits"]["hits"][6]["_source"]["message"] == fixtures[1].rstrip("\n") + assert res["hits"]["hits"][6]["_source"]["log"]["offset"] == 94 + assert ( + res["hits"]["hits"][6]["_source"]["log"]["file"]["path"] + == f"{cloudwatch_group_name}/{cloudwatch_stream_name}" + ) + assert res["hits"]["hits"][6]["_source"]["aws"]["cloudwatch"]["log_group"] == cloudwatch_group_name + assert res["hits"]["hits"][6]["_source"]["aws"]["cloudwatch"]["log_stream"] == cloudwatch_stream_name + assert res["hits"]["hits"][6]["_source"]["aws"]["cloudwatch"]["event_id"] == event_ids_cloudwatch_logs[0] + assert res["hits"]["hits"][6]["_source"]["cloud"]["provider"] == "aws" + assert res["hits"]["hits"][6]["_source"]["cloud"]["region"] == "us-east-1" + assert res["hits"]["hits"][6]["_source"]["cloud"]["account"]["id"] == "000000000000" + assert res["hits"]["hits"][6]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] + + assert res["hits"]["hits"][7]["_source"]["message"] == fixtures[1].rstrip("\n") + assert res["hits"]["hits"][7]["_source"]["log"]["offset"] == 94 + assert res["hits"]["hits"][7]["_source"]["log"]["file"]["path"] == kinesis_stream_arn + assert res["hits"]["hits"][7]["_source"]["aws"]["kinesis"]["type"] == "stream" + assert res["hits"]["hits"][7]["_source"]["aws"]["kinesis"]["partition_key"] == "PartitionKey" + assert res["hits"]["hits"][7]["_source"]["aws"]["kinesis"]["name"] == kinesis_stream_name + assert ( + res["hits"]["hits"][7]["_source"]["aws"]["kinesis"]["sequence_number"] + == events_kinesis["Records"][0]["kinesis"]["sequenceNumber"] + ) + assert res["hits"]["hits"][7]["_source"]["cloud"]["provider"] == "aws" + assert res["hits"]["hits"][7]["_source"]["cloud"]["region"] == "us-east-1" + assert res["hits"]["hits"][7]["_source"]["cloud"]["account"]["id"] == "000000000000" + assert res["hits"]["hits"][7]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] + + logstash_message = self.logstash.get_messages(expected=8) + assert len(logstash_message) == 8 + res["hits"]["hits"][5]["_source"]["tags"].remove("generic") + res["hits"]["hits"][6]["_source"]["tags"].remove("generic") + res["hits"]["hits"][7]["_source"]["tags"].remove("generic") + + assert res["hits"]["hits"][5]["_source"]["aws"] == logstash_message[5]["aws"] + assert res["hits"]["hits"][5]["_source"]["cloud"] == logstash_message[5]["cloud"] + assert res["hits"]["hits"][5]["_source"]["log"] == logstash_message[5]["log"] + assert res["hits"]["hits"][5]["_source"]["message"] == logstash_message[5]["message"] + assert res["hits"]["hits"][5]["_source"]["tags"] == logstash_message[5]["tags"] + + assert res["hits"]["hits"][6]["_source"]["aws"] == logstash_message[6]["aws"] + assert res["hits"]["hits"][6]["_source"]["cloud"] == logstash_message[6]["cloud"] + assert res["hits"]["hits"][6]["_source"]["log"] == logstash_message[6]["log"] + assert res["hits"]["hits"][6]["_source"]["message"] == logstash_message[6]["message"] + assert res["hits"]["hits"][6]["_source"]["tags"] == logstash_message[6]["tags"] + + assert res["hits"]["hits"][7]["_source"]["aws"] == logstash_message[7]["aws"] + assert res["hits"]["hits"][7]["_source"]["cloud"] == logstash_message[7]["cloud"] + assert res["hits"]["hits"][7]["_source"]["log"] == logstash_message[7]["log"] + assert res["hits"]["hits"][7]["_source"]["message"] == 
logstash_message[7]["message"] + assert res["hits"]["hits"][7]["_source"]["tags"] == logstash_message[7]["tags"] + + def test_continuing_no_timeout_input_from_originalEventSourceARN_message_attribute(self) -> None: + assert isinstance(self.logstash, LogstashContainer) + assert isinstance(self.localstack, LocalStackContainer) + + fixtures = [ + _load_file_fixture("cloudwatch-log-1.json"), + _load_file_fixture("cloudwatch-log-2.json"), + _load_file_fixture("cloudwatch-log-3.json"), + ] + + sqs_queue_name = _time_based_id(suffix="source-sqs") + + sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) + + sqs_queue_arn = sqs_queue["QueueArn"] + sqs_queue_url = sqs_queue["QueueUrl"] + sqs_queue_url_path = sqs_queue["QueueUrlPath"] + + _sqs_send_messages(self.sqs_client, sqs_queue_url, fixtures[0]) + _sqs_send_messages(self.sqs_client, sqs_queue_url, fixtures[1]) + _sqs_send_messages(self.sqs_client, sqs_queue_url, fixtures[2]) + + config_yaml: str = f""" + inputs: + - type: sqs + id: "{sqs_queue_arn}" + tags: {self.default_tags} + outputs: + - type: "logstash" + args: + logstash_url: "{self.logstash.get_url()}" + ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} + username: "{self.logstash.logstash_user}" + password: "{self.logstash.logstash_password}" + """ + + config_file_path = "config.yaml" + config_bucket_name = _time_based_id(suffix="config-bucket") + _s3_upload_content_to_bucket( + client=self.s3_client, + content=config_yaml.encode("utf-8"), + content_type="text/plain", + bucket_name=config_bucket_name, + key=config_file_path, + ) + + os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" + + events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) + + first_message_id = events_sqs["Records"][0]["messageId"] + second_message_id = events_sqs["Records"][1]["messageId"] + + ctx = ContextMock() + first_call = handler(events_sqs, ctx) # type:ignore + + assert first_call == "continuing" + + logstash_message = self.logstash.get_messages(expected=1) + assert len(logstash_message) == 1 + + assert logstash_message[0]["message"] == fixtures[0].rstrip("\n") + assert logstash_message[0]["log"]["offset"] == 0 + assert logstash_message[0]["log"]["file"]["path"] == sqs_queue_url_path + assert logstash_message[0]["aws"]["sqs"]["name"] == sqs_queue_name + assert logstash_message[0]["aws"]["sqs"]["message_id"] == first_message_id + assert logstash_message[0]["cloud"]["provider"] == "aws" + assert logstash_message[0]["cloud"]["region"] == "us-east-1" + assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + continued_events, _ = _sqs_get_messages( + self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn + ) + + continued_events["Records"][2]["messageAttributes"]["originalEventSourceARN"][ + "stringValue" + ] += "-not-configured-arn" + second_call = handler(continued_events, ctx) # type:ignore + + assert second_call == "continuing" + + logstash_message = self.logstash.get_messages(expected=2) + assert len(logstash_message) == 2 + + assert logstash_message[1]["message"] == fixtures[1].rstrip("\n") + assert logstash_message[1]["log"]["offset"] == 0 + assert logstash_message[1]["log"]["file"]["path"] == sqs_queue_url_path + assert logstash_message[1]["aws"]["sqs"]["name"] == sqs_queue_name + assert logstash_message[1]["aws"]["sqs"]["message_id"] == second_message_id + assert 
logstash_message[1]["cloud"]["provider"] == "aws" + assert logstash_message[1]["cloud"]["region"] == "us-east-1" + assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) + continued_events, _ = _sqs_get_messages( + self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn + ) + + third_call = handler(continued_events, ctx) # type:ignore + + assert third_call == "completed" + + logstash_message = self.logstash.get_messages(expected=2) + assert len(logstash_message) == 2 + + def test_replay(self) -> None: + assert isinstance(self.elasticsearch, ElasticsearchContainer) + assert isinstance(self.logstash, LogstashContainer) + assert isinstance(self.localstack, LocalStackContainer) + + fixtures = [ + _load_file_fixture("cloudwatch-log-1.json"), + _load_file_fixture("cloudwatch-log-2.json"), + ] + + s3_bucket_name = _time_based_id(suffix="test-bucket") + first_filename = "exportedlog/uuid/yyyy-mm-dd-[$LATEST]hash/000000.gz" + _s3_upload_content_to_bucket( + client=self.s3_client, + content=gzip.compress("".join(fixtures).encode("utf-8")), + content_type="application/x-gzip", + bucket_name=s3_bucket_name, + key=first_filename, + ) + + cloudwatch_group_name = _time_based_id(suffix="source-group") + cloudwatch_group = _logs_create_cloudwatch_logs_group(self.logs_client, group_name=cloudwatch_group_name) + + cloudwatch_stream_name = _time_based_id(suffix="source-stream") + _logs_create_cloudwatch_logs_stream( + self.logs_client, group_name=cloudwatch_group_name, stream_name=cloudwatch_stream_name + ) + + _logs_upload_event_to_cloudwatch_logs( + self.logs_client, + group_name=cloudwatch_group_name, + stream_name=cloudwatch_stream_name, + messages_body=["".join(fixtures)], + ) + + cloudwatch_group_arn = cloudwatch_group["arn"] + + cloudwatch_group_name = cloudwatch_group_name + cloudwatch_stream_name = cloudwatch_stream_name + + sqs_queue_name = _time_based_id(suffix="source-sqs") + s3_sqs_queue_name = _time_based_id(suffix="source-s3-sqs") + + sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) + s3_sqs_queue = _sqs_create_queue(self.sqs_client, s3_sqs_queue_name, self.localstack.get_url()) + + sqs_queue_arn = sqs_queue["QueueArn"] + sqs_queue_url = sqs_queue["QueueUrl"] + sqs_queue_url_path = sqs_queue["QueueUrlPath"] + + s3_sqs_queue_arn = s3_sqs_queue["QueueArn"] + s3_sqs_queue_url = s3_sqs_queue["QueueUrl"] + + _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) + _sqs_send_s3_notifications(self.sqs_client, s3_sqs_queue_url, s3_bucket_name, [first_filename]) + + kinesis_stream_name = _time_based_id(suffix="source-kinesis") + kinesis_stream = _kinesis_create_stream(self.kinesis_client, kinesis_stream_name) + kinesis_stream_arn = kinesis_stream["StreamDescription"]["StreamARN"] + + _kinesis_put_records(self.kinesis_client, kinesis_stream_name, ["".join(fixtures)]) + + # the way to let logstash fail is to give wrong credentials + config_yaml: str = f""" + inputs: + - type: "kinesis-data-stream" + id: "{kinesis_stream_arn}" + tags: {self.default_tags} + outputs: + - type: "elasticsearch" + args: + elasticsearch_url: "{self.elasticsearch.get_url()}" + ssl_assert_fingerprint: {self.elasticsearch.ssl_assert_fingerprint} + username: "{self.secret_arn}:username" + password: "{self.secret_arn}:password" + - type: "logstash" + args: + logstash_url: 
"{self.logstash.get_url()}" + ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} + username: "wrong_username" + password: "wrong_username" + - type: "cloudwatch-logs" + id: "{cloudwatch_group_arn}" + tags: {self.default_tags} + outputs: + - type: "elasticsearch" + args: + elasticsearch_url: "{self.elasticsearch.get_url()}" + ssl_assert_fingerprint: {self.elasticsearch.ssl_assert_fingerprint} + username: "{self.secret_arn}:username" + password: "{self.secret_arn}:password" + - type: "logstash" + args: + logstash_url: "{self.logstash.get_url()}" + ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} + username: "wrong_username" + password: "wrong_username" + - type: sqs + id: "{sqs_queue_arn}" + tags: {self.default_tags} + outputs: + - type: "elasticsearch" + args: + elasticsearch_url: "{self.elasticsearch.get_url()}" + ssl_assert_fingerprint: {self.elasticsearch.ssl_assert_fingerprint} + username: "{self.secret_arn}:username" + password: "{self.secret_arn}:password" + - type: "logstash" + args: + logstash_url: "{self.logstash.get_url()}" + ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} + username: "wrong_username" + password: "wrong_username" + - type: s3-sqs + id: "{s3_sqs_queue_arn}" + tags: {self.default_tags} + outputs: + - type: "elasticsearch" + args: + elasticsearch_url: "{self.elasticsearch.get_url()}" + ssl_assert_fingerprint: {self.elasticsearch.ssl_assert_fingerprint} + username: "{self.secret_arn}:username" + password: "{self.secret_arn}:password" + - type: "logstash" + args: + logstash_url: "{self.logstash.get_url()}" + ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} + username: "wrong_username" + password: "wrong_username" + """ + + config_file_path = "config.yaml" + config_bucket_name = _time_based_id(suffix="config-bucket") + _s3_upload_content_to_bucket( + client=self.s3_client, + content=config_yaml.encode("utf-8"), + content_type="text/plain", + bucket_name=config_bucket_name, + key=config_file_path, + ) + + os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" + + events_s3, _ = _sqs_get_messages(self.sqs_client, s3_sqs_queue_url, s3_sqs_queue_arn) + + bucket_arn: str = f"arn:aws:s3:::{s3_bucket_name}" + event_time = int( + datetime.datetime.strptime(_S3_NOTIFICATION_EVENT_TIME, "%Y-%m-%dT%H:%M:%S.%fZ").timestamp() * 1000 + ) + + hash_first = get_hex_prefix(f"{bucket_arn}-{first_filename}") + prefix_s3_first = f"{event_time}-{hash_first}" + + events_sqs, events_sent_timestamps_sqs = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) + + message_id = events_sqs["Records"][0]["messageId"] + hash_sqs = get_hex_prefix(f"{sqs_queue_name}-{message_id}") + prefix_sqs: str = f"{events_sent_timestamps_sqs[0]}-{hash_sqs}" + + ( + events_cloudwatch_logs, + event_ids_cloudwatch_logs, + event_timestamps_cloudwatch_logs, + ) = _logs_retrieve_event_from_cloudwatch_logs(self.logs_client, cloudwatch_group_name, cloudwatch_stream_name) + + hash_cw_logs = get_hex_prefix( + f"{cloudwatch_group_name}-{cloudwatch_stream_name}-{event_ids_cloudwatch_logs[0]}" + ) + prefix_cloudwatch_logs = f"{event_timestamps_cloudwatch_logs[0]}-{hash_cw_logs}" + + events_kinesis, event_timestamps_kinesis_records = _kinesis_retrieve_event_from_kinesis_stream( + self.kinesis_client, kinesis_stream_name, kinesis_stream_arn + ) + sequence_number = events_kinesis["Records"][0]["kinesis"]["sequenceNumber"] + hash_kinesis_record = get_hex_prefix(f"stream-{kinesis_stream_name}-PartitionKey-{sequence_number}") + prefix_kinesis = 
f"{int(float(event_timestamps_kinesis_records[0]) * 1000)}-{hash_kinesis_record}" + + # Create an expected id for s3-sqs so that es.send will fail + self.elasticsearch.index( + index="logs-generic-default", + op_type="create", + id=f"{prefix_s3_first}-000000000000", + document={"@timestamp": datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.%fZ")}, + ) + + # Create an expected id for sqs so that es.send will fail + self.elasticsearch.index( + index="logs-generic-default", + op_type="create", + id=f"{prefix_sqs}-000000000000", + document={"@timestamp": datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.%fZ")}, + ) + + # Create an expected id for cloudwatch-logs so that es.send will fail + self.elasticsearch.index( + index="logs-generic-default", + op_type="create", + id=f"{prefix_cloudwatch_logs}-000000000000", + document={"@timestamp": datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.%fZ")}, + ) + + # Create an expected id for kinesis-data-stream so that es.send will fail + self.elasticsearch.index( + index="logs-generic-default", + op_type="create", + id=f"{prefix_kinesis}-000000000000", + document={"@timestamp": datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.%fZ")}, + ) + + self.elasticsearch.refresh(index="logs-generic-default") + + res = self.elasticsearch.search(index="logs-generic-default") + assert res["hits"]["total"] == {"value": 4, "relation": "eq"} + + ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) + + first_call = handler(events_s3, ctx) # type:ignore + + assert first_call == "completed" + + self.elasticsearch.refresh(index="logs-generic-default") + res = self.elasticsearch.search( + index="logs-generic-default", + query={ + "bool": { + "must_not": { + "ids": { + "values": [ + f"{prefix_s3_first}-000000000000", + f"{prefix_sqs}-000000000000", + f"{prefix_cloudwatch_logs}-000000000000", + f"{prefix_kinesis}-000000000000", + ] + } + } + } + }, + sort="_seq_no", + ) + + assert res["hits"]["total"] == {"value": 1, "relation": "eq"} + + assert res["hits"]["hits"][0]["_source"]["message"] == fixtures[1].rstrip("\n") + assert res["hits"]["hits"][0]["_source"]["log"]["offset"] == 94 + assert ( + res["hits"]["hits"][0]["_source"]["log"]["file"]["path"] + == f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{first_filename}" + ) + assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name + assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" + assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["object"]["key"] == first_filename + assert res["hits"]["hits"][0]["_source"]["cloud"]["provider"] == "aws" + assert res["hits"]["hits"][0]["_source"]["cloud"]["region"] == "eu-central-1" + assert res["hits"]["hits"][0]["_source"]["cloud"]["account"]["id"] == "000000000000" + assert res["hits"]["hits"][0]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] + + logstash_message = self.logstash.get_messages(expected=0) + assert len(logstash_message) == 0 + + second_call = handler(events_sqs, ctx) # type:ignore + + assert second_call == "completed" + + self.elasticsearch.refresh(index="logs-generic-default") + res = self.elasticsearch.search( + index="logs-generic-default", + query={ + "bool": { + "must_not": { + "ids": { + "values": [ + f"{prefix_s3_first}-000000000000", + f"{prefix_sqs}-000000000000", + f"{prefix_cloudwatch_logs}-000000000000", + f"{prefix_kinesis}-000000000000", + ] + } + } + } + }, + sort="_seq_no", + ) + + assert 
res["hits"]["total"] == {"value": 2, "relation": "eq"} + + assert res["hits"]["hits"][1]["_source"]["message"] == fixtures[1].rstrip("\n") + assert res["hits"]["hits"][1]["_source"]["log"]["offset"] == 94 + assert res["hits"]["hits"][1]["_source"]["log"]["file"]["path"] == sqs_queue_url_path + assert res["hits"]["hits"][1]["_source"]["aws"]["sqs"]["name"] == sqs_queue_name + assert res["hits"]["hits"][1]["_source"]["aws"]["sqs"]["message_id"] == message_id + assert res["hits"]["hits"][1]["_source"]["cloud"]["provider"] == "aws" + assert res["hits"]["hits"][1]["_source"]["cloud"]["region"] == "us-east-1" + assert res["hits"]["hits"][1]["_source"]["cloud"]["account"]["id"] == "000000000000" + assert res["hits"]["hits"][1]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] + + logstash_message = self.logstash.get_messages(expected=0) + assert len(logstash_message) == 0 + + third_call = handler(events_cloudwatch_logs, ctx) # type:ignore + + assert third_call == "completed" + + self.elasticsearch.refresh(index="logs-generic-default") + res = self.elasticsearch.search( + index="logs-generic-default", + query={ + "bool": { + "must_not": { + "ids": { + "values": [ + f"{prefix_s3_first}-000000000000", + f"{prefix_sqs}-000000000000", + f"{prefix_cloudwatch_logs}-000000000000", + f"{prefix_kinesis}-000000000000", + ] + } + } + } + }, + sort="_seq_no", + ) + + assert res["hits"]["total"] == {"value": 3, "relation": "eq"} + + assert res["hits"]["hits"][2]["_source"]["message"] == fixtures[1].rstrip("\n") + assert res["hits"]["hits"][2]["_source"]["log"]["offset"] == 94 + assert ( + res["hits"]["hits"][2]["_source"]["log"]["file"]["path"] + == f"{cloudwatch_group_name}/{cloudwatch_stream_name}" + ) + assert res["hits"]["hits"][2]["_source"]["aws"]["cloudwatch"]["log_group"] == cloudwatch_group_name + assert res["hits"]["hits"][2]["_source"]["aws"]["cloudwatch"]["log_stream"] == cloudwatch_stream_name + assert res["hits"]["hits"][2]["_source"]["aws"]["cloudwatch"]["event_id"] == event_ids_cloudwatch_logs[0] + assert res["hits"]["hits"][2]["_source"]["cloud"]["provider"] == "aws" + assert res["hits"]["hits"][2]["_source"]["cloud"]["region"] == "us-east-1" + assert res["hits"]["hits"][2]["_source"]["cloud"]["account"]["id"] == "000000000000" + assert res["hits"]["hits"][2]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] + + logstash_message = self.logstash.get_messages(expected=0) + assert len(logstash_message) == 0 + + fourth_call = handler(events_kinesis, ctx) # type:ignore + + assert fourth_call == "completed" + + self.elasticsearch.refresh(index="logs-generic-default") + res = self.elasticsearch.search( + index="logs-generic-default", + query={ + "bool": { + "must_not": { + "ids": { + "values": [ + f"{prefix_s3_first}-000000000000", + f"{prefix_sqs}-000000000000", + f"{prefix_cloudwatch_logs}-000000000000", + f"{prefix_kinesis}-000000000000", + ] + } + } + } + }, + sort="_seq_no", + ) + + assert res["hits"]["total"] == {"value": 4, "relation": "eq"} + + assert res["hits"]["hits"][3]["_source"]["message"] == fixtures[1].rstrip("\n") + assert res["hits"]["hits"][3]["_source"]["log"]["offset"] == 94 + assert res["hits"]["hits"][3]["_source"]["log"]["file"]["path"] == kinesis_stream_arn + assert res["hits"]["hits"][3]["_source"]["aws"]["kinesis"]["type"] == "stream" + assert res["hits"]["hits"][3]["_source"]["aws"]["kinesis"]["partition_key"] == "PartitionKey" + assert res["hits"]["hits"][3]["_source"]["aws"]["kinesis"]["name"] == kinesis_stream_name + assert ( + 
res["hits"]["hits"][3]["_source"]["aws"]["kinesis"]["sequence_number"] + == events_kinesis["Records"][0]["kinesis"]["sequenceNumber"] + ) + assert res["hits"]["hits"][3]["_source"]["cloud"]["provider"] == "aws" + assert res["hits"]["hits"][3]["_source"]["cloud"]["region"] == "us-east-1" + assert res["hits"]["hits"][3]["_source"]["cloud"]["account"]["id"] == "000000000000" + assert res["hits"]["hits"][3]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] + + logstash_message = self.logstash.get_messages(expected=0) + assert len(logstash_message) == 0 + + replayed_events, _ = _sqs_get_messages(self.sqs_client, os.environ["SQS_REPLAY_URL"], self.sqs_replay_queue_arn) + with self.assertRaises(ReplayHandlerException): + handler(replayed_events, ctx) # type:ignore + + self.elasticsearch.refresh(index="logs-generic-default") + + # Remove the expected id for s3-sqs so that it can be replayed + self.elasticsearch.delete_by_query( + index="logs-generic-default", body={"query": {"ids": {"values": [f"{prefix_s3_first}-000000000000"]}}} + ) + + # Remove the expected id for sqs so that it can be replayed + self.elasticsearch.delete_by_query( + index="logs-generic-default", body={"query": {"ids": {"values": [f"{prefix_sqs}-000000000000"]}}} + ) + + # Remove the expected id for cloudwatch logs so that it can be replayed + self.elasticsearch.delete_by_query( + index="logs-generic-default", + body={"query": {"ids": {"values": [f"{prefix_cloudwatch_logs}-000000000000"]}}}, + ) + + # Remove the expected id for kinesis data stream so that it can be replayed + self.elasticsearch.delete_by_query( + index="logs-generic-default", + body={"query": {"ids": {"values": [f"{prefix_kinesis}-000000000000"]}}}, + ) + + self.elasticsearch.refresh(index="logs-generic-default") + + # let's update the config file so that logstash won't fail anymore + config_yaml = f""" + inputs: + - type: "kinesis-data-stream" + id: "{kinesis_stream_arn}" + tags: {self.default_tags} + outputs: {self.default_outputs} + - type: "cloudwatch-logs" + id: "{cloudwatch_group_arn}" + tags: {self.default_tags} + outputs: {self.default_outputs} + - type: sqs + id: "{sqs_queue_arn}" + tags: {self.default_tags} + outputs: {self.default_outputs} + - type: s3-sqs + id: "{s3_sqs_queue_arn}" + tags: {self.default_tags} + outputs: {self.default_outputs} + """ + + _s3_upload_content_to_bucket( + client=self.s3_client, + content=config_yaml.encode("utf-8"), + content_type="text/plain", + bucket_name=config_bucket_name, + key=config_file_path, + create_bucket=False, + ) + + ctx = ContextMock(remaining_time_in_millis=_REMAINING_TIME_FORCE_CONTINUE_0ms) + + # implicit wait for the message to be back on the queue + time.sleep(35) + replayed_events, _ = _sqs_get_messages(self.sqs_client, os.environ["SQS_REPLAY_URL"], self.sqs_replay_queue_arn) + fifth_call = handler(replayed_events, ctx) # type:ignore + + assert fifth_call == "replayed" + + self.elasticsearch.refresh(index="logs-generic-default") + assert self.elasticsearch.count(index="logs-generic-default")["count"] == 5 + + self.elasticsearch.refresh(index="logs-generic-default") + res = self.elasticsearch.search(index="logs-generic-default", sort="_seq_no") + + assert res["hits"]["total"] == {"value": 5, "relation": "eq"} + + assert res["hits"]["hits"][4]["_source"]["message"] == fixtures[0].rstrip("\n") + assert res["hits"]["hits"][4]["_source"]["log"]["offset"] == 0 + assert ( + res["hits"]["hits"][4]["_source"]["log"]["file"]["path"] + == 
f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{first_filename}" + ) + assert res["hits"]["hits"][4]["_source"]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name + assert res["hits"]["hits"][4]["_source"]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" + assert res["hits"]["hits"][4]["_source"]["aws"]["s3"]["object"]["key"] == first_filename + assert res["hits"]["hits"][4]["_source"]["cloud"]["provider"] == "aws" + assert res["hits"]["hits"][4]["_source"]["cloud"]["region"] == "eu-central-1" + assert res["hits"]["hits"][4]["_source"]["cloud"]["account"]["id"] == "000000000000" + assert res["hits"]["hits"][4]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] + + logstash_message = self.logstash.get_messages(expected=0) + assert len(logstash_message) == 0 + + # implicit wait for the message to be back on the queue + time.sleep(35) + replayed_events, _ = _sqs_get_messages(self.sqs_client, os.environ["SQS_REPLAY_URL"], self.sqs_replay_queue_arn) + sixth_call = handler(replayed_events, ctx) # type:ignore + + assert sixth_call == "replayed" + + self.elasticsearch.refresh(index="logs-generic-default") + assert self.elasticsearch.count(index="logs-generic-default")["count"] == 5 + + res = self.elasticsearch.search(index="logs-generic-default", sort="_seq_no") + assert res["hits"]["total"] == {"value": 5, "relation": "eq"} + + logstash_message = self.logstash.get_messages(expected=1) + assert len(logstash_message) == 1 + # positions on res["hits"]["hits"] are skewed compared to logstash_message + # in elasticsearch we inserted the second event of each input before the first one + res["hits"]["hits"][4]["_source"]["tags"].remove("generic") + assert res["hits"]["hits"][4]["_source"]["aws"] == logstash_message[0]["aws"] + assert res["hits"]["hits"][4]["_source"]["cloud"] == logstash_message[0]["cloud"] + assert res["hits"]["hits"][4]["_source"]["log"] == logstash_message[0]["log"] + assert res["hits"]["hits"][4]["_source"]["message"] == logstash_message[0]["message"] + assert res["hits"]["hits"][4]["_source"]["tags"] == logstash_message[0]["tags"] + + ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) + + # implicit wait for the message to be back on the queue + time.sleep(35) + replayed_events, _ = _sqs_get_messages(self.sqs_client, os.environ["SQS_REPLAY_URL"], self.sqs_replay_queue_arn) + seventh_call = handler(replayed_events, ctx) # type:ignore + + assert seventh_call == "replayed" + + self.elasticsearch.refresh(index="logs-generic-default") + assert self.elasticsearch.count(index="logs-generic-default")["count"] == 8 + + self.elasticsearch.refresh(index="logs-generic-default") + res = self.elasticsearch.search(index="logs-generic-default", sort="_seq_no") + + assert res["hits"]["total"] == {"value": 8, "relation": "eq"} + + assert res["hits"]["hits"][5]["_source"]["message"] == fixtures[0].rstrip("\n") + assert res["hits"]["hits"][5]["_source"]["log"]["offset"] == 0 + assert res["hits"]["hits"][5]["_source"]["log"]["file"]["path"] == sqs_queue_url_path + assert res["hits"]["hits"][5]["_source"]["aws"]["sqs"]["name"] == sqs_queue_name + assert res["hits"]["hits"][5]["_source"]["aws"]["sqs"]["message_id"] == message_id + assert res["hits"]["hits"][5]["_source"]["cloud"]["provider"] == "aws" + assert res["hits"]["hits"][5]["_source"]["cloud"]["region"] == "us-east-1" + assert res["hits"]["hits"][5]["_source"]["cloud"]["account"]["id"] == "000000000000" + assert res["hits"]["hits"][5]["_source"]["tags"] == ["forwarded", 
"generic", "tag1", "tag2", "tag3"] + + assert res["hits"]["hits"][6]["_source"]["message"] == fixtures[0].rstrip("\n") + assert res["hits"]["hits"][6]["_source"]["log"]["offset"] == 0 + assert ( + res["hits"]["hits"][6]["_source"]["log"]["file"]["path"] + == f"{cloudwatch_group_name}/{cloudwatch_stream_name}" + ) + assert res["hits"]["hits"][6]["_source"]["aws"]["cloudwatch"]["log_group"] == cloudwatch_group_name + assert res["hits"]["hits"][6]["_source"]["aws"]["cloudwatch"]["log_stream"] == cloudwatch_stream_name + assert res["hits"]["hits"][6]["_source"]["aws"]["cloudwatch"]["event_id"] == event_ids_cloudwatch_logs[0] + assert res["hits"]["hits"][6]["_source"]["cloud"]["provider"] == "aws" + assert res["hits"]["hits"][6]["_source"]["cloud"]["region"] == "us-east-1" + assert res["hits"]["hits"][6]["_source"]["cloud"]["account"]["id"] == "000000000000" + assert res["hits"]["hits"][6]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] + + assert res["hits"]["hits"][7]["_source"]["message"] == fixtures[0].rstrip("\n") + assert res["hits"]["hits"][7]["_source"]["log"]["offset"] == 0 + assert res["hits"]["hits"][7]["_source"]["log"]["file"]["path"] == kinesis_stream_arn + assert res["hits"]["hits"][7]["_source"]["aws"]["kinesis"]["type"] == "stream" + assert res["hits"]["hits"][7]["_source"]["aws"]["kinesis"]["partition_key"] == "PartitionKey" + assert res["hits"]["hits"][7]["_source"]["aws"]["kinesis"]["name"] == kinesis_stream_name + assert ( + res["hits"]["hits"][7]["_source"]["aws"]["kinesis"]["sequence_number"] + == events_kinesis["Records"][0]["kinesis"]["sequenceNumber"] + ) + assert res["hits"]["hits"][7]["_source"]["cloud"]["provider"] == "aws" + assert res["hits"]["hits"][7]["_source"]["cloud"]["region"] == "us-east-1" + assert res["hits"]["hits"][7]["_source"]["cloud"]["account"]["id"] == "000000000000" + assert res["hits"]["hits"][7]["_source"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] + + logstash_message = self.logstash.get_messages(expected=8) + assert len(logstash_message) == 8 + res["hits"]["hits"][0]["_source"]["tags"].remove("generic") + res["hits"]["hits"][1]["_source"]["tags"].remove("generic") + res["hits"]["hits"][2]["_source"]["tags"].remove("generic") + res["hits"]["hits"][3]["_source"]["tags"].remove("generic") + res["hits"]["hits"][5]["_source"]["tags"].remove("generic") + res["hits"]["hits"][6]["_source"]["tags"].remove("generic") + res["hits"]["hits"][7]["_source"]["tags"].remove("generic") + + # positions on res["hits"]["hits"] are skewed compared to logstash_message + # in elasticsearch we inserted the second event of each input before the first one + assert res["hits"]["hits"][0]["_source"]["aws"] == logstash_message[1]["aws"] + assert res["hits"]["hits"][0]["_source"]["cloud"] == logstash_message[1]["cloud"] + assert res["hits"]["hits"][0]["_source"]["log"] == logstash_message[1]["log"] + assert res["hits"]["hits"][0]["_source"]["message"] == logstash_message[1]["message"] + assert res["hits"]["hits"][0]["_source"]["tags"] == logstash_message[1]["tags"] + + assert res["hits"]["hits"][5]["_source"]["aws"] == logstash_message[2]["aws"] + assert res["hits"]["hits"][5]["_source"]["cloud"] == logstash_message[2]["cloud"] + assert res["hits"]["hits"][5]["_source"]["log"] == logstash_message[2]["log"] + assert res["hits"]["hits"][5]["_source"]["message"] == logstash_message[2]["message"] + assert res["hits"]["hits"][5]["_source"]["tags"] == logstash_message[2]["tags"] + + assert res["hits"]["hits"][1]["_source"]["aws"] == 
logstash_message[3]["aws"] + assert res["hits"]["hits"][1]["_source"]["cloud"] == logstash_message[3]["cloud"] + assert res["hits"]["hits"][1]["_source"]["log"] == logstash_message[3]["log"] + assert res["hits"]["hits"][1]["_source"]["message"] == logstash_message[3]["message"] + assert res["hits"]["hits"][1]["_source"]["tags"] == logstash_message[3]["tags"] + + assert res["hits"]["hits"][6]["_source"]["aws"] == logstash_message[4]["aws"] + assert res["hits"]["hits"][6]["_source"]["cloud"] == logstash_message[4]["cloud"] + assert res["hits"]["hits"][6]["_source"]["log"] == logstash_message[4]["log"] + assert res["hits"]["hits"][6]["_source"]["message"] == logstash_message[4]["message"] + assert res["hits"]["hits"][6]["_source"]["tags"] == logstash_message[4]["tags"] + + assert res["hits"]["hits"][2]["_source"]["aws"] == logstash_message[5]["aws"] + assert res["hits"]["hits"][2]["_source"]["cloud"] == logstash_message[5]["cloud"] + assert res["hits"]["hits"][2]["_source"]["log"] == logstash_message[5]["log"] + assert res["hits"]["hits"][2]["_source"]["message"] == logstash_message[5]["message"] + assert res["hits"]["hits"][2]["_source"]["tags"] == logstash_message[5]["tags"] + + assert res["hits"]["hits"][7]["_source"]["aws"] == logstash_message[6]["aws"] + assert res["hits"]["hits"][7]["_source"]["cloud"] == logstash_message[6]["cloud"] + assert res["hits"]["hits"][7]["_source"]["log"] == logstash_message[6]["log"] + assert res["hits"]["hits"][7]["_source"]["message"] == logstash_message[6]["message"] + assert res["hits"]["hits"][7]["_source"]["tags"] == logstash_message[6]["tags"] + + assert res["hits"]["hits"][3]["_source"]["aws"] == logstash_message[7]["aws"] + assert res["hits"]["hits"][3]["_source"]["cloud"] == logstash_message[7]["cloud"] + assert res["hits"]["hits"][3]["_source"]["log"] == logstash_message[7]["log"] + assert res["hits"]["hits"][3]["_source"]["message"] == logstash_message[7]["message"] + assert res["hits"]["hits"][3]["_source"]["tags"] == logstash_message[7]["tags"] + + def test_empty(self) -> None: + assert isinstance(self.elasticsearch, ElasticsearchContainer) + assert isinstance(self.logstash, LogstashContainer) + assert isinstance(self.localstack, LocalStackContainer) + + fixtures = [" \n"] # once stripped it is an empty event + + s3_bucket_name = _time_based_id(suffix="test-bucket") + first_filename = "exportedlog/uuid/yyyy-mm-dd-[$LATEST]hash/000000.gz" + _s3_upload_content_to_bucket( + client=self.s3_client, + content=gzip.compress("".join(fixtures).encode("utf-8")), + content_type="application/x-gzip", + bucket_name=s3_bucket_name, + key=first_filename, + ) + + cloudwatch_group_name = _time_based_id(suffix="source-group") + cloudwatch_group = _logs_create_cloudwatch_logs_group(self.logs_client, group_name=cloudwatch_group_name) + + cloudwatch_stream_name = _time_based_id(suffix="source-stream") + _logs_create_cloudwatch_logs_stream( + self.logs_client, group_name=cloudwatch_group_name, stream_name=cloudwatch_stream_name + ) + + _logs_upload_event_to_cloudwatch_logs( + self.logs_client, + group_name=cloudwatch_group_name, + stream_name=cloudwatch_stream_name, + messages_body=["".join(fixtures)], + ) + + cloudwatch_group_arn = cloudwatch_group["arn"] + + cloudwatch_group_name = cloudwatch_group_name + cloudwatch_stream_name = cloudwatch_stream_name + + sqs_queue_name = _time_based_id(suffix="source-sqs") + s3_sqs_queue_name = _time_based_id(suffix="source-s3-sqs") + + sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) + 
s3_sqs_queue = _sqs_create_queue(self.sqs_client, s3_sqs_queue_name, self.localstack.get_url()) + + sqs_queue_arn = sqs_queue["QueueArn"] + sqs_queue_url = sqs_queue["QueueUrl"] + + s3_sqs_queue_arn = s3_sqs_queue["QueueArn"] + s3_sqs_queue_url = s3_sqs_queue["QueueUrl"] + + _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) + _sqs_send_s3_notifications(self.sqs_client, s3_sqs_queue_url, s3_bucket_name, [first_filename]) + + kinesis_stream_name = _time_based_id(suffix="source-kinesis") + kinesis_stream = _kinesis_create_stream(self.kinesis_client, kinesis_stream_name) + kinesis_stream_arn = kinesis_stream["StreamDescription"]["StreamARN"] + + _kinesis_put_records(self.kinesis_client, kinesis_stream_name, ["".join(fixtures)]) + + config_yaml: str = f""" + inputs: + - type: "kinesis-data-stream" + id: "{kinesis_stream_arn}" + outputs: {self.default_outputs} + - type: "cloudwatch-logs" + id: "{cloudwatch_group_arn}" + outputs: {self.default_outputs} + - type: sqs + id: "{sqs_queue_arn}" + outputs: {self.default_outputs} + - type: s3-sqs + id: "{s3_sqs_queue_arn}" + outputs: {self.default_outputs} + """ + + config_file_path = "config.yaml" + config_bucket_name = _time_based_id(suffix="config-bucket") + _s3_upload_content_to_bucket( + client=self.s3_client, + content=config_yaml.encode("utf-8"), + content_type="text/plain", + bucket_name=config_bucket_name, + key=config_file_path, + ) + + os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" + + events_s3, _ = _sqs_get_messages(self.sqs_client, s3_sqs_queue_url, s3_sqs_queue_arn) + + events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) + + events_cloudwatch_logs, _, _ = _logs_retrieve_event_from_cloudwatch_logs( + self.logs_client, cloudwatch_group_name, cloudwatch_stream_name + ) + + events_kinesis, _ = _kinesis_retrieve_event_from_kinesis_stream( + self.kinesis_client, kinesis_stream_name, kinesis_stream_arn + ) + + ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) + first_call = handler(events_s3, ctx) # type:ignore + + assert first_call == "completed" + + self.elasticsearch.refresh(index="logs-generic-default", ignore_unavailable=True) + assert self.elasticsearch.count(index="logs-generic-default", ignore_unavailable=True)["count"] == 0 + + logstash_message = self.logstash.get_messages(expected=0) + assert len(logstash_message) == 0 + + second_call = handler(events_sqs, ctx) # type:ignore + + assert second_call == "completed" + + self.elasticsearch.refresh(index="logs-generic-default", ignore_unavailable=True) + assert self.elasticsearch.count(index="logs-generic-default", ignore_unavailable=True)["count"] == 0 + + logstash_message = self.logstash.get_messages(expected=0) + assert len(logstash_message) == 0 + + third_call = handler(events_cloudwatch_logs, ctx) # type:ignore + + assert third_call == "completed" + + self.elasticsearch.refresh(index="logs-generic-default", ignore_unavailable=True) + assert self.elasticsearch.count(index="logs-generic-default", ignore_unavailable=True)["count"] == 0 + + logstash_message = self.logstash.get_messages(expected=0) + assert len(logstash_message) == 0 + + fourth_call = handler(events_kinesis, ctx) # type:ignore + + assert fourth_call == "completed" + + self.elasticsearch.refresh(index="logs-generic-default", ignore_unavailable=True) + assert self.elasticsearch.count(index="logs-generic-default", ignore_unavailable=True)["count"] == 0 + + logstash_message = self.logstash.get_messages(expected=0) + assert 
len(logstash_message) == 0 + + def test_filtered(self) -> None: + assert isinstance(self.elasticsearch, ElasticsearchContainer) + assert isinstance(self.logstash, LogstashContainer) + assert isinstance(self.localstack, LocalStackContainer) + + fixtures = ["excluded"] + + s3_bucket_name = _time_based_id(suffix="test-bucket") + first_filename = "exportedlog/uuid/yyyy-mm-dd-[$LATEST]hash/000000.gz" + _s3_upload_content_to_bucket( + client=self.s3_client, + content=gzip.compress("".join(fixtures).encode("utf-8")), + content_type="application/x-gzip", + bucket_name=s3_bucket_name, + key=first_filename, + ) + + cloudwatch_group_name = _time_based_id(suffix="source-group") + cloudwatch_group = _logs_create_cloudwatch_logs_group(self.logs_client, group_name=cloudwatch_group_name) + + cloudwatch_stream_name = _time_based_id(suffix="source-stream") + _logs_create_cloudwatch_logs_stream( + self.logs_client, group_name=cloudwatch_group_name, stream_name=cloudwatch_stream_name + ) + + _logs_upload_event_to_cloudwatch_logs( + self.logs_client, + group_name=cloudwatch_group_name, + stream_name=cloudwatch_stream_name, + messages_body=["".join(fixtures)], + ) + + cloudwatch_group_arn = cloudwatch_group["arn"] + + cloudwatch_group_name = cloudwatch_group_name + cloudwatch_stream_name = cloudwatch_stream_name + + sqs_queue_name = _time_based_id(suffix="source-sqs") + s3_sqs_queue_name = _time_based_id(suffix="source-s3-sqs") + + sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) + s3_sqs_queue = _sqs_create_queue(self.sqs_client, s3_sqs_queue_name, self.localstack.get_url()) + + sqs_queue_arn = sqs_queue["QueueArn"] + sqs_queue_url = sqs_queue["QueueUrl"] + + s3_sqs_queue_arn = s3_sqs_queue["QueueArn"] + s3_sqs_queue_url = s3_sqs_queue["QueueUrl"] + + _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) + _sqs_send_s3_notifications(self.sqs_client, s3_sqs_queue_url, s3_bucket_name, [first_filename]) + + kinesis_stream_name = _time_based_id(suffix="source-kinesis") + kinesis_stream = _kinesis_create_stream(self.kinesis_client, kinesis_stream_name) + kinesis_stream_arn = kinesis_stream["StreamDescription"]["StreamARN"] + + _kinesis_put_records(self.kinesis_client, kinesis_stream_name, ["".join(fixtures)]) + + config_yaml: str = f""" + inputs: + - type: "kinesis-data-stream" + id: "{kinesis_stream_arn}" + exclude: + - "excluded" + outputs: {self.default_outputs} + - type: "cloudwatch-logs" + id: "{cloudwatch_group_arn}" + exclude: + - "excluded" + outputs: {self.default_outputs} + - type: sqs + id: "{sqs_queue_arn}" + exclude: + - "excluded" + outputs: {self.default_outputs} + - type: s3-sqs + id: "{s3_sqs_queue_arn}" + exclude: + - "excluded" + outputs: {self.default_outputs} + """ + + config_file_path = "config.yaml" + config_bucket_name = _time_based_id(suffix="config-bucket") + _s3_upload_content_to_bucket( + client=self.s3_client, + content=config_yaml.encode("utf-8"), + content_type="text/plain", + bucket_name=config_bucket_name, + key=config_file_path, + ) + + os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" + + events_s3, _ = _sqs_get_messages(self.sqs_client, s3_sqs_queue_url, s3_sqs_queue_arn) + + events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) + + events_cloudwatch_logs, _, _ = _logs_retrieve_event_from_cloudwatch_logs( + self.logs_client, cloudwatch_group_name, cloudwatch_stream_name + ) + + events_kinesis, _ = _kinesis_retrieve_event_from_kinesis_stream( + self.kinesis_client, 
kinesis_stream_name, kinesis_stream_arn + ) + + ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) + first_call = handler(events_s3, ctx) # type:ignore + + assert first_call == "completed" + + self.elasticsearch.refresh(index="logs-generic-default", ignore_unavailable=True) + assert self.elasticsearch.count(index="logs-generic-default", ignore_unavailable=True)["count"] == 0 + + logstash_message = self.logstash.get_messages(expected=0) + assert len(logstash_message) == 0 + + second_call = handler(events_sqs, ctx) # type:ignore + + assert second_call == "completed" + + self.elasticsearch.refresh(index="logs-generic-default", ignore_unavailable=True) + assert self.elasticsearch.count(index="logs-generic-default", ignore_unavailable=True)["count"] == 0 + + logstash_message = self.logstash.get_messages(expected=0) + assert len(logstash_message) == 0 + + third_call = handler(events_cloudwatch_logs, ctx) # type:ignore + + assert third_call == "completed" + + self.elasticsearch.refresh(index="logs-generic-default", ignore_unavailable=True) + assert self.elasticsearch.count(index="logs-generic-default", ignore_unavailable=True)["count"] == 0 + + logstash_message = self.logstash.get_messages(expected=0) + assert len(logstash_message) == 0 + + fourth_call = handler(events_kinesis, ctx) # type:ignore + + assert fourth_call == "completed" + + self.elasticsearch.refresh(index="logs-generic-default", ignore_unavailable=True) + assert self.elasticsearch.count(index="logs-generic-default", ignore_unavailable=True)["count"] == 0 + + logstash_message = self.logstash.get_messages(expected=0) + assert len(logstash_message) == 0 + + def test_expand_event_from_list_empty_line(self) -> None: + assert isinstance(self.logstash, LogstashContainer) + assert isinstance(self.localstack, LocalStackContainer) + + first_expanded_event: str = _load_file_fixture("cloudwatch-log-1.json") + second_expanded_event: str = _load_file_fixture("cloudwatch-log-2.json") + third_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") + + fixtures = [ + f"""{{"aField": [{first_expanded_event},{second_expanded_event}]}}\n""" + f"""\n{{"aField": [{third_expanded_event}]}}""" + ] + + sqs_queue_name = _time_based_id(suffix="source-sqs") + + sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) + + sqs_queue_arn = sqs_queue["QueueArn"] + sqs_queue_url = sqs_queue["QueueUrl"] + sqs_queue_url_path = sqs_queue["QueueUrlPath"] + + _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) + + config_yaml: str = f""" + inputs: + - type: "sqs" + id: "{sqs_queue_arn}" + expand_event_list_from_field: aField + tags: {self.default_tags} + outputs: + - type: "logstash" + args: + logstash_url: "{self.logstash.get_url()}" + ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} + username: "{self.logstash.logstash_user}" + password: "{self.logstash.logstash_password}" + """ + + config_file_path = "config.yaml" + config_bucket_name = _time_based_id(suffix="config-bucket") + _s3_upload_content_to_bucket( + client=self.s3_client, + content=config_yaml.encode("utf-8"), + content_type="text/plain", + bucket_name=config_bucket_name, + key=config_file_path, + ) + + os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" + + events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) + + message_id = events_sqs["Records"][0]["messageId"] + + ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) + + 
first_call = handler(events_sqs, ctx) # type:ignore + + assert first_call == "completed" + + logstash_message = self.logstash.get_messages(expected=3) + assert len(logstash_message) == 3 + + assert logstash_message[0]["message"] == json_dumper(json_parser(first_expanded_event)) + assert logstash_message[0]["log"]["offset"] == 0 + assert logstash_message[0]["log"]["file"]["path"] == sqs_queue_url_path + assert logstash_message[0]["aws"]["sqs"]["name"] == sqs_queue_name + assert logstash_message[0]["aws"]["sqs"]["message_id"] == message_id + assert logstash_message[0]["cloud"]["provider"] == "aws" + assert logstash_message[0]["cloud"]["region"] == "us-east-1" + assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + assert logstash_message[1]["message"] == json_dumper(json_parser(second_expanded_event)) + assert logstash_message[1]["log"]["offset"] == 174 + assert logstash_message[1]["log"]["file"]["path"] == sqs_queue_url_path + assert logstash_message[1]["aws"]["sqs"]["name"] == sqs_queue_name + assert logstash_message[1]["aws"]["sqs"]["message_id"] == message_id + assert logstash_message[1]["cloud"]["provider"] == "aws" + assert logstash_message[1]["cloud"]["region"] == "us-east-1" + assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + assert logstash_message[2]["message"] == json_dumper(json_parser(third_expanded_event)) + assert logstash_message[2]["log"]["offset"] == 349 + assert logstash_message[2]["log"]["file"]["path"] == sqs_queue_url_path + assert logstash_message[2]["aws"]["sqs"]["name"] == sqs_queue_name + assert logstash_message[2]["aws"]["sqs"]["message_id"] == message_id + assert logstash_message[2]["cloud"]["provider"] == "aws" + assert logstash_message[2]["cloud"]["region"] == "us-east-1" + assert logstash_message[2]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[2]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + def test_expand_event_from_list_empty_event_not_expanded(self) -> None: + assert isinstance(self.logstash, LogstashContainer) + assert isinstance(self.localstack, LocalStackContainer) + + first_expanded_event: str = _load_file_fixture("cloudwatch-log-1.json") + second_expanded_event: str = _load_file_fixture("cloudwatch-log-2.json") + + fixtures = [f"""{{"aField": [{first_expanded_event},"",{second_expanded_event}]}}"""] + + sqs_queue_name = _time_based_id(suffix="source-sqs") + + sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) + + sqs_queue_arn = sqs_queue["QueueArn"] + sqs_queue_url = sqs_queue["QueueUrl"] + sqs_queue_url_path = sqs_queue["QueueUrlPath"] + + _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) + + config_yaml: str = f""" + inputs: + - type: "sqs" + id: "{sqs_queue_arn}" + expand_event_list_from_field: aField + tags: {self.default_tags} + outputs: + - type: "logstash" + args: + logstash_url: "{self.logstash.get_url()}" + ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} + username: "{self.logstash.logstash_user}" + password: "{self.logstash.logstash_password}" + """ + + config_file_path = "config.yaml" + config_bucket_name = _time_based_id(suffix="config-bucket") + _s3_upload_content_to_bucket( + client=self.s3_client, + content=config_yaml.encode("utf-8"), + content_type="text/plain", + bucket_name=config_bucket_name, + key=config_file_path, + ) + + 
os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" + + events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) + + message_id = events_sqs["Records"][0]["messageId"] + + ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) + + first_call = handler(events_sqs, ctx) # type:ignore + + assert first_call == "completed" + + logstash_message = self.logstash.get_messages(expected=2) + assert len(logstash_message) == 2 + + assert logstash_message[0]["message"] == json_dumper(json_parser(first_expanded_event)) + assert logstash_message[0]["log"]["offset"] == 0 + assert logstash_message[0]["log"]["file"]["path"] == sqs_queue_url_path + assert logstash_message[0]["aws"]["sqs"]["name"] == sqs_queue_name + assert logstash_message[0]["aws"]["sqs"]["message_id"] == message_id + assert logstash_message[0]["cloud"]["provider"] == "aws" + assert logstash_message[0]["cloud"]["region"] == "us-east-1" + assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + assert logstash_message[1]["message"] == json_dumper(json_parser(second_expanded_event)) + assert logstash_message[1]["log"]["offset"] == 233 + assert logstash_message[1]["log"]["file"]["path"] == sqs_queue_url_path + assert logstash_message[1]["aws"]["sqs"]["name"] == sqs_queue_name + assert logstash_message[1]["aws"]["sqs"]["message_id"] == message_id + assert logstash_message[1]["cloud"]["provider"] == "aws" + assert logstash_message[1]["cloud"]["region"] == "us-east-1" + assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + def test_root_fields_to_add_to_expanded_event_no_dict_event(self) -> None: + assert isinstance(self.logstash, LogstashContainer) + assert isinstance(self.localstack, LocalStackContainer) + + first_expanded_event: str = '"first_expanded_event"' + second_expanded_event: str = '"second_expanded_event"' + third_expanded_event: str = '"third_expanded_event"' + + fixtures = [ + f"""{{"firstRootField": "firstRootField", "secondRootField":"secondRootField", + "aField": [{first_expanded_event},{second_expanded_event},{third_expanded_event}]}}""" + ] + + sqs_queue_name = _time_based_id(suffix="source-sqs") + + sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) + + sqs_queue_arn = sqs_queue["QueueArn"] + sqs_queue_url = sqs_queue["QueueUrl"] + sqs_queue_url_path = sqs_queue["QueueUrlPath"] + + _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) + + config_yaml: str = f""" + inputs: + - type: "sqs" + id: "{sqs_queue_arn}" + expand_event_list_from_field: aField + root_fields_to_add_to_expanded_event: ["secondRootField"] + tags: {self.default_tags} + outputs: + - type: "logstash" + args: + logstash_url: "{self.logstash.get_url()}" + ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} + username: "{self.logstash.logstash_user}" + password: "{self.logstash.logstash_password}" + """ + + config_file_path = "config.yaml" + config_bucket_name = _time_based_id(suffix="config-bucket") + _s3_upload_content_to_bucket( + client=self.s3_client, + content=config_yaml.encode("utf-8"), + content_type="text/plain", + bucket_name=config_bucket_name, + key=config_file_path, + ) + + os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" + + events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, 
sqs_queue_arn) + + message_id = events_sqs["Records"][0]["messageId"] + + ctx = ContextMock() + first_call = handler(events_sqs, ctx) # type:ignore + + assert first_call == "continuing" + + logstash_message = self.logstash.get_messages(expected=1) + assert len(logstash_message) == 1 + + assert logstash_message[0]["message"] == first_expanded_event + assert logstash_message[0]["log"]["offset"] == 0 + assert logstash_message[0]["log"]["file"]["path"] == sqs_queue_url_path + assert logstash_message[0]["aws"]["sqs"]["name"] == sqs_queue_name + assert logstash_message[0]["aws"]["sqs"]["message_id"] == message_id + assert logstash_message[0]["cloud"]["provider"] == "aws" + assert logstash_message[0]["cloud"]["region"] == "us-east-1" + assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) + + continued_events, _ = _sqs_get_messages( + self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn + ) + second_call = handler(continued_events, ctx) # type:ignore + + assert second_call == "completed" + + logstash_message = self.logstash.get_messages(expected=3) + assert len(logstash_message) == 3 + + assert logstash_message[1]["message"] == second_expanded_event + assert logstash_message[1]["log"]["offset"] == 56 + assert logstash_message[1]["log"]["file"]["path"] == sqs_queue_url_path + assert logstash_message[1]["aws"]["sqs"]["name"] == sqs_queue_name + assert logstash_message[1]["aws"]["sqs"]["message_id"] == message_id + assert logstash_message[1]["cloud"]["provider"] == "aws" + assert logstash_message[1]["cloud"]["region"] == "us-east-1" + assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + assert logstash_message[2]["message"] == third_expanded_event + assert logstash_message[2]["log"]["offset"] == 112 + assert logstash_message[2]["log"]["file"]["path"] == sqs_queue_url_path + assert logstash_message[2]["aws"]["sqs"]["name"] == sqs_queue_name + assert logstash_message[2]["aws"]["sqs"]["message_id"] == message_id + assert logstash_message[2]["cloud"]["provider"] == "aws" + assert logstash_message[2]["cloud"]["region"] == "us-east-1" + assert logstash_message[2]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[2]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + def test_root_fields_to_add_to_expanded_event_event_not_expanded(self) -> None: + assert isinstance(self.logstash, LogstashContainer) + assert isinstance(self.localstack, LocalStackContainer) + + first_expanded_event: str = _load_file_fixture("cloudwatch-log-1.json") + first_expanded_with_root_fields: dict[str, Any] = json_parser(first_expanded_event) + first_expanded_with_root_fields["secondRootField"] = "secondRootField" + + second_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") + second_expanded_with_root_fields: dict[str, Any] = json_parser(second_expanded_event) + second_expanded_with_root_fields["secondRootField"] = "secondRootField" + + fixtures = [ + f"""{{"firstRootField": "firstRootField", "secondRootField":"secondRootField", + "aField": [{first_expanded_event},{{}},{second_expanded_event}]}}""" + ] + + sqs_queue_name = _time_based_id(suffix="source-sqs") + + sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) + + sqs_queue_arn = sqs_queue["QueueArn"] + 
sqs_queue_url = sqs_queue["QueueUrl"] + sqs_queue_url_path = sqs_queue["QueueUrlPath"] + + _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) + + config_yaml: str = f""" + inputs: + - type: "sqs" + id: "{sqs_queue_arn}" + expand_event_list_from_field: aField + root_fields_to_add_to_expanded_event: ["secondRootField"] + tags: {self.default_tags} + outputs: + - type: "logstash" + args: + logstash_url: "{self.logstash.get_url()}" + ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} + username: "{self.logstash.logstash_user}" + password: "{self.logstash.logstash_password}" + """ + + config_file_path = "config.yaml" + config_bucket_name = _time_based_id(suffix="config-bucket") + _s3_upload_content_to_bucket( + client=self.s3_client, + content=config_yaml.encode("utf-8"), + content_type="text/plain", + bucket_name=config_bucket_name, + key=config_file_path, + ) + + os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" + + events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) + + message_id = events_sqs["Records"][0]["messageId"] + + ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) + + first_call = handler(events_sqs, ctx) # type:ignore + + assert first_call == "completed" + + logstash_message = self.logstash.get_messages(expected=2) + assert len(logstash_message) == 2 + + assert logstash_message[0]["message"] == json_dumper(first_expanded_with_root_fields) + assert logstash_message[0]["log"]["offset"] == 0 + assert logstash_message[0]["log"]["file"]["path"] == sqs_queue_url_path + assert logstash_message[0]["aws"]["sqs"]["name"] == sqs_queue_name + assert logstash_message[0]["aws"]["sqs"]["message_id"] == message_id + assert logstash_message[0]["cloud"]["provider"] == "aws" + assert logstash_message[0]["cloud"]["region"] == "us-east-1" + assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + assert logstash_message[1]["message"] == json_dumper(second_expanded_with_root_fields) + assert logstash_message[1]["log"]["offset"] == 180 + assert logstash_message[1]["log"]["file"]["path"] == sqs_queue_url_path + assert logstash_message[1]["aws"]["sqs"]["name"] == sqs_queue_name + assert logstash_message[1]["aws"]["sqs"]["message_id"] == message_id + assert logstash_message[1]["cloud"]["provider"] == "aws" + assert logstash_message[1]["cloud"]["region"] == "us-east-1" + assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + def test_root_fields_to_add_to_expanded_event_list(self) -> None: + assert isinstance(self.logstash, LogstashContainer) + assert isinstance(self.localstack, LocalStackContainer) + + first_expanded_event: str = _load_file_fixture("cloudwatch-log-1.json") + first_expanded_with_root_fields: dict[str, Any] = json_parser(first_expanded_event) + first_expanded_with_root_fields["secondRootField"] = "secondRootField" + + second_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") + second_expanded_with_root_fields: dict[str, Any] = json_parser(second_expanded_event) + second_expanded_with_root_fields["secondRootField"] = "secondRootField" + + third_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") + third_expanded_event_with_root_fields: dict[str, Any] = json_parser(third_expanded_event) + third_expanded_event_with_root_fields["secondRootField"] = 
"secondRootField" + + fixtures = [ + f"""{{"firstRootField": "firstRootField", "secondRootField":"secondRootField", + "aField": [{first_expanded_event},{second_expanded_event},{third_expanded_event}]}}""" + ] + + sqs_queue_name = _time_based_id(suffix="source-sqs") + + sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) + + sqs_queue_arn = sqs_queue["QueueArn"] + sqs_queue_url = sqs_queue["QueueUrl"] + sqs_queue_url_path = sqs_queue["QueueUrlPath"] + + _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) + + config_yaml: str = f""" + inputs: + - type: "sqs" + id: "{sqs_queue_arn}" + expand_event_list_from_field: aField + root_fields_to_add_to_expanded_event: ["secondRootField"] + tags: {self.default_tags} + outputs: + - type: "logstash" + args: + logstash_url: "{self.logstash.get_url()}" + ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} + username: "{self.logstash.logstash_user}" + password: "{self.logstash.logstash_password}" + """ + + config_file_path = "config.yaml" + config_bucket_name = _time_based_id(suffix="config-bucket") + _s3_upload_content_to_bucket( + client=self.s3_client, + content=config_yaml.encode("utf-8"), + content_type="text/plain", + bucket_name=config_bucket_name, + key=config_file_path, + ) + + os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" + + events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) + + message_id = events_sqs["Records"][0]["messageId"] + + ctx = ContextMock() + first_call = handler(events_sqs, ctx) # type:ignore + + assert first_call == "continuing" + + logstash_message = self.logstash.get_messages(expected=1) + assert len(logstash_message) == 1 + + assert logstash_message[0]["message"] == json_dumper(first_expanded_with_root_fields) + assert logstash_message[0]["log"]["offset"] == 0 + assert logstash_message[0]["log"]["file"]["path"] == sqs_queue_url_path + assert logstash_message[0]["aws"]["sqs"]["name"] == sqs_queue_name + assert logstash_message[0]["aws"]["sqs"]["message_id"] == message_id + assert logstash_message[0]["cloud"]["provider"] == "aws" + assert logstash_message[0]["cloud"]["region"] == "us-east-1" + assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) + + continued_events, _ = _sqs_get_messages( + self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn + ) + second_call = handler(continued_events, ctx) # type:ignore + + assert second_call == "completed" + + logstash_message = self.logstash.get_messages(expected=3) + assert len(logstash_message) == 3 + + assert logstash_message[1]["message"] == json_dumper(second_expanded_with_root_fields) + assert logstash_message[1]["log"]["offset"] == 114 + assert logstash_message[1]["log"]["file"]["path"] == sqs_queue_url_path + assert logstash_message[1]["aws"]["sqs"]["name"] == sqs_queue_name + assert logstash_message[1]["aws"]["sqs"]["message_id"] == message_id + assert logstash_message[1]["cloud"]["provider"] == "aws" + assert logstash_message[1]["cloud"]["region"] == "us-east-1" + assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + assert logstash_message[2]["message"] == json_dumper(third_expanded_event_with_root_fields) + assert logstash_message[2]["log"]["offset"] == 228 
+ assert logstash_message[2]["log"]["file"]["path"] == sqs_queue_url_path + assert logstash_message[2]["aws"]["sqs"]["name"] == sqs_queue_name + assert logstash_message[2]["aws"]["sqs"]["message_id"] == message_id + assert logstash_message[2]["cloud"]["provider"] == "aws" + assert logstash_message[2]["cloud"]["region"] == "us-east-1" + assert logstash_message[2]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[2]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + def test_root_fields_to_add_to_expanded_event_list_no_fields_in_root(self) -> None: + assert isinstance(self.logstash, LogstashContainer) + assert isinstance(self.localstack, LocalStackContainer) + + first_expanded_event: str = _load_file_fixture("cloudwatch-log-1.json") + first_expanded_with_root_fields: dict[str, Any] = json_parser(first_expanded_event) + first_expanded_with_root_fields["secondRootField"] = "secondRootField" + + second_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") + second_expanded_with_root_fields: dict[str, Any] = json_parser(second_expanded_event) + second_expanded_with_root_fields["secondRootField"] = "secondRootField" + + third_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") + third_expanded_event_with_root_fields: dict[str, Any] = json_parser(third_expanded_event) + third_expanded_event_with_root_fields["secondRootField"] = "secondRootField" + + fixtures = [ + f"""{{"firstRootField": "firstRootField", "secondRootField":"secondRootField", + "aField": [{first_expanded_event},{second_expanded_event},{third_expanded_event}]}}""" + ] + + sqs_queue_name = _time_based_id(suffix="source-sqs") + + sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) + + sqs_queue_arn = sqs_queue["QueueArn"] + sqs_queue_url = sqs_queue["QueueUrl"] + sqs_queue_url_path = sqs_queue["QueueUrlPath"] + + _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) + + config_yaml: str = f""" + inputs: + - type: "sqs" + id: "{sqs_queue_arn}" + expand_event_list_from_field: aField + root_fields_to_add_to_expanded_event: ["secondRootField", "thirdRootField"] + tags: {self.default_tags} + outputs: + - type: "logstash" + args: + logstash_url: "{self.logstash.get_url()}" + ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} + username: "{self.logstash.logstash_user}" + password: "{self.logstash.logstash_password}" + """ + + config_file_path = "config.yaml" + config_bucket_name = _time_based_id(suffix="config-bucket") + _s3_upload_content_to_bucket( + client=self.s3_client, + content=config_yaml.encode("utf-8"), + content_type="text/plain", + bucket_name=config_bucket_name, + key=config_file_path, + ) + + os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" + + events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) + + message_id = events_sqs["Records"][0]["messageId"] + + ctx = ContextMock() + first_call = handler(events_sqs, ctx) # type:ignore + + assert first_call == "continuing" + + logstash_message = self.logstash.get_messages(expected=1) + assert len(logstash_message) == 1 + + assert logstash_message[0]["message"] == json_dumper(first_expanded_with_root_fields) + assert logstash_message[0]["log"]["offset"] == 0 + assert logstash_message[0]["log"]["file"]["path"] == sqs_queue_url_path + assert logstash_message[0]["aws"]["sqs"]["name"] == sqs_queue_name + assert logstash_message[0]["aws"]["sqs"]["message_id"] == message_id + assert logstash_message[0]["cloud"]["provider"] 
== "aws" + assert logstash_message[0]["cloud"]["region"] == "us-east-1" + assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) + + continued_events, _ = _sqs_get_messages( + self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn + ) + second_call = handler(continued_events, ctx) # type:ignore + + assert second_call == "completed" + + logstash_message = self.logstash.get_messages(expected=3) + assert len(logstash_message) == 3 + + assert logstash_message[1]["message"] == json_dumper(second_expanded_with_root_fields) + assert logstash_message[1]["log"]["offset"] == 114 + assert logstash_message[1]["log"]["file"]["path"] == sqs_queue_url_path + assert logstash_message[1]["aws"]["sqs"]["name"] == sqs_queue_name + assert logstash_message[1]["aws"]["sqs"]["message_id"] == message_id + assert logstash_message[1]["cloud"]["provider"] == "aws" + assert logstash_message[1]["cloud"]["region"] == "us-east-1" + assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + assert logstash_message[2]["message"] == json_dumper(third_expanded_event_with_root_fields) + assert logstash_message[2]["log"]["offset"] == 228 + assert logstash_message[2]["log"]["file"]["path"] == sqs_queue_url_path + assert logstash_message[2]["aws"]["sqs"]["name"] == sqs_queue_name + assert logstash_message[2]["aws"]["sqs"]["message_id"] == message_id + assert logstash_message[2]["cloud"]["provider"] == "aws" + assert logstash_message[2]["cloud"]["region"] == "us-east-1" + assert logstash_message[2]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[2]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + def test_root_fields_to_add_to_expanded_event_all(self) -> None: + assert isinstance(self.logstash, LogstashContainer) + assert isinstance(self.localstack, LocalStackContainer) + + first_expanded_event: str = _load_file_fixture("cloudwatch-log-1.json") + first_expanded_with_root_fields: dict[str, Any] = json_parser(first_expanded_event) + first_expanded_with_root_fields["firstRootField"] = "firstRootField" + first_expanded_with_root_fields["secondRootField"] = "secondRootField" + + second_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") + second_expanded_with_root_fields: dict[str, Any] = json_parser(second_expanded_event) + second_expanded_with_root_fields["firstRootField"] = "firstRootField" + second_expanded_with_root_fields["secondRootField"] = "secondRootField" + + third_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") + third_expanded_event_with_root_fields: dict[str, Any] = json_parser(third_expanded_event) + third_expanded_event_with_root_fields["firstRootField"] = "firstRootField" + third_expanded_event_with_root_fields["secondRootField"] = "secondRootField" + + fixtures = [ + f"""{{"firstRootField": "firstRootField", "secondRootField":"secondRootField", + "aField": [{first_expanded_event},{second_expanded_event},{third_expanded_event}]}}""" + ] + + sqs_queue_name = _time_based_id(suffix="source-sqs") + + sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) + + sqs_queue_arn = sqs_queue["QueueArn"] + sqs_queue_url = sqs_queue["QueueUrl"] + sqs_queue_url_path = sqs_queue["QueueUrlPath"] + + _sqs_send_messages(self.sqs_client, sqs_queue_url, 
"".join(fixtures)) + + config_yaml: str = f""" + inputs: + - type: "sqs" + id: "{sqs_queue_arn}" + expand_event_list_from_field: aField + root_fields_to_add_to_expanded_event: all + tags: {self.default_tags} + outputs: + - type: "logstash" + args: + logstash_url: "{self.logstash.get_url()}" + ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} + username: "{self.logstash.logstash_user}" + password: "{self.logstash.logstash_password}" + """ + + config_file_path = "config.yaml" + config_bucket_name = _time_based_id(suffix="config-bucket") + _s3_upload_content_to_bucket( + client=self.s3_client, + content=config_yaml.encode("utf-8"), + content_type="text/plain", + bucket_name=config_bucket_name, + key=config_file_path, + ) + + os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" + + events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) + + message_id = events_sqs["Records"][0]["messageId"] + + ctx = ContextMock() + first_call = handler(events_sqs, ctx) # type:ignore + + assert first_call == "continuing" + + logstash_message = self.logstash.get_messages(expected=1) + assert len(logstash_message) == 1 + + assert logstash_message[0]["message"] == json_dumper(first_expanded_with_root_fields) + assert logstash_message[0]["log"]["offset"] == 0 + assert logstash_message[0]["log"]["file"]["path"] == sqs_queue_url_path + assert logstash_message[0]["aws"]["sqs"]["name"] == sqs_queue_name + assert logstash_message[0]["aws"]["sqs"]["message_id"] == message_id + assert logstash_message[0]["cloud"]["provider"] == "aws" + assert logstash_message[0]["cloud"]["region"] == "us-east-1" + assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) + + continued_events, _ = _sqs_get_messages( + self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn + ) + second_call = handler(continued_events, ctx) # type:ignore + + assert second_call == "completed" + + logstash_message = self.logstash.get_messages(expected=3) + assert len(logstash_message) == 3 + + assert logstash_message[1]["message"] == json_dumper(second_expanded_with_root_fields) + assert logstash_message[1]["log"]["offset"] == 114 + assert logstash_message[1]["log"]["file"]["path"] == sqs_queue_url_path + assert logstash_message[1]["aws"]["sqs"]["name"] == sqs_queue_name + assert logstash_message[1]["aws"]["sqs"]["message_id"] == message_id + assert logstash_message[1]["cloud"]["provider"] == "aws" + assert logstash_message[1]["cloud"]["region"] == "us-east-1" + assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + assert logstash_message[2]["message"] == json_dumper(third_expanded_event_with_root_fields) + assert logstash_message[2]["log"]["offset"] == 228 + assert logstash_message[2]["log"]["file"]["path"] == sqs_queue_url_path + assert logstash_message[2]["aws"]["sqs"]["name"] == sqs_queue_name + assert logstash_message[2]["aws"]["sqs"]["message_id"] == message_id + assert logstash_message[2]["cloud"]["provider"] == "aws" + assert logstash_message[2]["cloud"]["region"] == "us-east-1" + assert logstash_message[2]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[2]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + def 
test_root_fields_to_add_to_expanded_event_all_no_fields_in_root(self) -> None: + assert isinstance(self.logstash, LogstashContainer) + assert isinstance(self.localstack, LocalStackContainer) + + first_expanded_event: str = _load_file_fixture("cloudwatch-log-1.json") + first_expanded_with_root_fields: dict[str, Any] = json_parser(first_expanded_event) + + second_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") + second_expanded_with_root_fields: dict[str, Any] = json_parser(second_expanded_event) + + third_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") + third_expanded_event_with_root_fields: dict[str, Any] = json_parser(third_expanded_event) + + fixtures = [f"""{{"aField": [{first_expanded_event},{second_expanded_event},{third_expanded_event}]}}"""] + + sqs_queue_name = _time_based_id(suffix="source-sqs") + + sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) + + sqs_queue_arn = sqs_queue["QueueArn"] + sqs_queue_url = sqs_queue["QueueUrl"] + sqs_queue_url_path = sqs_queue["QueueUrlPath"] + + _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) + + config_yaml: str = f""" + inputs: + - type: "sqs" + id: "{sqs_queue_arn}" + expand_event_list_from_field: aField + root_fields_to_add_to_expanded_event: all + tags: {self.default_tags} + outputs: + - type: "logstash" + args: + logstash_url: "{self.logstash.get_url()}" + ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} + username: "{self.logstash.logstash_user}" + password: "{self.logstash.logstash_password}" + """ + + config_file_path = "config.yaml" + config_bucket_name = _time_based_id(suffix="config-bucket") + _s3_upload_content_to_bucket( + client=self.s3_client, + content=config_yaml.encode("utf-8"), + content_type="text/plain", + bucket_name=config_bucket_name, + key=config_file_path, + ) + + os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" + + events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) + + message_id = events_sqs["Records"][0]["messageId"] + + ctx = ContextMock() + first_call = handler(events_sqs, ctx) # type:ignore + + assert first_call == "continuing" + + logstash_message = self.logstash.get_messages(expected=1) + assert len(logstash_message) == 1 + + assert logstash_message[0]["message"] == json_dumper(first_expanded_with_root_fields) + assert logstash_message[0]["log"]["offset"] == 0 + assert logstash_message[0]["log"]["file"]["path"] == sqs_queue_url_path + assert logstash_message[0]["aws"]["sqs"]["name"] == sqs_queue_name + assert logstash_message[0]["aws"]["sqs"]["message_id"] == message_id + assert logstash_message[0]["cloud"]["provider"] == "aws" + assert logstash_message[0]["cloud"]["region"] == "us-east-1" + assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) + + continued_events, _ = _sqs_get_messages( + self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn + ) + second_call = handler(continued_events, ctx) # type:ignore + + assert second_call == "completed" + + logstash_message = self.logstash.get_messages(expected=3) + assert len(logstash_message) == 3 + + assert logstash_message[1]["message"] == json_dumper(second_expanded_with_root_fields) + assert logstash_message[1]["log"]["offset"] == 86 + assert logstash_message[1]["log"]["file"]["path"] == 
sqs_queue_url_path + assert logstash_message[1]["aws"]["sqs"]["name"] == sqs_queue_name + assert logstash_message[1]["aws"]["sqs"]["message_id"] == message_id + assert logstash_message[1]["cloud"]["provider"] == "aws" + assert logstash_message[1]["cloud"]["region"] == "us-east-1" + assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + assert logstash_message[2]["message"] == json_dumper(third_expanded_event_with_root_fields) + assert logstash_message[2]["log"]["offset"] == 172 + assert logstash_message[2]["log"]["file"]["path"] == sqs_queue_url_path + assert logstash_message[2]["aws"]["sqs"]["name"] == sqs_queue_name + assert logstash_message[2]["aws"]["sqs"]["message_id"] == message_id + assert logstash_message[2]["cloud"]["provider"] == "aws" + assert logstash_message[2]["cloud"]["region"] == "us-east-1" + assert logstash_message[2]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[2]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + def test_cloudwatch_logs_stream_as_input_instead_of_group(self) -> None: + assert isinstance(self.logstash, LogstashContainer) + assert isinstance(self.localstack, LocalStackContainer) + + fixtures = [ + _load_file_fixture("cloudwatch-log-1.json"), + _load_file_fixture("cloudwatch-log-2.json"), + _load_file_fixture("cloudwatch-log-3.json"), + ] + + cloudwatch_group_name = _time_based_id(suffix="source-group") + cloudwatch_group = _logs_create_cloudwatch_logs_group(self.logs_client, group_name=cloudwatch_group_name) + + cloudwatch_stream_name = _time_based_id(suffix="source-stream") + _logs_create_cloudwatch_logs_stream( + self.logs_client, group_name=cloudwatch_group_name, stream_name=cloudwatch_stream_name + ) + + cloudwatch_stream_name_different = _time_based_id(suffix="source-stream-different") + _logs_create_cloudwatch_logs_stream( + self.logs_client, group_name=cloudwatch_group_name, stream_name=cloudwatch_stream_name_different + ) + + _logs_upload_event_to_cloudwatch_logs( + self.logs_client, + group_name=cloudwatch_group_name, + stream_name=cloudwatch_stream_name, + messages_body=[fixtures[0], fixtures[2]], + ) + + _logs_upload_event_to_cloudwatch_logs( + self.logs_client, + group_name=cloudwatch_group_name, + stream_name=cloudwatch_stream_name_different, + messages_body=[fixtures[1]], + ) + + cloudwatch_group_arn = cloudwatch_group["arn"][0:-2] + + cloudwatch_group_name = cloudwatch_group_name + cloudwatch_stream_name = cloudwatch_stream_name + + config_yaml: str = f""" + inputs: + - type: "cloudwatch-logs" + id: "{cloudwatch_group_arn}:log-stream:{cloudwatch_stream_name}" + tags: {self.default_tags} + outputs: + - type: "logstash" + args: + logstash_url: "{self.logstash.get_url()}" + ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} + username: "{self.logstash.logstash_user}" + password: "{self.logstash.logstash_password}" + """ + + config_file_path = "config.yaml" + config_bucket_name = _time_based_id(suffix="config-bucket") + _s3_upload_content_to_bucket( + client=self.s3_client, + content=config_yaml.encode("utf-8"), + content_type="text/plain", + bucket_name=config_bucket_name, + key=config_file_path, + ) + + os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" + + events_cloudwatch_logs, event_ids_cloudwatch_logs, _ = _logs_retrieve_event_from_cloudwatch_logs( + self.logs_client, cloudwatch_group_name, cloudwatch_stream_name + ) + + events_cloudwatch_logs_different, _, _ = 
_logs_retrieve_event_from_cloudwatch_logs( + self.logs_client, cloudwatch_group_name, cloudwatch_stream_name_different + ) + + ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) + first_call = handler(events_cloudwatch_logs, ctx) # type:ignore + + assert first_call == "completed" + + second_call = handler(events_cloudwatch_logs_different, ctx) # type:ignore + + assert second_call == "completed" + + logstash_message = self.logstash.get_messages(expected=2) + assert len(logstash_message) == 2 + + assert logstash_message[0]["message"] == fixtures[0].rstrip("\n") + assert logstash_message[0]["log"]["offset"] == 0 + assert logstash_message[0]["log"]["file"]["path"] == f"{cloudwatch_group_name}/{cloudwatch_stream_name}" + assert logstash_message[0]["aws"]["cloudwatch"]["log_group"] == cloudwatch_group_name + assert logstash_message[0]["aws"]["cloudwatch"]["log_stream"] == cloudwatch_stream_name + assert logstash_message[0]["aws"]["cloudwatch"]["event_id"] == event_ids_cloudwatch_logs[0] + assert logstash_message[0]["cloud"]["provider"] == "aws" + assert logstash_message[0]["cloud"]["region"] == "us-east-1" + assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + assert logstash_message[1]["message"] == fixtures[2].rstrip("\n") + assert logstash_message[1]["log"]["offset"] == 0 + assert logstash_message[1]["log"]["file"]["path"] == f"{cloudwatch_group_name}/{cloudwatch_stream_name}" + assert logstash_message[1]["aws"]["cloudwatch"]["log_group"] == cloudwatch_group_name + assert logstash_message[1]["aws"]["cloudwatch"]["log_stream"] == cloudwatch_stream_name + assert logstash_message[1]["aws"]["cloudwatch"]["event_id"] == event_ids_cloudwatch_logs[1] + assert logstash_message[1]["cloud"]["provider"] == "aws" + assert logstash_message[1]["cloud"]["region"] == "us-east-1" + assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + def test_cloudwatch_logs_last_ending_offset_reset(self) -> None: + assert isinstance(self.logstash, LogstashContainer) + assert isinstance(self.localstack, LocalStackContainer) + + fixtures = [ + _load_file_fixture("cloudwatch-log-1.json"), + _load_file_fixture("cloudwatch-log-2.json"), + _load_file_fixture("cloudwatch-log-3.json"), + ] + + cloudwatch_group_name = _time_based_id(suffix="source-group") + cloudwatch_group = _logs_create_cloudwatch_logs_group(self.logs_client, group_name=cloudwatch_group_name) + + cloudwatch_stream_name = _time_based_id(suffix="source-stream") + _logs_create_cloudwatch_logs_stream( + self.logs_client, group_name=cloudwatch_group_name, stream_name=cloudwatch_stream_name + ) + + _logs_upload_event_to_cloudwatch_logs( + self.logs_client, + group_name=cloudwatch_group_name, + stream_name=cloudwatch_stream_name, + messages_body=fixtures, + ) + + cloudwatch_group_arn = cloudwatch_group["arn"] + + cloudwatch_group_name = cloudwatch_group_name + cloudwatch_stream_name = cloudwatch_stream_name + + config_yaml: str = f""" + inputs: + - type: "cloudwatch-logs" + id: "{cloudwatch_group_arn}" + tags: {self.default_tags} + outputs: + - type: "logstash" + args: + logstash_url: "{self.logstash.get_url()}" + ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} + username: "{self.logstash.logstash_user}" + password: "{self.logstash.logstash_password}" + """ + + config_file_path = "config.yaml" + config_bucket_name = 
_time_based_id(suffix="config-bucket") + _s3_upload_content_to_bucket( + client=self.s3_client, + content=config_yaml.encode("utf-8"), + content_type="text/plain", + bucket_name=config_bucket_name, + key=config_file_path, + ) + + os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" + + events_cloudwatch_logs, event_ids_cloudwatch_logs, _ = _logs_retrieve_event_from_cloudwatch_logs( + self.logs_client, cloudwatch_group_name, cloudwatch_stream_name + ) + + ctx = ContextMock() + first_call = handler(events_cloudwatch_logs, ctx) # type:ignore + + assert first_call == "continuing" + + logstash_message = self.logstash.get_messages(expected=1) + assert len(logstash_message) == 1 + + assert logstash_message[0]["message"] == fixtures[0].rstrip("\n") + assert logstash_message[0]["log"]["offset"] == 0 + assert logstash_message[0]["log"]["file"]["path"] == f"{cloudwatch_group_name}/{cloudwatch_stream_name}" + assert logstash_message[0]["aws"]["cloudwatch"]["log_group"] == cloudwatch_group_name + assert logstash_message[0]["aws"]["cloudwatch"]["log_stream"] == cloudwatch_stream_name + assert logstash_message[0]["aws"]["cloudwatch"]["event_id"] == event_ids_cloudwatch_logs[0] + assert logstash_message[0]["cloud"]["provider"] == "aws" + assert logstash_message[0]["cloud"]["region"] == "us-east-1" + assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) + + continued_events, _ = _sqs_get_messages( + self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn + ) + second_call = handler(continued_events, ctx) # type:ignore + + assert second_call == "completed" + + logstash_message = self.logstash.get_messages(expected=3) + assert len(logstash_message) == 3 + + assert logstash_message[1]["message"] == fixtures[1].rstrip("\n") + assert logstash_message[1]["log"]["offset"] == 0 + assert logstash_message[1]["log"]["file"]["path"] == f"{cloudwatch_group_name}/{cloudwatch_stream_name}" + assert logstash_message[1]["aws"]["cloudwatch"]["log_group"] == cloudwatch_group_name + assert logstash_message[1]["aws"]["cloudwatch"]["log_stream"] == cloudwatch_stream_name + assert logstash_message[1]["aws"]["cloudwatch"]["event_id"] == event_ids_cloudwatch_logs[1] + assert logstash_message[1]["cloud"]["provider"] == "aws" + assert logstash_message[1]["cloud"]["region"] == "us-east-1" + assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + assert logstash_message[2]["message"] == fixtures[2].rstrip("\n") + assert logstash_message[2]["log"]["offset"] == 0 + assert logstash_message[2]["log"]["file"]["path"] == f"{cloudwatch_group_name}/{cloudwatch_stream_name}" + assert logstash_message[2]["aws"]["cloudwatch"]["log_group"] == cloudwatch_group_name + assert logstash_message[2]["aws"]["cloudwatch"]["log_stream"] == cloudwatch_stream_name + assert logstash_message[2]["aws"]["cloudwatch"]["event_id"] == event_ids_cloudwatch_logs[2] + assert logstash_message[2]["cloud"]["provider"] == "aws" + assert logstash_message[2]["cloud"]["region"] == "us-east-1" + assert logstash_message[2]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[2]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + def test_cloudwatch_logs_last_event_expanded_offset_continue(self) -> None: + assert isinstance(self.logstash, 
LogstashContainer) + assert isinstance(self.localstack, LocalStackContainer) + + first_expanded_event: str = _load_file_fixture("cloudwatch-log-1.json") + second_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") + third_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") + + fixtures = [f"""{{"aField": [{first_expanded_event},{second_expanded_event},{third_expanded_event}]}}"""] + + cloudwatch_group_name = _time_based_id(suffix="source-group") + cloudwatch_group = _logs_create_cloudwatch_logs_group(self.logs_client, group_name=cloudwatch_group_name) + + cloudwatch_stream_name = _time_based_id(suffix="source-stream") + _logs_create_cloudwatch_logs_stream( + self.logs_client, group_name=cloudwatch_group_name, stream_name=cloudwatch_stream_name + ) + + _logs_upload_event_to_cloudwatch_logs( + self.logs_client, + group_name=cloudwatch_group_name, + stream_name=cloudwatch_stream_name, + messages_body=fixtures, + ) + + cloudwatch_group_arn = cloudwatch_group["arn"] + + cloudwatch_group_name = cloudwatch_group_name + cloudwatch_stream_name = cloudwatch_stream_name + + config_yaml: str = f""" + inputs: + - type: "cloudwatch-logs" + id: "{cloudwatch_group_arn}" + expand_event_list_from_field: aField + tags: {self.default_tags} + outputs: + - type: "logstash" + args: + logstash_url: "{self.logstash.get_url()}" + ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} + username: "{self.logstash.logstash_user}" + password: "{self.logstash.logstash_password}" + """ + + config_file_path = "config.yaml" + config_bucket_name = _time_based_id(suffix="config-bucket") + _s3_upload_content_to_bucket( + client=self.s3_client, + content=config_yaml.encode("utf-8"), + content_type="text/plain", + bucket_name=config_bucket_name, + key=config_file_path, + ) + + os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" + + events_cloudwatch_logs, event_ids_cloudwatch_logs, _ = _logs_retrieve_event_from_cloudwatch_logs( + self.logs_client, cloudwatch_group_name, cloudwatch_stream_name + ) + + ctx = ContextMock() + first_call = handler(events_cloudwatch_logs, ctx) # type:ignore + + assert first_call == "continuing" + + logstash_message = self.logstash.get_messages(expected=1) + assert len(logstash_message) == 1 + + assert logstash_message[0]["message"] == json_dumper(json_parser(first_expanded_event)) + assert logstash_message[0]["log"]["offset"] == 0 + assert logstash_message[0]["log"]["file"]["path"] == f"{cloudwatch_group_name}/{cloudwatch_stream_name}" + assert logstash_message[0]["aws"]["cloudwatch"]["log_group"] == cloudwatch_group_name + assert logstash_message[0]["aws"]["cloudwatch"]["log_stream"] == cloudwatch_stream_name + assert logstash_message[0]["aws"]["cloudwatch"]["event_id"] == event_ids_cloudwatch_logs[0] + assert logstash_message[0]["cloud"]["provider"] == "aws" + assert logstash_message[0]["cloud"]["region"] == "us-east-1" + assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) + + continued_events, _ = _sqs_get_messages( + self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn + ) + second_call = handler(continued_events, ctx) # type:ignore + + assert second_call == "completed" + + logstash_message = self.logstash.get_messages(expected=3) + assert len(logstash_message) == 3 + + assert logstash_message[1]["message"] == 
json_dumper(json_parser(second_expanded_event)) + assert logstash_message[1]["log"]["offset"] == 86 + assert logstash_message[1]["log"]["file"]["path"] == f"{cloudwatch_group_name}/{cloudwatch_stream_name}" + assert logstash_message[1]["aws"]["cloudwatch"]["log_group"] == cloudwatch_group_name + assert logstash_message[1]["aws"]["cloudwatch"]["log_stream"] == cloudwatch_stream_name + assert logstash_message[1]["aws"]["cloudwatch"]["event_id"] == event_ids_cloudwatch_logs[0] + assert logstash_message[1]["cloud"]["provider"] == "aws" + assert logstash_message[1]["cloud"]["region"] == "us-east-1" + assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + assert logstash_message[2]["message"] == json_dumper(json_parser(third_expanded_event)) + assert logstash_message[2]["log"]["offset"] == 172 + assert logstash_message[2]["log"]["file"]["path"] == f"{cloudwatch_group_name}/{cloudwatch_stream_name}" + assert logstash_message[2]["aws"]["cloudwatch"]["log_group"] == cloudwatch_group_name + assert logstash_message[2]["aws"]["cloudwatch"]["log_stream"] == cloudwatch_stream_name + assert logstash_message[2]["aws"]["cloudwatch"]["event_id"] == event_ids_cloudwatch_logs[0] + assert logstash_message[2]["cloud"]["provider"] == "aws" + assert logstash_message[2]["cloud"]["region"] == "us-east-1" + assert logstash_message[2]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[2]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + def test_kinesis_data_stream_last_ending_offset_reset(self) -> None: + assert isinstance(self.logstash, LogstashContainer) + assert isinstance(self.localstack, LocalStackContainer) + + fixtures = [ + _load_file_fixture("cloudwatch-log-1.json"), + _load_file_fixture("cloudwatch-log-2.json"), + _load_file_fixture("cloudwatch-log-3.json"), + ] + + kinesis_stream_name = _time_based_id(suffix="source-kinesis") + kinesis_stream = _kinesis_create_stream(self.kinesis_client, kinesis_stream_name) + kinesis_stream_arn = kinesis_stream["StreamDescription"]["StreamARN"] + + _kinesis_put_records(self.kinesis_client, kinesis_stream_name, fixtures) + + config_yaml: str = f""" + inputs: + - type: "kinesis-data-stream" + id: "{kinesis_stream_arn}" + tags: {self.default_tags} + outputs: + - type: "logstash" + args: + logstash_url: "{self.logstash.get_url()}" + ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} + username: "{self.logstash.logstash_user}" + password: "{self.logstash.logstash_password}" + """ + + config_file_path = "config.yaml" + config_bucket_name = _time_based_id(suffix="config-bucket") + _s3_upload_content_to_bucket( + client=self.s3_client, + content=config_yaml.encode("utf-8"), + content_type="text/plain", + bucket_name=config_bucket_name, + key=config_file_path, + ) + + os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" + + events_kinesis, _ = _kinesis_retrieve_event_from_kinesis_stream( + self.kinesis_client, kinesis_stream_name, kinesis_stream_arn + ) + + ctx = ContextMock() + first_call = handler(events_kinesis, ctx) # type:ignore + + assert first_call == "continuing" + + logstash_message = self.logstash.get_messages(expected=1) + assert len(logstash_message) == 1 + + assert logstash_message[0]["message"] == fixtures[0].rstrip("\n") + assert logstash_message[0]["log"]["offset"] == 0 + assert logstash_message[0]["log"]["file"]["path"] == kinesis_stream_arn + assert logstash_message[0]["aws"]["kinesis"]["type"] == "stream" 
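+        # Each record shipped from the Kinesis trigger is expected to be enriched
+        # with the stream metadata carried by the trigger event itself (stream
+        # name, partition key, sequence number), the stream ARN reused as the log
+        # file path, and the static cloud/account/tags fields asserted here.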
+ assert logstash_message[0]["aws"]["kinesis"]["partition_key"] == "PartitionKey" + assert logstash_message[0]["aws"]["kinesis"]["name"] == kinesis_stream_name + assert ( + logstash_message[0]["aws"]["kinesis"]["sequence_number"] + == events_kinesis["Records"][0]["kinesis"]["sequenceNumber"] + ) + assert logstash_message[0]["cloud"]["provider"] == "aws" + assert logstash_message[0]["cloud"]["region"] == "us-east-1" + assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) + + continued_events, _ = _sqs_get_messages( + self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn + ) + second_call = handler(continued_events, ctx) # type:ignore + + assert second_call == "completed" + + logstash_message = self.logstash.get_messages(expected=3) + assert len(logstash_message) == 3 + + assert logstash_message[1]["message"] == fixtures[1].rstrip("\n") + assert logstash_message[1]["log"]["offset"] == 0 + assert logstash_message[1]["log"]["file"]["path"] == kinesis_stream_arn + assert logstash_message[1]["aws"]["kinesis"]["type"] == "stream" + assert logstash_message[1]["aws"]["kinesis"]["partition_key"] == "PartitionKey" + assert logstash_message[1]["aws"]["kinesis"]["name"] == kinesis_stream_name + assert ( + logstash_message[1]["aws"]["kinesis"]["sequence_number"] + == events_kinesis["Records"][1]["kinesis"]["sequenceNumber"] + ) + assert logstash_message[1]["cloud"]["provider"] == "aws" + assert logstash_message[1]["cloud"]["region"] == "us-east-1" + assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + assert logstash_message[2]["message"] == fixtures[2].rstrip("\n") + assert logstash_message[2]["log"]["offset"] == 0 + assert logstash_message[2]["log"]["file"]["path"] == kinesis_stream_arn + assert logstash_message[2]["aws"]["kinesis"]["type"] == "stream" + assert logstash_message[2]["aws"]["kinesis"]["partition_key"] == "PartitionKey" + assert logstash_message[2]["aws"]["kinesis"]["name"] == kinesis_stream_name + assert ( + logstash_message[2]["aws"]["kinesis"]["sequence_number"] + == events_kinesis["Records"][2]["kinesis"]["sequenceNumber"] + ) + assert logstash_message[2]["cloud"]["provider"] == "aws" + assert logstash_message[2]["cloud"]["region"] == "us-east-1" + assert logstash_message[2]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[2]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + def test_kinesis_data_stream_last_event_expanded_offset_continue(self) -> None: + assert isinstance(self.logstash, LogstashContainer) + assert isinstance(self.localstack, LocalStackContainer) + + first_expanded_event: str = _load_file_fixture("cloudwatch-log-1.json") + second_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") + third_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") + + fixtures = [f"""{{"aField": [{first_expanded_event},{second_expanded_event},{third_expanded_event}]}}"""] + + kinesis_stream_name = _time_based_id(suffix="source-kinesis") + kinesis_stream = _kinesis_create_stream(self.kinesis_client, kinesis_stream_name) + kinesis_stream_arn = kinesis_stream["StreamDescription"]["StreamARN"] + + _kinesis_put_records(self.kinesis_client, kinesis_stream_name, fixtures) + + config_yaml: str = f""" + inputs: + - type: "kinesis-data-stream" + id: 
"{kinesis_stream_arn}" + expand_event_list_from_field: aField + tags: {self.default_tags} + outputs: + - type: "logstash" + args: + logstash_url: "{self.logstash.get_url()}" + ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} + username: "{self.logstash.logstash_user}" + password: "{self.logstash.logstash_password}" + """ + + config_file_path = "config.yaml" + config_bucket_name = _time_based_id(suffix="config-bucket") + _s3_upload_content_to_bucket( + client=self.s3_client, + content=config_yaml.encode("utf-8"), + content_type="text/plain", + bucket_name=config_bucket_name, + key=config_file_path, + ) + + os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" + + events_kinesis, _ = _kinesis_retrieve_event_from_kinesis_stream( + self.kinesis_client, kinesis_stream_name, kinesis_stream_arn + ) + + ctx = ContextMock() + first_call = handler(events_kinesis, ctx) # type:ignore + + assert first_call == "continuing" + + logstash_message = self.logstash.get_messages(expected=1) + assert len(logstash_message) == 1 + + assert logstash_message[0]["message"] == json_dumper(json_parser(first_expanded_event)) + assert logstash_message[0]["log"]["offset"] == 0 + assert logstash_message[0]["log"]["file"]["path"] == kinesis_stream_arn + assert logstash_message[0]["aws"]["kinesis"]["type"] == "stream" + assert logstash_message[0]["aws"]["kinesis"]["partition_key"] == "PartitionKey" + assert logstash_message[0]["aws"]["kinesis"]["name"] == kinesis_stream_name + assert ( + logstash_message[0]["aws"]["kinesis"]["sequence_number"] + == events_kinesis["Records"][0]["kinesis"]["sequenceNumber"] + ) + assert logstash_message[0]["cloud"]["provider"] == "aws" + assert logstash_message[0]["cloud"]["region"] == "us-east-1" + assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) + + continued_events, _ = _sqs_get_messages( + self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn + ) + second_call = handler(continued_events, ctx) # type:ignore + + assert second_call == "completed" + + logstash_message = self.logstash.get_messages(expected=3) + assert len(logstash_message) == 3 + + assert logstash_message[1]["message"] == json_dumper(json_parser(second_expanded_event)) + assert logstash_message[1]["log"]["offset"] == 86 + assert logstash_message[1]["log"]["file"]["path"] == kinesis_stream_arn + assert logstash_message[1]["aws"]["kinesis"]["type"] == "stream" + assert logstash_message[1]["aws"]["kinesis"]["partition_key"] == "PartitionKey" + assert logstash_message[1]["aws"]["kinesis"]["name"] == kinesis_stream_name + assert ( + logstash_message[1]["aws"]["kinesis"]["sequence_number"] + == events_kinesis["Records"][0]["kinesis"]["sequenceNumber"] + ) + assert logstash_message[1]["cloud"]["provider"] == "aws" + assert logstash_message[1]["cloud"]["region"] == "us-east-1" + assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + assert logstash_message[2]["message"] == json_dumper(json_parser(third_expanded_event)) + assert logstash_message[2]["log"]["offset"] == 172 + assert logstash_message[2]["log"]["file"]["path"] == kinesis_stream_arn + assert logstash_message[2]["aws"]["kinesis"]["type"] == "stream" + assert logstash_message[2]["aws"]["kinesis"]["partition_key"] == "PartitionKey" + assert 
logstash_message[2]["aws"]["kinesis"]["name"] == kinesis_stream_name + assert ( + logstash_message[2]["aws"]["kinesis"]["sequence_number"] + == events_kinesis["Records"][0]["kinesis"]["sequenceNumber"] + ) + assert logstash_message[2]["cloud"]["provider"] == "aws" + assert logstash_message[2]["cloud"]["region"] == "us-east-1" + assert logstash_message[2]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[2]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + def test_sqs_last_ending_offset_reset(self) -> None: + assert isinstance(self.logstash, LogstashContainer) + assert isinstance(self.localstack, LocalStackContainer) + + fixtures = [ + _load_file_fixture("cloudwatch-log-1.json"), + _load_file_fixture("cloudwatch-log-2.json"), + _load_file_fixture("cloudwatch-log-3.json"), + ] + + sqs_queue_name = _time_based_id(suffix="source-sqs") + + sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) + + sqs_queue_arn = sqs_queue["QueueArn"] + sqs_queue_url = sqs_queue["QueueUrl"] + sqs_queue_url_path = sqs_queue["QueueUrlPath"] + + _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) + + config_yaml: str = f""" + inputs: + - type: "sqs" + id: "{sqs_queue_arn}" + tags: {self.default_tags} + outputs: + - type: "logstash" + args: + logstash_url: "{self.logstash.get_url()}" + ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} + username: "{self.logstash.logstash_user}" + password: "{self.logstash.logstash_password}" + """ + + config_file_path = "config.yaml" + config_bucket_name = _time_based_id(suffix="config-bucket") + _s3_upload_content_to_bucket( + client=self.s3_client, + content=config_yaml.encode("utf-8"), + content_type="text/plain", + bucket_name=config_bucket_name, + key=config_file_path, + ) + + os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" + + events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) + + message_id = events_sqs["Records"][0]["messageId"] + + ctx = ContextMock() + first_call = handler(events_sqs, ctx) # type:ignore + + assert first_call == "continuing" + + logstash_message = self.logstash.get_messages(expected=1) + assert len(logstash_message) == 1 + + assert logstash_message[0]["message"] == fixtures[0].rstrip("\n") + assert logstash_message[0]["log"]["offset"] == 0 + assert logstash_message[0]["log"]["file"]["path"] == sqs_queue_url_path + assert logstash_message[0]["aws"]["sqs"]["name"] == sqs_queue_name + assert logstash_message[0]["aws"]["sqs"]["message_id"] == message_id + assert logstash_message[0]["cloud"]["provider"] == "aws" + assert logstash_message[0]["cloud"]["region"] == "us-east-1" + assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) + + continued_events, _ = _sqs_get_messages( + self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn + ) + second_call = handler(continued_events, ctx) # type:ignore + + assert second_call == "completed" + + logstash_message = self.logstash.get_messages(expected=3) + assert len(logstash_message) == 3 + + assert logstash_message[1]["message"] == fixtures[1].rstrip("\n") + assert logstash_message[1]["log"]["offset"] == 94 + assert logstash_message[1]["log"]["file"]["path"] == sqs_queue_url_path + assert logstash_message[1]["aws"]["sqs"]["name"] == sqs_queue_name + assert 
logstash_message[1]["aws"]["sqs"]["message_id"] == message_id + assert logstash_message[1]["cloud"]["provider"] == "aws" + assert logstash_message[1]["cloud"]["region"] == "us-east-1" + assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + assert logstash_message[2]["message"] == fixtures[2].rstrip("\n") + assert logstash_message[2]["log"]["offset"] == 332 + assert logstash_message[2]["log"]["file"]["path"] == sqs_queue_url_path + assert logstash_message[2]["aws"]["sqs"]["name"] == sqs_queue_name + assert logstash_message[2]["aws"]["sqs"]["message_id"] == message_id + assert logstash_message[2]["cloud"]["provider"] == "aws" + assert logstash_message[2]["cloud"]["region"] == "us-east-1" + assert logstash_message[2]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[2]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + def test_sqs_last_event_expanded_offset_continue(self) -> None: + assert isinstance(self.logstash, LogstashContainer) + assert isinstance(self.localstack, LocalStackContainer) + + first_expanded_event: str = _load_file_fixture("cloudwatch-log-1.json") + second_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") + third_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") + + fixtures = [f"""{{"aField": [{first_expanded_event},{second_expanded_event},{third_expanded_event}]}}"""] + + sqs_queue_name = _time_based_id(suffix="source-sqs") + + sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) + + sqs_queue_arn = sqs_queue["QueueArn"] + sqs_queue_url = sqs_queue["QueueUrl"] + sqs_queue_url_path = sqs_queue["QueueUrlPath"] + + _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) + + config_yaml: str = f""" + inputs: + - type: "sqs" + id: "{sqs_queue_arn}" + expand_event_list_from_field: aField + tags: {self.default_tags} + outputs: + - type: "logstash" + args: + logstash_url: "{self.logstash.get_url()}" + ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} + username: "{self.logstash.logstash_user}" + password: "{self.logstash.logstash_password}" + """ + + config_file_path = "config.yaml" + config_bucket_name = _time_based_id(suffix="config-bucket") + _s3_upload_content_to_bucket( + client=self.s3_client, + content=config_yaml.encode("utf-8"), + content_type="text/plain", + bucket_name=config_bucket_name, + key=config_file_path, + ) + + os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" + + events_sqs, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) + + message_id = events_sqs["Records"][0]["messageId"] + + ctx = ContextMock() + first_call = handler(events_sqs, ctx) # type:ignore + + assert first_call == "continuing" + + logstash_message = self.logstash.get_messages(expected=1) + assert len(logstash_message) == 1 + + assert logstash_message[0]["message"] == json_dumper(json_parser(first_expanded_event)) + assert logstash_message[0]["log"]["offset"] == 0 + assert logstash_message[0]["log"]["file"]["path"] == sqs_queue_url_path + assert logstash_message[0]["aws"]["sqs"]["name"] == sqs_queue_name + assert logstash_message[0]["aws"]["sqs"]["message_id"] == message_id + assert logstash_message[0]["cloud"]["provider"] == "aws" + assert logstash_message[0]["cloud"]["region"] == "us-east-1" + assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", 
"tag3"] + + ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) + + continued_events, _ = _sqs_get_messages( + self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn + ) + second_call = handler(continued_events, ctx) # type:ignore + + assert second_call == "completed" + + logstash_message = self.logstash.get_messages(expected=3) + assert len(logstash_message) == 3 + + assert logstash_message[1]["message"] == json_dumper(json_parser(second_expanded_event)) + assert logstash_message[1]["log"]["offset"] == 86 + assert logstash_message[1]["log"]["file"]["path"] == sqs_queue_url_path + assert logstash_message[1]["aws"]["sqs"]["name"] == sqs_queue_name + assert logstash_message[1]["aws"]["sqs"]["message_id"] == message_id + assert logstash_message[1]["cloud"]["provider"] == "aws" + assert logstash_message[1]["cloud"]["region"] == "us-east-1" + assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + assert logstash_message[2]["message"] == json_dumper(json_parser(third_expanded_event)) + assert logstash_message[2]["log"]["offset"] == 172 + assert logstash_message[2]["log"]["file"]["path"] == sqs_queue_url_path + assert logstash_message[2]["aws"]["sqs"]["name"] == sqs_queue_name + assert logstash_message[2]["aws"]["sqs"]["message_id"] == message_id + assert logstash_message[2]["cloud"]["provider"] == "aws" + assert logstash_message[2]["cloud"]["region"] == "us-east-1" + assert logstash_message[2]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[2]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + def test_s3_sqs_last_ending_offset_reset(self) -> None: + assert isinstance(self.logstash, LogstashContainer) + assert isinstance(self.localstack, LocalStackContainer) + + fixtures = [ + _load_file_fixture("cloudwatch-log-1.json"), + _load_file_fixture("cloudwatch-log-2.json"), + _load_file_fixture("cloudwatch-log-3.json"), + ] + + s3_bucket_name = _time_based_id(suffix="test-bucket") + first_filename = "exportedlog/uuid/yyyy-mm-dd-[$LATEST]hash/000000.gz" + _s3_upload_content_to_bucket( + client=self.s3_client, + content=gzip.compress("".join(fixtures).encode("utf-8")), + content_type="application/x-gzip", + bucket_name=s3_bucket_name, + key=first_filename, + ) + + s3_sqs_queue_name = _time_based_id(suffix="source-s3-sqs") + + s3_sqs_queue = _sqs_create_queue(self.sqs_client, s3_sqs_queue_name, self.localstack.get_url()) + + s3_sqs_queue_arn = s3_sqs_queue["QueueArn"] + s3_sqs_queue_url = s3_sqs_queue["QueueUrl"] + + _sqs_send_s3_notifications(self.sqs_client, s3_sqs_queue_url, s3_bucket_name, [first_filename]) + + config_yaml: str = f""" + inputs: + - type: "s3-sqs" + id: "{s3_sqs_queue_arn}" + tags: {self.default_tags} + outputs: + - type: "logstash" + args: + logstash_url: "{self.logstash.get_url()}" + ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} + username: "{self.logstash.logstash_user}" + password: "{self.logstash.logstash_password}" + """ + + config_file_path = "config.yaml" + config_bucket_name = _time_based_id(suffix="config-bucket") + _s3_upload_content_to_bucket( + client=self.s3_client, + content=config_yaml.encode("utf-8"), + content_type="text/plain", + bucket_name=config_bucket_name, + key=config_file_path, + ) + + os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" + + events_s3, _ = _sqs_get_messages(self.sqs_client, s3_sqs_queue_url, s3_sqs_queue_arn) + + ctx = ContextMock() + 
first_call = handler(events_s3, ctx) # type:ignore + + assert first_call == "continuing" + + logstash_message = self.logstash.get_messages(expected=1) + assert len(logstash_message) == 1 + + assert logstash_message[0]["message"] == fixtures[0].rstrip("\n") + assert logstash_message[0]["log"]["offset"] == 0 + assert ( + logstash_message[0]["log"]["file"]["path"] + == f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{first_filename}" + ) + assert logstash_message[0]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name + assert logstash_message[0]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" + assert logstash_message[0]["aws"]["s3"]["object"]["key"] == first_filename + assert logstash_message[0]["cloud"]["provider"] == "aws" + assert logstash_message[0]["cloud"]["region"] == "eu-central-1" + assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) + + continued_events, _ = _sqs_get_messages( + self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn + ) + second_call = handler(continued_events, ctx) # type:ignore + + assert second_call == "completed" + + logstash_message = self.logstash.get_messages(expected=3) + assert len(logstash_message) == 3 + + assert logstash_message[1]["message"] == fixtures[1].rstrip("\n") + assert logstash_message[1]["log"]["offset"] == 94 + assert ( + logstash_message[1]["log"]["file"]["path"] + == f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{first_filename}" + ) + assert logstash_message[1]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name + assert logstash_message[1]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" + assert logstash_message[1]["aws"]["s3"]["object"]["key"] == first_filename + assert logstash_message[1]["cloud"]["provider"] == "aws" + assert logstash_message[1]["cloud"]["region"] == "eu-central-1" + assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + assert logstash_message[2]["message"] == fixtures[2].rstrip("\n") + assert logstash_message[2]["log"]["offset"] == 332 + assert ( + logstash_message[2]["log"]["file"]["path"] + == f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{first_filename}" + ) + assert logstash_message[2]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name + assert logstash_message[2]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" + assert logstash_message[2]["aws"]["s3"]["object"]["key"] == first_filename + assert logstash_message[2]["cloud"]["provider"] == "aws" + assert logstash_message[2]["cloud"]["region"] == "eu-central-1" + assert logstash_message[2]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[2]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + def test_s3_sqs_last_event_expanded_offset_continue(self) -> None: + assert isinstance(self.logstash, LogstashContainer) + assert isinstance(self.localstack, LocalStackContainer) + + first_expanded_event: str = _load_file_fixture("cloudwatch-log-1.json") + second_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") + third_expanded_event: str = _load_file_fixture("cloudwatch-log-3.json") + + fixtures = [ + f"""{{"aField": [{first_expanded_event},{second_expanded_event}]}}""", + f"""{{"aField": [{third_expanded_event}]}}""", + ] + + s3_bucket_name = 
_time_based_id(suffix="test-bucket") + first_filename = "exportedlog/uuid/yyyy-mm-dd-[$LATEST]hash/000000.gz" + _s3_upload_content_to_bucket( + client=self.s3_client, + content=gzip.compress(fixtures[0].encode("utf-8")), + content_type="application/x-gzip", + bucket_name=s3_bucket_name, + key=first_filename, + ) + + second_filename = "exportedlog/uuid/yyyy-mm-dd-[$LATEST]hash/000001.gz" + _s3_upload_content_to_bucket( + client=self.s3_client, + content=gzip.compress(fixtures[1].encode("utf-8")), + content_type="application/x-gzip", + bucket_name=s3_bucket_name, + key=second_filename, + ) + + s3_sqs_queue_name = _time_based_id(suffix="source-s3-sqs") + + s3_sqs_queue = _sqs_create_queue(self.sqs_client, s3_sqs_queue_name, self.localstack.get_url()) + + s3_sqs_queue_arn = s3_sqs_queue["QueueArn"] + s3_sqs_queue_url = s3_sqs_queue["QueueUrl"] + + _sqs_send_s3_notifications(self.sqs_client, s3_sqs_queue_url, s3_bucket_name, [first_filename]) + _sqs_send_s3_notifications(self.sqs_client, s3_sqs_queue_url, s3_bucket_name, [second_filename]) + + config_yaml: str = f""" + inputs: + - type: "s3-sqs" + id: "{s3_sqs_queue_arn}" + expand_event_list_from_field: aField + tags: {self.default_tags} + outputs: + - type: "logstash" + args: + logstash_url: "{self.logstash.get_url()}" + ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} + username: "{self.logstash.logstash_user}" + password: "{self.logstash.logstash_password}" + """ + + config_file_path = "config.yaml" + config_bucket_name = _time_based_id(suffix="config-bucket") + _s3_upload_content_to_bucket( + client=self.s3_client, + content=config_yaml.encode("utf-8"), + content_type="text/plain", + bucket_name=config_bucket_name, + key=config_file_path, + ) + + os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" + + events_s3, _ = _sqs_get_messages(self.sqs_client, s3_sqs_queue_url, s3_sqs_queue_arn) + + ctx = ContextMock() + first_call = handler(events_s3, ctx) # type:ignore + + assert first_call == "continuing" + + logstash_message = self.logstash.get_messages(expected=1) + assert len(logstash_message) == 1 + + assert logstash_message[0]["message"] == json_dumper(json_parser(first_expanded_event)) + assert logstash_message[0]["log"]["offset"] == 0 + assert ( + logstash_message[0]["log"]["file"]["path"] + == f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{first_filename}" + ) + assert logstash_message[0]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name + assert logstash_message[0]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" + assert logstash_message[0]["aws"]["s3"]["object"]["key"] == first_filename + assert logstash_message[0]["cloud"]["provider"] == "aws" + assert logstash_message[0]["cloud"]["region"] == "eu-central-1" + assert logstash_message[0]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[0]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + continued_events, _ = _sqs_get_messages( + self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn + ) + second_call = handler(continued_events, ctx) # type:ignore + + assert second_call == "continuing" + + logstash_message = self.logstash.get_messages(expected=2) + assert len(logstash_message) == 2 + + assert logstash_message[1]["message"] == json_dumper(json_parser(second_expanded_event)) + assert logstash_message[1]["log"]["offset"] == 91 + assert ( + logstash_message[1]["log"]["file"]["path"] + == f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{first_filename}" + ) + assert 
logstash_message[1]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name + assert logstash_message[1]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" + assert logstash_message[1]["aws"]["s3"]["object"]["key"] == first_filename + assert logstash_message[1]["cloud"]["provider"] == "aws" + assert logstash_message[1]["cloud"]["region"] == "eu-central-1" + assert logstash_message[1]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[1]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) + + continued_events, _ = _sqs_get_messages( + self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn + ) + third_call = handler(continued_events, ctx) # type:ignore + + assert third_call == "completed" + + logstash_message = self.logstash.get_messages(expected=3) + assert len(logstash_message) == 3 + assert logstash_message[2]["message"] == json_dumper(json_parser(third_expanded_event)) + assert logstash_message[2]["log"]["offset"] == 0 + assert ( + logstash_message[2]["log"]["file"]["path"] + == f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{second_filename}" + ) + assert logstash_message[2]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name + assert logstash_message[2]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" + assert logstash_message[2]["aws"]["s3"]["object"]["key"] == second_filename + assert logstash_message[2]["cloud"]["provider"] == "aws" + assert logstash_message[2]["cloud"]["region"] == "eu-central-1" + assert logstash_message[2]["cloud"]["account"]["id"] == "000000000000" + assert logstash_message[2]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + def test_cloud_trail_race(self) -> None: + assert isinstance(self.elasticsearch, ElasticsearchContainer) + assert isinstance(self.logstash, LogstashContainer) + assert isinstance(self.localstack, LocalStackContainer) + + s3_sqs_queue_name = _time_based_id(suffix="source-s3-sqs") + + s3_sqs_queue = _sqs_create_queue(self.sqs_client, s3_sqs_queue_name, self.localstack.get_url()) + + s3_sqs_queue_arn = s3_sqs_queue["QueueArn"] + s3_sqs_queue_url = s3_sqs_queue["QueueUrl"] + + config_yaml: str = f""" + inputs: + - type: s3-sqs + id: "{s3_sqs_queue_arn}" + tags: {self.default_tags} + outputs: {self.default_outputs} + """ + + config_file_path = "config.yaml" + config_bucket_name = _time_based_id(suffix="config-bucket") + _s3_upload_content_to_bucket( + client=self.s3_client, + content=config_yaml.encode("utf-8"), + content_type="text/plain", + bucket_name=config_bucket_name, + key=config_file_path, + ) + + os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" + fixtures = [ + _load_file_fixture("cloudwatch-log-1.json"), + '{"Records": [' + _load_file_fixture("cloudwatch-log-2.json") + "]}", + ] + + cloudtrail_filename_digest = ( + "AWSLogs/aws-account-id/CloudTrail-Digest/region/yyyy/mm/dd/" + "aws-account-id_CloudTrail-Digest_region_end-time_random-string.log.gz" + ) + cloudtrail_filename_non_digest = ( + "AWSLogs/aws-account-id/CloudTrail/region/yyyy/mm/dd/" + "aws-account-id_CloudTrail_region_end-time_random-string.log.gz" + ) + + s3_bucket_name = _time_based_id(suffix="test-bucket") + + _s3_upload_content_to_bucket( + client=self.s3_client, + content=gzip.compress(fixtures[0].encode("utf-8")), + content_type="application/x-gzip", + bucket_name=s3_bucket_name, + key=cloudtrail_filename_digest, + ) + + _s3_upload_content_to_bucket( + client=self.s3_client, + 
content=gzip.compress(fixtures[1].encode("utf-8")), + content_type="application/x-gzip", + bucket_name=s3_bucket_name, + key=cloudtrail_filename_non_digest, + ) + + _sqs_send_s3_notifications( + self.sqs_client, + s3_sqs_queue_url, + s3_bucket_name, + [cloudtrail_filename_digest, cloudtrail_filename_non_digest], + ) + + event, _ = _sqs_get_messages(self.sqs_client, s3_sqs_queue_url, s3_sqs_queue_arn) + + ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) + first_call = handler(event, ctx) # type:ignore + + assert first_call == "completed" + + self.elasticsearch.refresh(index="logs-aws.cloudtrail-default") + assert self.elasticsearch.count(index="logs-aws.cloudtrail-default")["count"] == 2 + + res = self.elasticsearch.search(index="logs-aws.cloudtrail-default", sort="_seq_no") + assert res["hits"]["total"] == {"value": 2, "relation": "eq"} + + assert res["hits"]["hits"][0]["_source"]["message"] == fixtures[0].rstrip("\n") + assert res["hits"]["hits"][0]["_source"]["log"]["offset"] == 0 + assert ( + res["hits"]["hits"][0]["_source"]["log"]["file"]["path"] + == f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{cloudtrail_filename_digest}" + ) + assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name + assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" + assert res["hits"]["hits"][0]["_source"]["aws"]["s3"]["object"]["key"] == cloudtrail_filename_digest + assert res["hits"]["hits"][0]["_source"]["cloud"]["provider"] == "aws" + assert res["hits"]["hits"][0]["_source"]["cloud"]["region"] == "eu-central-1" + assert res["hits"]["hits"][0]["_source"]["cloud"]["account"]["id"] == "000000000000" + assert res["hits"]["hits"][0]["_source"]["tags"] == ["forwarded", "aws-cloudtrail", "tag1", "tag2", "tag3"] + + assert res["hits"]["hits"][1]["_source"]["message"] == json_dumper( + json_parser(_load_file_fixture("cloudwatch-log-2.json").rstrip("\n")) + ) + assert res["hits"]["hits"][1]["_source"]["log"]["offset"] == 0 + assert ( + res["hits"]["hits"][1]["_source"]["log"]["file"]["path"] + == f"https://{s3_bucket_name}.s3.eu-central-1.amazonaws.com/{cloudtrail_filename_non_digest}" + ) + assert res["hits"]["hits"][1]["_source"]["aws"]["s3"]["bucket"]["name"] == s3_bucket_name + assert res["hits"]["hits"][1]["_source"]["aws"]["s3"]["bucket"]["arn"] == f"arn:aws:s3:::{s3_bucket_name}" + assert res["hits"]["hits"][1]["_source"]["aws"]["s3"]["object"]["key"] == cloudtrail_filename_non_digest + assert res["hits"]["hits"][1]["_source"]["cloud"]["provider"] == "aws" + assert res["hits"]["hits"][1]["_source"]["cloud"]["region"] == "eu-central-1" + assert res["hits"]["hits"][1]["_source"]["cloud"]["account"]["id"] == "000000000000" + assert res["hits"]["hits"][1]["_source"]["tags"] == ["forwarded", "aws-cloudtrail", "tag1", "tag2", "tag3"] + + logstash_message = self.logstash.get_messages(expected=2) + assert len(logstash_message) == 2 + res["hits"]["hits"][0]["_source"]["tags"].remove("aws-cloudtrail") + res["hits"]["hits"][1]["_source"]["tags"].remove("aws-cloudtrail") + + assert res["hits"]["hits"][0]["_source"]["aws"] == logstash_message[0]["aws"] + assert res["hits"]["hits"][0]["_source"]["cloud"] == logstash_message[0]["cloud"] + assert res["hits"]["hits"][0]["_source"]["log"] == logstash_message[0]["log"] + assert res["hits"]["hits"][0]["_source"]["message"] == logstash_message[0]["message"] + assert res["hits"]["hits"][0]["_source"]["tags"] == logstash_message[0]["tags"] + + assert 
res["hits"]["hits"][1]["_source"]["aws"] == logstash_message[1]["aws"] + assert res["hits"]["hits"][1]["_source"]["cloud"] == logstash_message[1]["cloud"] + assert res["hits"]["hits"][1]["_source"]["log"] == logstash_message[1]["log"] + assert res["hits"]["hits"][1]["_source"]["message"] == logstash_message[1]["message"] + assert res["hits"]["hits"][1]["_source"]["tags"] == logstash_message[1]["tags"] + + def test_es_ssl_fingerprint_mismatch(self) -> None: + assert isinstance(self.elasticsearch, ElasticsearchContainer) + assert isinstance(self.localstack, LocalStackContainer) + + sqs_queue_name = _time_based_id(suffix="source-sqs") + sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) + + sqs_queue_arn = sqs_queue["QueueArn"] + sqs_queue_url = sqs_queue["QueueUrl"] + sqs_queue_url_path = sqs_queue["QueueUrlPath"] + + config_yaml: str = f""" + inputs: + - type: sqs + id: "{sqs_queue_arn}" + tags: {self.default_tags} + outputs: + - type: "elasticsearch" + args: + elasticsearch_url: "{self.elasticsearch.get_url()}" + ssl_assert_fingerprint: {self.elasticsearch.ssl_assert_fingerprint}:AA + username: "{self.secret_arn}:username" + password: "{self.secret_arn}:password" + """ + + config_file_path = "config.yaml" + config_bucket_name = _time_based_id(suffix="config-bucket") + _s3_upload_content_to_bucket( + client=self.s3_client, + content=config_yaml.encode("utf-8"), + content_type="text/plain", + bucket_name=config_bucket_name, + key=config_file_path, + ) + + os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" + ctx = ContextMock() + + fixtures = [ + _load_file_fixture("cloudwatch-log-1.json"), + _load_file_fixture("cloudwatch-log-2.json"), + ] + + _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) + + event, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) + message_id = event["Records"][0]["messageId"] + + first_call = handler(event, ctx) # type:ignore + + assert first_call == "continuing" + + assert self.elasticsearch.exists(index="logs-generic-default") is False + + event, _ = _sqs_get_messages(self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn) + second_call = handler(event, ctx) # type:ignore + + assert second_call == "continuing" + + assert self.elasticsearch.exists(index="logs-generic-default") is False + + event, _ = _sqs_get_messages(self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn) + third_call = handler(event, ctx) # type:ignore + + assert third_call == "completed" + + assert self.elasticsearch.exists(index="logs-generic-default") is False + + events, _ = _sqs_get_messages(self.sqs_client, os.environ["SQS_REPLAY_URL"], self.sqs_replay_queue_arn) + assert len(events["Records"]) == 2 + + first_body: dict[str, Any] = json_parser(events["Records"][0]["body"]) + second_body: dict[str, Any] = json_parser(events["Records"][1]["body"]) + + assert first_body["event_payload"]["message"] == fixtures[0].rstrip("\n") + assert first_body["event_payload"]["log"]["offset"] == 0 + assert first_body["event_payload"]["log"]["file"]["path"] == sqs_queue_url_path + assert first_body["event_payload"]["aws"]["sqs"]["name"] == sqs_queue_name + assert first_body["event_payload"]["aws"]["sqs"]["message_id"] == message_id + assert first_body["event_payload"]["cloud"]["provider"] == "aws" + assert first_body["event_payload"]["cloud"]["region"] == "us-east-1" + assert first_body["event_payload"]["cloud"]["account"]["id"] == "000000000000" + assert 
first_body["event_payload"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] + + assert second_body["event_payload"]["message"] == fixtures[1].rstrip("\n") + assert second_body["event_payload"]["log"]["offset"] == 94 + assert second_body["event_payload"]["log"]["file"]["path"] == sqs_queue_url_path + assert second_body["event_payload"]["aws"]["sqs"]["name"] == sqs_queue_name + assert second_body["event_payload"]["aws"]["sqs"]["message_id"] == message_id + assert second_body["event_payload"]["cloud"]["provider"] == "aws" + assert second_body["event_payload"]["cloud"]["region"] == "us-east-1" + assert second_body["event_payload"]["cloud"]["account"]["id"] == "000000000000" + assert second_body["event_payload"]["tags"] == ["forwarded", "generic", "tag1", "tag2", "tag3"] + + def test_es_no_matching_action_failed(self) -> None: + assert isinstance(self.elasticsearch, ElasticsearchContainer) + assert isinstance(self.localstack, LocalStackContainer) + + sqs_queue_name = _time_based_id(suffix="source-sqs") + sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) + + sqs_queue_arn = sqs_queue["QueueArn"] + sqs_queue_url = sqs_queue["QueueUrl"] + + message: str = "a message" + fingerprint: str = "DUEwoALOve1Y9MtPCfT7IJGU3IQ=" + + # Create an expected id so that es.send will fail + self.elasticsearch.index( + index="logs-generic-default", + op_type="create", + id=fingerprint, + document={"@timestamp": datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.%fZ")}, + ) + + processors = { + "processors": [ + { + "fingerprint": { + "fields": ["message"], + "target_field": "_id", + } + } + ] + } + + # Add a pipeline that will generate the same _id + self.elasticsearch.put_pipeline(id="id_fingerprint_pipeline", body=processors) + self.elasticsearch.put_settings( + index="logs-generic-default", body={"index.default_pipeline": "id_fingerprint_pipeline"} + ) + + self.elasticsearch.refresh(index="logs-generic-default") + + assert self.elasticsearch.count(index="logs-generic-default")["count"] == 1 + + _sqs_send_messages(self.sqs_client, sqs_queue_url, message) + + event, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) + + config_yaml: str = f""" + inputs: + - type: sqs + id: "{sqs_queue_arn}" + tags: {self.default_tags} + outputs: + - type: "elasticsearch" + args: + elasticsearch_url: "{self.elasticsearch.get_url()}" + ssl_assert_fingerprint: {self.elasticsearch.ssl_assert_fingerprint} + username: "{self.secret_arn}:username" + password: "{self.secret_arn}:password" + """ + + config_file_path = "config.yaml" + config_bucket_name = _time_based_id(suffix="config-bucket") + _s3_upload_content_to_bucket( + client=self.s3_client, + content=config_yaml.encode("utf-8"), + content_type="text/plain", + bucket_name=config_bucket_name, + key=config_file_path, + ) + + os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" + + ctx = ContextMock(remaining_time_in_millis=_OVER_COMPLETION_GRACE_PERIOD_2m) + + first_call = handler(event, ctx) # type:ignore + + assert first_call == "completed" + + self.elasticsearch.refresh(index="logs-generic-default") + + assert self.elasticsearch.count(index="logs-generic-default")["count"] == 1 + + res = self.elasticsearch.search(index="logs-generic-default") + assert "message" not in res["hits"]["hits"][0]["_source"] + + event, timestamp = _sqs_get_messages(self.sqs_client, os.environ["SQS_REPLAY_URL"], self.sqs_replay_queue_arn) + assert not event["Records"] + assert not timestamp + + def 
test_ls_ssl_fingerprint_mismatch(self) -> None:
+        assert isinstance(self.logstash, LogstashContainer)
+        assert isinstance(self.localstack, LocalStackContainer)
+
+        sqs_queue_name = _time_based_id(suffix="source-sqs")
+        sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url())
+
+        sqs_queue_arn = sqs_queue["QueueArn"]
+        sqs_queue_url = sqs_queue["QueueUrl"]
+        sqs_queue_url_path = sqs_queue["QueueUrlPath"]
+
+        config_yaml: str = f"""
+            inputs:
+              - type: sqs
+                id: "{sqs_queue_arn}"
+                tags: {self.default_tags}
+                outputs:
+                  - type: "logstash"
+                    args:
+                      logstash_url: "{self.logstash.get_url()}"
+                      ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint}:AA
+                      username: "{self.logstash.logstash_user}"
+                      password: "{self.logstash.logstash_password}"
+        """
+
+        config_file_path = "config.yaml"
+        config_bucket_name = _time_based_id(suffix="config-bucket")
+        _s3_upload_content_to_bucket(
+            client=self.s3_client,
+            content=config_yaml.encode("utf-8"),
+            content_type="text/plain",
+            bucket_name=config_bucket_name,
+            key=config_file_path,
+        )
+
+        os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}"
+        ctx = ContextMock()
+
+        fixtures = [
+            _load_file_fixture("cloudwatch-log-1.json"),
+            _load_file_fixture("cloudwatch-log-2.json"),
+        ]
+
+        _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures))
+
+        event, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn)
+        message_id = event["Records"][0]["messageId"]
+
+        first_call = handler(event, ctx)  # type:ignore
+
+        assert first_call == "continuing"
+
+        event, _ = _sqs_get_messages(self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn)
+        second_call = handler(event, ctx)  # type:ignore
+
+        assert second_call == "continuing"
+
+        event, _ = _sqs_get_messages(self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn)
+        third_call = handler(event, ctx)  # type:ignore
+
+        assert third_call == "completed"
+
+        events, _ = _sqs_get_messages(self.sqs_client, os.environ["SQS_REPLAY_URL"], self.sqs_replay_queue_arn)
+        assert len(events["Records"]) == 2
+
+        first_body: dict[str, Any] = json_parser(events["Records"][0]["body"])
+        second_body: dict[str, Any] = json_parser(events["Records"][1]["body"])
+
+        assert first_body["event_payload"]["message"] == fixtures[0].rstrip("\n")
+        assert first_body["event_payload"]["log"]["offset"] == 0
+        assert first_body["event_payload"]["log"]["file"]["path"] == sqs_queue_url_path
+        assert first_body["event_payload"]["aws"]["sqs"]["name"] == sqs_queue_name
+        assert first_body["event_payload"]["aws"]["sqs"]["message_id"] == message_id
+        assert first_body["event_payload"]["cloud"]["provider"] == "aws"
+        assert first_body["event_payload"]["cloud"]["region"] == "us-east-1"
+        assert first_body["event_payload"]["cloud"]["account"]["id"] == "000000000000"
+        assert first_body["event_payload"]["tags"] == ["forwarded", "tag1", "tag2", "tag3"]
+
+        assert second_body["event_payload"]["message"] == fixtures[1].rstrip("\n")
+        assert second_body["event_payload"]["log"]["offset"] == 94
+        assert second_body["event_payload"]["log"]["file"]["path"] == sqs_queue_url_path
+        assert second_body["event_payload"]["aws"]["sqs"]["name"] == sqs_queue_name
+        assert second_body["event_payload"]["aws"]["sqs"]["message_id"] == message_id
+        assert second_body["event_payload"]["cloud"]["provider"] == "aws"
+        assert second_body["event_payload"]["cloud"]["region"] == "us-east-1"
+        assert second_body["event_payload"]["cloud"]["account"]["id"] ==
"000000000000" + assert second_body["event_payload"]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + def test_ls_wrong_auth_creds(self) -> None: + assert isinstance(self.logstash, LogstashContainer) + assert isinstance(self.localstack, LocalStackContainer) + + sqs_queue_name = _time_based_id(suffix="source-sqs") + sqs_queue = _sqs_create_queue(self.sqs_client, sqs_queue_name, self.localstack.get_url()) + + sqs_queue_arn = sqs_queue["QueueArn"] + sqs_queue_url = sqs_queue["QueueUrl"] + sqs_queue_url_path = sqs_queue["QueueUrlPath"] + + config_yaml: str = f""" + inputs: + - type: sqs + id: "{sqs_queue_arn}" + tags: {self.default_tags} + outputs: + - type: "logstash" + args: + logstash_url: "{self.logstash.get_url()}" + ssl_assert_fingerprint: {self.logstash.ssl_assert_fingerprint} + username: "wrong_username" + password: "wrong_password" + """ + + config_file_path = "config.yaml" + config_bucket_name = _time_based_id(suffix="config-bucket") + _s3_upload_content_to_bucket( + client=self.s3_client, + content=config_yaml.encode("utf-8"), + content_type="text/plain", + bucket_name=config_bucket_name, + key=config_file_path, + ) + + os.environ["S3_CONFIG_FILE"] = f"s3://{config_bucket_name}/{config_file_path}" + ctx = ContextMock() + + fixtures = [ + _load_file_fixture("cloudwatch-log-1.json"), + _load_file_fixture("cloudwatch-log-2.json"), + ] + + _sqs_send_messages(self.sqs_client, sqs_queue_url, "".join(fixtures)) + + event, _ = _sqs_get_messages(self.sqs_client, sqs_queue_url, sqs_queue_arn) + message_id = event["Records"][0]["messageId"] + + first_call = handler(event, ctx) # type:ignore + + assert first_call == "continuing" + + event, _ = _sqs_get_messages(self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn) + second_call = handler(event, ctx) # type:ignore + + assert second_call == "continuing" + + event, _ = _sqs_get_messages(self.sqs_client, os.environ["SQS_CONTINUE_URL"], self.sqs_continue_queue_arn) + third_call = handler(event, ctx) # type:ignore + + assert third_call == "completed" + + events, _ = _sqs_get_messages(self.sqs_client, os.environ["SQS_REPLAY_URL"], self.sqs_replay_queue_arn) + assert len(events["Records"]) == 2 + + first_body: dict[str, Any] = json_parser(events["Records"][0]["body"]) + second_body: dict[str, Any] = json_parser(events["Records"][1]["body"]) + + assert first_body["event_payload"]["message"] == fixtures[0].rstrip("\n") + assert first_body["event_payload"]["log"]["offset"] == 0 + assert first_body["event_payload"]["log"]["file"]["path"] == sqs_queue_url_path + assert first_body["event_payload"]["aws"]["sqs"]["name"] == sqs_queue_name + assert first_body["event_payload"]["aws"]["sqs"]["message_id"] == message_id + assert first_body["event_payload"]["cloud"]["provider"] == "aws" + assert first_body["event_payload"]["cloud"]["region"] == "us-east-1" + assert first_body["event_payload"]["cloud"]["account"]["id"] == "000000000000" + assert first_body["event_payload"]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] + + assert second_body["event_payload"]["message"] == fixtures[1].rstrip("\n") + assert second_body["event_payload"]["log"]["offset"] == 94 + assert second_body["event_payload"]["log"]["file"]["path"] == sqs_queue_url_path + assert second_body["event_payload"]["aws"]["sqs"]["name"] == sqs_queue_name + assert second_body["event_payload"]["aws"]["sqs"]["message_id"] == message_id + assert second_body["event_payload"]["cloud"]["provider"] == "aws" + assert second_body["event_payload"]["cloud"]["region"] == "us-east-1" + 
assert second_body["event_payload"]["cloud"]["account"]["id"] == "000000000000" + assert second_body["event_payload"]["tags"] == ["forwarded", "tag1", "tag2", "tag3"] diff --git a/tests/handlers/aws/test_replay_trigger.py b/tests/handlers/aws/test_replay_trigger.py new file mode 100644 index 00000000..c67efd52 --- /dev/null +++ b/tests/handlers/aws/test_replay_trigger.py @@ -0,0 +1,87 @@ +# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +# or more contributor license agreements. Licensed under the Elastic License 2.0; +# you may not use this file except in compliance with the Elastic License 2.0. + +from typing import Optional +from unittest import TestCase + +import mock +import pytest + +from handlers.aws.replay_trigger import ReplayedEventReplayHandler, get_shipper_for_replay_event +from share import parse_config +from shippers import CompositeShipper, ElasticsearchShipper, LogstashShipper + + +@pytest.mark.unit +class TestReplayTrigger(TestCase): + @mock.patch("share.config._available_output_types", new=["elasticsearch", "logstash", "output_type"]) + def test_get_shipper_for_replay_event(self) -> None: + with self.subTest("Logstash shipper from replay event"): + config_yaml_kinesis = """ + inputs: + - type: kinesis-data-stream + id: arn:aws:kinesis:eu-central-1:123456789:stream/test-esf-kinesis-stream + outputs: + - type: logstash + args: + logstash_url: logstash_url + """ + config = parse_config(config_yaml_kinesis) + replay_handler = ReplayedEventReplayHandler("arn:aws:sqs:eu-central-1:123456789:queue/replayqueue") + logstash_shipper: Optional[CompositeShipper] = get_shipper_for_replay_event( + config, + "logstash", + {}, + "arn:aws:kinesis:eu-central-1:123456789:stream/test-esf-kinesis-stream", + replay_handler, + ) + assert isinstance(logstash_shipper, CompositeShipper) + assert isinstance(logstash_shipper._shippers[0], LogstashShipper) + + with self.subTest("Elasticsearch shipper from replay event"): + config_yaml_kinesis = """ + inputs: + - type: kinesis-data-stream + id: arn:aws:kinesis:eu-central-1:123456789:stream/test-esf-kinesis-stream + outputs: + - type: elasticsearch + args: + elasticsearch_url: "elasticsearch_url" + username: "username" + password: "password" + es_datastream_name: "es_datastream_name" + """ + config = parse_config(config_yaml_kinesis) + replay_handler = ReplayedEventReplayHandler("arn:aws:sqs:eu-central-1:123456789:queue/replayqueue") + elasticsearch_shipper: Optional[CompositeShipper] = get_shipper_for_replay_event( + config, + "elasticsearch", + {"es_datastream_name": "es_datastream_name"}, + "arn:aws:kinesis:eu-central-1:123456789:stream/test-esf-kinesis-stream", + replay_handler, + ) + + assert isinstance(elasticsearch_shipper, CompositeShipper) + assert isinstance(elasticsearch_shipper._shippers[0], ElasticsearchShipper) + + with self.subTest("None shipper from replay event"): + config_yaml_kinesis = """ + inputs: + - type: kinesis-data-stream + id: arn:aws:kinesis:eu-central-1:123456789:stream/test-esf-kinesis-stream + outputs: + - type: output_type + args: + output_arg: output_arg + """ + config = parse_config(config_yaml_kinesis) + replay_handler = ReplayedEventReplayHandler("arn:aws:sqs:eu-central-1:123456789:queue/replayqueue") + none_shipper = get_shipper_for_replay_event( + config, + "output_type", + {}, + "arn:aws:kinesis:eu-central-1:123456789:stream/test-esf-kinesis-stream", + replay_handler, + ) + assert none_shipper is None diff --git a/tests/handlers/aws/test_utils.py b/tests/handlers/aws/test_utils.py new 
file mode 100644 index 00000000..16e02ea5 --- /dev/null +++ b/tests/handlers/aws/test_utils.py @@ -0,0 +1,336 @@ +# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +# or more contributor license agreements. Licensed under the Elastic License 2.0; +# you may not use this file except in compliance with the Elastic License 2.0. + + +import random +import string +from datetime import datetime +from typing import Any +from unittest import TestCase + +import pytest + +from handlers.aws.utils import ( + cloudwatch_logs_object_id, + get_shipper_from_input, + kinesis_record_id, + s3_object_id, + sqs_object_id, +) +from share import parse_config +from shippers import LogstashShipper + +# Elasticsearch _id constraints +MAX_ES_ID_SIZ_BYTES = 512 + +# Kinesis Input +# https://docs.aws.amazon.com/kinesis/latest/APIReference/API_CreateStream.html +MAX_STREAM_NAME_CHARS = 128 +# https://docs.aws.amazon.com/kinesis/latest/APIReference/API_PutRecord.html#Streams-PutRecord-request-PartitionKey +MAX_PARTITION_KEY_CHARS = 256 +# https://docs.aws.amazon.com/kinesis/latest/APIReference/API_PutRecord.html#API_PutRecord_ResponseSyntax +MAX_SEQUENCE_NUMBER_DIGITS = 128 + +# S3-SQS Input +# https://docs.aws.amazon.com/AmazonS3/latest/API/API_control_CreateBucket.html +MAX_BUCKET_NAME_CHARS = 255 +# https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObject.html#API_PutObject_RequestSyntax +# S3 Object key does not seem to have a maximum allowed number of chars, so we set it to our internal maximum +MAX_OBJECT_KEY_CHARS = 512 + +# SQS Input +# https://docs.aws.amazon.com/AWSSimpleQueueService/latest/APIReference/API_CreateQueue.html#API_CreateQueue_RequestParameters +MAX_QUEUE_NAME_CHARS = 80 +# https://docs.aws.amazon.com/AWSSimpleQueueService/latest/SQSDeveloperGuide/sqs-queue-message-identifiers.html +MAX_MESSAGE_ID_CHARS = 100 + +# Cloudwatch logs input +# https://docs.aws.amazon.com/AmazonCloudWatchLogs/latest/APIReference/API_CreateLogGroup.html +MAX_CW_LOG_GROUP_NAME_CHARS = 512 +# https://docs.aws.amazon.com/AmazonCloudWatchLogs/latest/APIReference/API_CreateLogStream.html +MAX_CW_LOG_STREAM_NAME_CHARS = 512 +# No docs available, set it to the max +MAX_CW_EVENT_ID_CHARS = 512 + + +def _utf8len(s: str) -> int: + return len(s.encode("utf-8")) + + +def _get_random_string_of_size(size: int) -> str: + return "".join(random.choices(string.ascii_lowercase + string.digits, k=size)) + + +def _get_random_digit_string_of_size(size: int) -> str: + return "".join(random.choices(string.digits, k=size)) + + +@pytest.mark.unit +class TestGetTriggerTypeAndConfigSource(TestCase): + def test_get_trigger_type_and_config_source(self) -> None: + from handlers.aws.utils import CONFIG_FROM_PAYLOAD, CONFIG_FROM_S3FILE, get_trigger_type_and_config_source + + with self.subTest("cloudwatch-logs and CONFIG_FROM_S3FILE"): + event: dict[str, Any] = {"awslogs": {"data": ""}} + + assert get_trigger_type_and_config_source(event=event) == ("cloudwatch-logs", CONFIG_FROM_S3FILE) + + with self.subTest("no Records"): + with self.assertRaisesRegexp(Exception, "Not supported trigger"): + event = {} + + get_trigger_type_and_config_source(event=event) + + with self.subTest("len(Records) < 1"): + with self.assertRaisesRegexp(Exception, "Not supported trigger"): + event = {"Records": []} + + get_trigger_type_and_config_source(event=event) + + with self.subTest("body in first record: replay-sqs CONFIG_FROM_S3FILE"): + event = { + "Records": [ + { + "body": '{"output_type": "output_type", ' + '"output_args": "output_args", 
"event_payload": "event_payload"}' + } + ] + } + + assert get_trigger_type_and_config_source(event=event) == ("replay-sqs", CONFIG_FROM_S3FILE) + + with self.subTest("body in first record: eventSource override"): + event = {"Records": [{"body": '{"Records": [{"eventSource":"aws:s3"}]}', "eventSource": "aws:kinesis"}]} + + assert get_trigger_type_and_config_source(event=event) == ("s3-sqs", CONFIG_FROM_S3FILE) + + with self.subTest("body in first record: eventSource not override"): + event = { + "Records": [ + {"body": '{"Records": [{"eventSource":"not-available-trigger"}]}', "eventSource": "aws:kinesis"} + ] + } + + assert get_trigger_type_and_config_source(event=event) == ("kinesis-data-stream", CONFIG_FROM_S3FILE) + + with self.subTest("body not in first record: eventSource not override"): + event = {"Records": [{"eventSource": "aws:kinesis"}]} + + assert get_trigger_type_and_config_source(event=event) == ("kinesis-data-stream", CONFIG_FROM_S3FILE) + + with self.subTest("messageAttributes without originalEventSourceARN in first record, CONFIG_FROM_S3FILE"): + event = {"Records": [{"messageAttributes": {}, "eventSource": "aws:kinesis"}]} + + assert get_trigger_type_and_config_source(event=event) == ("kinesis-data-stream", CONFIG_FROM_S3FILE) + + with self.subTest("messageAttributes with originalEventSourceARN in first record, CONFIG_FROM_PAYLOAD"): + event = {"Records": [{"messageAttributes": {"originalEventSourceARN": ""}, "eventSource": "aws:kinesis"}]} + + assert get_trigger_type_and_config_source(event=event) == ("kinesis-data-stream", CONFIG_FROM_PAYLOAD) + + +@pytest.mark.unit +class TestDiscoverIntegrationScope(TestCase): + def test_discover_integration_scope(self) -> None: + from handlers.aws.utils import discover_integration_scope + + with self.subTest("discover_integration_scope aws.cloudtrail integration scope"): + s3_object_key = ( + "AWSLogs/aws-account-id/CloudTrail/region/" + "yyyy/mm/dd/aws-account-id_CloudTrail_region_end-time_random-string.log.gz" + ) + + assert discover_integration_scope(s3_object_key=s3_object_key) == "aws.cloudtrail" + + with self.subTest("discover_integration_scope aws.cloudtrail digest integration scope"): + s3_object_key = ( + "AWSLogs/aws-account-id/CloudTrail-Digest/region/" + "yyyy/mm/dd/aws-account-id_CloudTrail-Digest_region_end-time_random-string.log.gz" + ) + + assert discover_integration_scope(s3_object_key=s3_object_key) == "aws.cloudtrail-digest" + + with self.subTest("discover_integration_scope aws.cloudtrail insight integration scope"): + s3_object_key = ( + "AWSLogs/aws-account-id/CloudTrail-Insight/region/" + "yyyy/mm/dd/aws-account-id_CloudTrail-Insight_region_end-time_random-string.log.gz" + ) + + assert discover_integration_scope(s3_object_key=s3_object_key) == "aws.cloudtrail" + + with self.subTest("discover_integration_scope aws.cloudwatch_logs integration scope"): + s3_object_key = "exportedlogs/111-222-333/2021-12-28/hash/file.gz" + + assert discover_integration_scope(s3_object_key=s3_object_key) == "aws.cloudwatch_logs" + + with self.subTest("discover_integration_scope aws.elb_logs integration scope"): + s3_object_key = ( + "AWSLogs/aws-account-id/elasticloadbalancing/region/yyyy/mm/dd/" + "aws-account-id_elasticloadbalancing_region_load-balancer-id_end-time_ip-address_random-string.log.gz" + ) + + assert discover_integration_scope(s3_object_key=s3_object_key) == "aws.elb_logs" + + with self.subTest("discover_integration_scope aws.firewall_logs integration scope"): + s3_object_key = 
"AWSLogs/aws-account-id/network-firewall/log-type/Region/firewall-name/timestamp/" + + assert discover_integration_scope(s3_object_key=s3_object_key) == "aws.firewall_logs" + + with self.subTest("discover_integration_scope aws.waf integration scope"): + s3_object_key = "AWSLogs/account-id/WAFLogs/Region/web-acl-name/YYYY/MM/dd/HH/mm" + + assert discover_integration_scope(s3_object_key=s3_object_key) == "aws.waf" + + with self.subTest("discover_integration_scope aws.vpcflow integration scope"): + s3_object_key = "AWSLogs/id/vpcflowlogs/region/date_vpcflowlogs_region_file.log.gz" + + assert discover_integration_scope(s3_object_key=s3_object_key) == "aws.vpcflow" + + with self.subTest("discover_integration_scope unknown integration scope"): + s3_object_key = "random_hash" + + assert discover_integration_scope(s3_object_key=s3_object_key) == "generic" + + with self.subTest("discover_integration_scope empty s3"): + s3_object_key = "" + + assert discover_integration_scope(s3_object_key=s3_object_key) == "generic" + + +@pytest.mark.unit +class TestGetShipperFromInput(TestCase): + def test_get_shipper_from_input(self) -> None: + with self.subTest("Logstash shipper from Kinesis input"): + config_yaml_kinesis: str = """ + inputs: + - type: kinesis-data-stream + id: arn:aws:kinesis:eu-central-1:123456789:stream/test-esf-kinesis-stream + outputs: + - type: logstash + args: + logstash_url: logstash_url + """ + config = parse_config(config_yaml_kinesis) + event_input = config.get_input_by_id( + "arn:aws:kinesis:eu-central-1:123456789:stream/test-esf-kinesis-stream" + ) + assert event_input is not None + shipper = get_shipper_from_input(event_input=event_input, config_yaml=config_yaml_kinesis) + assert len(shipper._shippers) == 1 + assert isinstance(shipper._shippers[0], LogstashShipper) + + with self.subTest("Logstash shipper from Cloudwatch logs input"): + config_yaml_cw: str = """ + inputs: + - type: cloudwatch-logs + id: arn:aws:logs:eu-central-1:123456789:stream/test-cw-logs + outputs: + - type: logstash + args: + logstash_url: logstash_url + """ + config = parse_config(config_yaml_cw) + event_input = config.get_input_by_id("arn:aws:logs:eu-central-1:123456789:stream/test-cw-logs") + assert event_input is not None + shipper = get_shipper_from_input(event_input=event_input, config_yaml=config_yaml_cw) + assert len(shipper._shippers) == 1 + assert isinstance(shipper._shippers[0], LogstashShipper) + + +@pytest.mark.unit +class TestRecordId(TestCase): + def test_kinesis_id_less_than_512bytes(self) -> None: + stream_name: str = _get_random_string_of_size(MAX_STREAM_NAME_CHARS) + partition_key: str = _get_random_string_of_size(MAX_PARTITION_KEY_CHARS) + sequence_number: str = _get_random_digit_string_of_size(MAX_SEQUENCE_NUMBER_DIGITS) + approximate_arrival_timestamp: int = int(datetime.utcnow().timestamp() * 1000) + relevant_fields_for_id: dict[str, Any] = { + "fields": { + "log": {"offset": 1}, + "aws": { + "kinesis": { + "type": "stream", + "name": stream_name, + "partition_key": partition_key, + "sequence_number": sequence_number, + } + }, + }, + "meta": { + "approximate_arrival_timestamp": approximate_arrival_timestamp, + }, + } + + generated_id = kinesis_record_id(relevant_fields_for_id) + assert _utf8len(generated_id) <= MAX_ES_ID_SIZ_BYTES + + def test_s3_id_less_than_512bytes(self) -> None: + event_time: int = int(datetime.utcnow().timestamp() * 1000) + bucket_name: str = _get_random_string_of_size(MAX_BUCKET_NAME_CHARS) + bucket_arn: str = f"arn:aws:s3:::{bucket_name}" + object_key: str = 
_get_random_string_of_size(MAX_OBJECT_KEY_CHARS) + relevant_fields_for_id: dict[str, Any] = { + "fields": { + "log": { + "offset": 1, + }, + "aws": { + "s3": { + "bucket": {"arn": bucket_arn}, + "object": {"key": object_key}, + } + }, + }, + "meta": {"event_time": event_time}, + } + generated_id = s3_object_id(relevant_fields_for_id) + assert _utf8len(generated_id) <= MAX_ES_ID_SIZ_BYTES + + def test_sqs_id_less_than_512bytes(self) -> None: + sent_timestamp: int = int(datetime.utcnow().timestamp() * 1000) + queue_name: str = _get_random_string_of_size(MAX_QUEUE_NAME_CHARS) + message_id: str = _get_random_string_of_size(MAX_MESSAGE_ID_CHARS) + + relevant_fields_for_id: dict[str, Any] = { + "fields": { + "log": { + "offset": 1, + }, + "aws": { + "sqs": { + "name": queue_name, + "message_id": message_id, + }, + }, + }, + "meta": {"sent_timestamp": sent_timestamp}, + } + + generated_id = sqs_object_id(relevant_fields_for_id) + assert _utf8len(generated_id) <= MAX_ES_ID_SIZ_BYTES + + def test_cloudwatch_id_less_than_512bytes(self) -> None: + event_timestamp: int = int(datetime.utcnow().timestamp() * 1000) + log_group_name: str = _get_random_string_of_size(MAX_CW_LOG_GROUP_NAME_CHARS) + log_stream_name: str = _get_random_string_of_size(MAX_CW_LOG_STREAM_NAME_CHARS) + event_id: str = _get_random_string_of_size(MAX_CW_EVENT_ID_CHARS) + + relevant_fields_for_id: dict[str, Any] = { + "fields": { + "log": { + "offset": 1, + }, + "aws": { + "cloudwatch": { + "log_group": log_group_name, + "log_stream": log_stream_name, + "event_id": event_id, + } + }, + }, + "meta": {"event_timestamp": event_timestamp}, + } + + generated_id = cloudwatch_logs_object_id(relevant_fields_for_id) + assert _utf8len(generated_id) <= MAX_ES_ID_SIZ_BYTES
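# Illustrative sketch: the tests above assert that every generated document
# _id stays within Elasticsearch's 512-byte _id limit even when every source
# field (stream name, partition key, object key, log group, etc.) is at its
# maximum allowed size. One way to guarantee such a fixed bound is to hash the
# variable-length components and append only a fixed-width offset. This is an
# assumption for illustration only, not necessarily how kinesis_record_id,
# s3_object_id, sqs_object_id or cloudwatch_logs_object_id are implemented.
import hashlib


def bounded_object_id(*components: str, offset: int) -> str:
    # A sha256 hex digest is always 64 characters, so the length of the
    # resulting id does not depend on how long the input components are.
    digest = hashlib.sha256("-".join(components).encode("utf-8")).hexdigest()
    return f"{digest}-{offset:012d}"


# 64 hex chars + "-" + a 12-digit offset = 77 bytes, comfortably below 512,
# regardless of the size of the inputs.
assert len(bounded_object_id("s" * 128, "p" * 256, "9" * 128, offset=1).encode("utf-8")) <= 512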