diff --git a/.secrets.baseline b/.secrets.baseline index 5d9b840b5..92e60a158 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -314,14 +314,14 @@ "filename": "tests/data/test_indexed_file.py", "hashed_secret": "a62f2225bf70bfaccbc7f1ef2a397836717377de", "is_verified": false, - "line_number": 449 + "line_number": 467 }, { "type": "Secret Keyword", "filename": "tests/data/test_indexed_file.py", "hashed_secret": "c258a8d1264cc59de81f8b1975ac06732b1cf182", "is_verified": false, - "line_number": 470 + "line_number": 488 } ], "tests/keys/2018-05-01T21:29:02Z/jwt_private_key.pem": [ @@ -422,5 +422,5 @@ } ] }, - "generated_at": "2024-11-04T09:20:13Z" + "generated_at": "2025-01-10T17:57:26Z" } diff --git a/fence/blueprints/data/indexd.py b/fence/blueprints/data/indexd.py index 5ae4a059e..980441289 100755 --- a/fence/blueprints/data/indexd.py +++ b/fence/blueprints/data/indexd.py @@ -967,6 +967,11 @@ def assume_role(cls, bucket_cred, expires_in, aws_creds_config, boto=None): ) return rv + def _create_bucket_name_regex(self, bucket): + if bucket.endswith("*"): + return f"^{bucket[:-1] + '.*'}$" + return f"^{bucket}$" + def bucket_name(self): """ Return: @@ -978,7 +983,10 @@ def bucket_name(self): InternalError("S3_BUCKETS not configured"), ) for bucket in s3_buckets: - if re.match("^" + bucket + "$", self.parsed_url.netloc): + print(self.parsed_url.netloc) + if re.match( + self._create_bucket_name_regex(bucket=bucket), self.parsed_url.netloc + ): return self.parsed_url.netloc return None @@ -998,7 +1006,7 @@ def bucket_config(self, bucket_name=None): bucket_name = self.bucket_name() if bucket_name: for bucket in s3_buckets: - if re.match("^" + bucket + "$", bucket_name): + if re.match(self._create_bucket_name_regex(bucket=bucket), bucket_name): return s3_buckets.get(bucket) return None @@ -1076,8 +1084,11 @@ def get_signed_url( config, "AWS_CREDENTIALS", InternalError("credentials not configured") ) + # breakpoint() bucket_name = self.bucket_name() - bucket_config = self.bucket_config(bucket_name) + bucket_config = self.bucket_config(bucket_name=bucket_name) + if not bucket_config: + raise InternalError(f"cannot get config for bucket {bucket_name}") object_id = self.parsed_url.path.strip("/") if bucket_config and bucket_config.get("endpoint_url"): diff --git a/tests/data/conftest.py b/tests/data/conftest.py index f3c2edfec..950fa5719 100755 --- a/tests/data/conftest.py +++ b/tests/data/conftest.py @@ -25,7 +25,13 @@ def supported_protocol(request): return request.param -@pytest.fixture(params=["invalid_bucket*name", "validbucketname-alreadyvalid"]) +@pytest.fixture( + params=[ + "invalid_bucket*name", + "validbucketname-alreadyvalid", + "validbucketname-netloc", + ] +) def s3_indexed_file_location(request): """ Provides a mock s3 file location instance, parameterized with a valid and invalid bucket_name @@ -33,12 +39,9 @@ def s3_indexed_file_location(request): mock_url = "only/needed/for/instantiation" location = S3IndexedFileLocation(url=mock_url) - # Mock parsed_url attributes, made an always valid value for fallback upon failed regex validation + # Mock parsed_url attributes location.parsed_url = MagicMock() - location.parsed_url.netloc = "validbucketname-netloc" + location.parsed_url.netloc = request.param location.parsed_url.path = "/test/object" - # Set bucket_name based on the parameter, can be valid or invalid - location.bucket_name = MagicMock(return_value=request.param) - return location diff --git a/tests/data/test_indexed_file.py b/tests/data/test_indexed_file.py index 20937855e..a294b0344 100755 --- a/tests/data/test_indexed_file.py +++ b/tests/data/test_indexed_file.py @@ -373,6 +373,7 @@ def test_get_signed_url_s3_bucket_name(mock_get_value, s3_indexed_file_location, "validbucketname-alreadyvalid": { "endpoint_url": "https://custom.endpoint2.com" }, + "validbucketname*": {"endpoint_url": "https://custom.endpoint3.com"}, }, }.get(key, error) @@ -386,15 +387,32 @@ def test_get_signed_url_s3_bucket_name(mock_get_value, s3_indexed_file_location, "aws_secret_access_key": "mock_secret", # pragma: allowlist secret } - result_url = s3_indexed_file_location.get_signed_url( - "download", expires_in=3600 - ) - - # Check that real_bucket_name fell back to parsed_url.netloc, or otherwise used the already valid bucket - if s3_indexed_file_location.bucket_name() == "invalid_bucket*name": - assert "validbucketname-netloc" in result_url + # this is the test case for having an invalid S3 bucket name in URL, which should not happen + if s3_indexed_file_location.parsed_url.netloc == "invalid_bucket*name": + assert s3_indexed_file_location.bucket_name() is None + assert s3_indexed_file_location.bucket_config() is None + with pytest.raises(InternalError): + s3_indexed_file_location.get_signed_url("download", expires_in=3600) else: - assert "validbucketname-alreadyvalid" in result_url + result_url = s3_indexed_file_location.get_signed_url( + "download", expires_in=3600 + ) + if ( + s3_indexed_file_location.parsed_url.netloc + == "validbucketname-netloc" + ): + assert ( + "validbucketname-netloc" in result_url + and "endpoint3" in result_url + ) + elif ( + s3_indexed_file_location.parsed_url.netloc + == "validbucketname-alreadyvalid" + ): + assert ( + "validbucketname-alreadyvalid" in result_url + and "endpoint2" in result_url + ) @pytest.mark.parametrize("supported_action", ["download"], indirect=True)