Skip to content

Commit

Permalink
Add exclusions for aliases [RHELDST-13644]
Browse files Browse the repository at this point in the history
Certain paths on our NetStorage fs have no symlinks/aliases, and teams/
tools that make use of these paths expect similar behaviour from exodus.

This change introduces "exclude_paths" to exclude alias generation for
certain paths, so we can replicate NetStorage behaviour.
  • Loading branch information
amcmahon-rh committed Nov 25, 2024
1 parent 14e5da8 commit 3b28ebd
Show file tree
Hide file tree
Showing 12 changed files with 217 additions and 43 deletions.
13 changes: 7 additions & 6 deletions exodus_gw/aws/dynamodb.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,18 +46,19 @@ def definitions(self):
self._definitions = self.query_definitions()
return self._definitions

def _aliases(self, alias_types: list[str]) -> list[tuple[str, str]]:
out: list[tuple[str, str]] = []
def _aliases(self, alias_types: list[str]) -> list[tuple[str, str, list[str]]]:
out: list[tuple[str, str, list[str]]] = []

for k, v in self.definitions.items():
if k in alias_types:
for alias in v:
out.append((alias["src"], alias["dest"]))
out.append((alias["src"], alias["dest"],
alias.get("exclude_paths") or []))

return out

@property
def aliases_for_write(self) -> list[tuple[str, str]]:
def aliases_for_write(self) -> list[tuple[str, str, list[str]]]:
# Aliases used when writing items to DynamoDB.
#
# Note that these aliases are traversed only in the src => dest
Expand Down Expand Up @@ -95,7 +96,7 @@ def aliases_for_write(self) -> list[tuple[str, str]]:
return self._aliases(["origin_alias", "releasever_alias"])

@property
def aliases_for_flush(self) -> list[tuple[str, str]]:
def aliases_for_flush(self) -> list[tuple[str, str, list[str]]]:
# Aliases used when flushing cache.
out = self._aliases(["origin_alias", "releasever_alias", "rhui_alias"])

Expand Down Expand Up @@ -128,7 +129,7 @@ def aliases_for_flush(self) -> list[tuple[str, str]]:
# this alias. If we don't traverse the alias from dest => src then we
# will miss the fact that /content/dist/rhel8/rhui paths should also
# have cache flushed.
out = out + [(dest, src) for (src, dest) in out]
out = out + [(dest, src, exclusions) for (src, dest, exclusions) in out]

return out

Expand Down
23 changes: 17 additions & 6 deletions exodus_gw/aws/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ def get_reader(cls, request):
return cls(request)


def uri_alias(uri: str, aliases: list[tuple[str, str]]) -> list[str]:
def uri_alias(uri: str, aliases: list[tuple[str, str, list[str]]]) -> list[str]:
# Resolve every alias between paths within the uri (e.g.
# allow RHUI paths to be aliased to non-RHUI).
#
Expand All @@ -187,7 +187,7 @@ def uri_alias(uri: str, aliases: list[tuple[str, str]]) -> list[str]:
def uri_alias_recurse(
accum: list[str],
uri: str,
aliases: list[tuple[str, str]],
aliases: list[tuple[str, str, list[str]]],
depth=0,
maxdepth=4,
):
Expand Down Expand Up @@ -217,8 +217,19 @@ def add_out(new_uri: str) -> bool:
accum.insert(0, new_uri)
return out

for src, dest in aliases:
for src, dest, exclude_paths in aliases:
if uri.startswith(src + "/") or uri == src:
# Used to replicate old NetStorage-compatible behaviour. This will
# typically match non-rpm paths, such as /images/ or /isos/
if any([re.search(exclusion, uri) for exclusion in exclude_paths]):
LOG.debug("Aliasing for %s was not applied as it matches one "
"of the following exclusion paths: %s.",
uri,
",".join(exclude_paths),
extra={"event": "publish", "success": True},
)
continue

new_uri = uri.replace(src, dest, 1)
LOG.debug(
"Resolved alias:\n\tsrc: %s\n\tdest: %s",
Expand Down Expand Up @@ -252,8 +263,8 @@ def add_out(new_uri: str) -> bool:
# But this is not desired behavior, we know that every alias is intended
# to be resolved in the URL a maximum of once, hence this adjustment.
sub_aliases = [
(subsrc, subdest)
for (subsrc, subdest) in aliases
(subsrc, subdest, exclusions)
for (subsrc, subdest, exclusions) in aliases
if (subsrc, subdest) != (src, dest)
]

Expand All @@ -267,7 +278,7 @@ def add_out(new_uri: str) -> bool:


def uris_with_aliases(
uris: Iterable[str], aliases: list[tuple[str, str]]
uris: Iterable[str], aliases: list[tuple[str, str, list[str]]]
) -> list[str]:
# Given a collection of uris and aliases, returns a new collection of uris
# post alias resolution, including *both* sides of each alias when applicable.
Expand Down
21 changes: 19 additions & 2 deletions exodus_gw/routers/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,12 @@
"pattern": PATH_PATTERN,
"description": "Target of the alias, relative to CDN root.",
},
"exclude_paths": {
"type": "array",
"items": {"type": "string"},
"description": "Paths for which alias will not be resolved, "
"treated as an unanchored regex."
}
},
},
"uniqueItems": True,
Expand Down Expand Up @@ -134,19 +140,30 @@ def config_post(
},
},
"origin_alias": [
{"src": "/content/origin", "dest": "/origin"},
{"src": "/origin/rpm", "dest": "/origin/rpms"},
{
"src": "/content/origin",
"dest": "/origin",
"exclude_paths": []
},
{
"src": "/origin/rpm",
"dest": "/origin/rpms",
"exclude_paths": ["/iso/"]
},
],
"releasever_alias": [
{
"dest": "/content/dist/rhel8/8.5",
"src": "/content/dist/rhel8/8",
"exclude_paths": ["/files/", "/images/", "/iso/"]
},

],
"rhui_alias": [
{
"dest": "/content/dist/rhel8",
"src": "/content/dist/rhel8/rhui",
"exclude_paths": ["/files/", "/images/", "/iso/"]
},
],
}
Expand Down
15 changes: 13 additions & 2 deletions exodus_gw/routers/deploy.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,19 +56,30 @@ def deploy_config(
},
},
"origin_alias": [
{"src": "/content/origin", "dest": "/origin"},
{"src": "/origin/rpm", "dest": "/origin/rpms"},
{
"src": "/content/origin",
"dest": "/origin",
"exclude_paths": []
},
{
"src": "/origin/rpm",
"dest": "/origin/rpms",
"exclude_paths": ["/iso/"]
},
],
"releasever_alias": [
{
"dest": "/content/dist/rhel8/8.5",
"src": "/content/dist/rhel8/8",
"exclude_paths": ["/files/", "/images/", "/iso/"]
},

],
"rhui_alias": [
{
"dest": "/content/dist/rhel8",
"src": "/content/dist/rhel8/rhui",
"exclude_paths": ["/files/", "/images/", "/iso/"]
},
],
}
Expand Down
4 changes: 4 additions & 0 deletions exodus_gw/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,10 @@ class Alias(BaseModel):
dest: str = Field(
..., description="Target of the alias, relative to CDN root."
)
exclude_paths: list[str] | None = Field(
[], description="Paths for which alias will not be resolved, "
"treated as an unanchored regex."
)


class YumVariable(Enum):
Expand Down
2 changes: 1 addition & 1 deletion exodus_gw/worker/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def __init__(
paths: list[str],
settings: Settings,
env: str,
aliases: list[tuple[str, str]],
aliases: list[tuple[str, str, list[str]]],
):
self.paths = [p for p in paths if not exclude_path(p)]
self.settings = settings
Expand Down
12 changes: 10 additions & 2 deletions exodus_gw/worker/deploy.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
import re
from typing import Any

import dramatiq
Expand Down Expand Up @@ -93,7 +94,8 @@ def deploy_config(
db = Session(bind=db_engine(settings))
ddb = DynamoDB(env, settings, from_date)

original_aliases = {src: dest for (src, dest) in ddb.aliases_for_flush}
original_aliases = {src: dest for (src, dest, _) in ddb.aliases_for_flush}
original_exclusions = {src: exc for (src, _, exc) in ddb.aliases_for_flush}

message = CurrentMessage.get_current_message()
assert message
Expand Down Expand Up @@ -142,12 +144,18 @@ def deploy_config(
# URLs depending on what changed in the config.
flush_paths: set[str] = set()

for src, updated_dest in ddb.aliases_for_flush:
for src, updated_dest, _ in ddb.aliases_for_flush:
if original_aliases.get(src) != updated_dest:
for published_path in db.query(models.PublishedPath).filter(
models.PublishedPath.env == env,
models.PublishedPath.web_uri.like(f"{src}/%"),
):
# If any original exclusion matches the uri, the uri wouldn't
# have been treated as an alias, thus cache flushing would be
# unnecessary.
if any([re.search(exclusion, published_path.web_uri)
for exclusion in original_exclusions.get(src, [])]):
continue
LOG.info(
"Updated alias %s will flush cache for %s",
src,
Expand Down
105 changes: 87 additions & 18 deletions tests/aws/test_uri_alias.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
(
"/content/origin/rpms/path/to/file.iso",
[
("/content/origin", "/origin"),
("/origin/rpm", "/origin/rpms"),
("/content/origin", "/origin", []),
("/origin/rpm", "/origin/rpms", []),
],
[
"/origin/rpms/path/to/file.iso",
Expand All @@ -22,7 +22,7 @@
(
"/content/dist/rhel8/8/path/to/file.rpm",
[
("/content/dist/rhel8/8", "/content/dist/rhel8/8.5"),
("/content/dist/rhel8/8", "/content/dist/rhel8/8.5", []),
],
[
"/content/dist/rhel8/8.5/path/to/file.rpm",
Expand Down Expand Up @@ -50,8 +50,8 @@ def test_uri_alias_multi_level_write():
aliases = [
# The data here is made up as there is not currently any identified
# realistic scenario having multi-level aliases during write.
("/content/testproduct/1", "/content/testproduct/1.1.0"),
("/content/other", "/content/testproduct"),
("/content/testproduct/1", "/content/testproduct/1.1.0", []),
("/content/other", "/content/testproduct", []),
]

out = uri_alias(uri, aliases)
Expand All @@ -74,10 +74,10 @@ def test_uri_alias_multi_level_flush():
aliases = [
# The caller is providing aliases in both src => dest and
# dest => src directions, as in the "cache flush" case.
("/content/dist/rhel8/8", "/content/dist/rhel8/8.8"),
("/content/dist/rhel8/8.8", "/content/dist/rhel8/8"),
("/content/dist/rhel8/rhui", "/content/dist/rhel8"),
("/content/dist/rhel8", "/content/dist/rhel8/rhui"),
("/content/dist/rhel8/8", "/content/dist/rhel8/8.8", []),
("/content/dist/rhel8/8.8", "/content/dist/rhel8/8", []),
("/content/dist/rhel8/rhui", "/content/dist/rhel8", []),
("/content/dist/rhel8", "/content/dist/rhel8/rhui", []),
]

out = uri_alias(uri, aliases)
Expand Down Expand Up @@ -108,15 +108,15 @@ def test_uri_alias_limit(caplog: pytest.LogCaptureFixture):
# actually used on production.

uri = "/path/a/repo"
aliases = [
("/path/a", "/path/b"),
("/path/b", "/path/c"),
("/path/c", "/path/d"),
("/path/d", "/path/e"),
("/path/e", "/path/f"),
("/path/f", "/path/g"),
("/path/g", "/path/h"),
("/path/h", "/path/i"),
aliases: list[tuple[str, str, list[str]]] = [
("/path/a", "/path/b", []),
("/path/b", "/path/c", []),
("/path/c", "/path/d", []),
("/path/d", "/path/e", []),
("/path/e", "/path/f", []),
("/path/f", "/path/g", []),
("/path/g", "/path/h", []),
("/path/h", "/path/i", []),
]

out = uri_alias(uri, aliases)
Expand All @@ -137,3 +137,72 @@ def test_uri_alias_limit(caplog: pytest.LogCaptureFixture):
assert (
"Aliases too deeply nested, bailing out at /path/f/repo" in caplog.text
)


@pytest.mark.parametrize(
"input,aliases,output,log_message",
[
(
"/content/dist/rhel9/9/x86_64/baseos/iso/PULP_MANIFEST",
[
("/content/dist/rhel9/9", "/content/dist/rhel9/9.5", ["/iso/"]),
],
# just returns the original
["/content/dist/rhel9/9/x86_64/baseos/iso/PULP_MANIFEST"],
"Aliasing for /content/dist/rhel9/9/x86_64/baseos/iso/PULP_MANIFEST "
"was not applied as it matches one of the following exclusion paths: /iso/."
),
(
"/some/path/with/file/in/it",
[
("/some/path", "/another/different/path", ["/none/", "/here/"]),
("/another/different/path", "/this/wont/alias", ["/file/"])
],
["/another/different/path/with/file/in/it",
"/some/path/with/file/in/it"],
"Aliasing for /another/different/path/with/file/in/it was not "
"applied as it matches one of the following exclusion paths: /file/."
),
(
"/my/base/content/path/cool_iso_tool.rpm",
[
("/my/base", "/your/own", ["/iso/"]),
],
["/your/own/content/path/cool_iso_tool.rpm",
"/my/base/content/path/cool_iso_tool.rpm"],
"Resolved alias:\\n\\tsrc: /my/base/content/path/cool_iso_tool.rpm"
"\\n\\tdest: /your/own/content/path/cool_iso_tool.rpm"
),
(
"/content/dist/rhel9/9.5/x86_64/baseos/iso/PULP_MANIFEST",
[
("/content/dist", "/alias/path", ["/rhel[89]/"]),
],
["/content/dist/rhel9/9.5/x86_64/baseos/iso/PULP_MANIFEST"],
"Aliasing for /content/dist/rhel9/9.5/x86_64/baseos/iso/PULP_MANIFEST "
"was not applied as it matches one of the following exclusion "
"paths: /rhel[89]/."
),
(
"/content/dist/rhel7/7.5/x86_64/baseos/iso/PULP_MANIFEST",
[
("/content/dist", "/alias/path", ["/rhel[89]/"]),
],
["/alias/path/rhel7/7.5/x86_64/baseos/iso/PULP_MANIFEST",
"/content/dist/rhel7/7.5/x86_64/baseos/iso/PULP_MANIFEST",],
"Resolved alias:\\n\\tsrc: /content/dist/rhel7/7.5/x86_64/baseos/iso/PULP_MANIFEST"
"\\n\\tdest: /alias/path/rhel7/7.5/x86_64/baseos/iso/PULP_MANIFEST"
),
],
ids=["basic", "transitive", "filename", "pattern_include", "pattern_exclude"],
)
def test_uri_alias_exclusions(input, aliases, output, log_message, caplog):
caplog.set_level(DEBUG, logger="exodus-gw")
assert uri_alias(input, aliases) == output
assert (
f'"message": "{log_message}", '
'"event": "publish", '
'"success": true' in caplog.text
)
Loading

0 comments on commit 3b28ebd

Please sign in to comment.