Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add PexKeyringConfigurationRequest API for configuring Pex/Pip keyring #21852

Closed
wants to merge 9 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/notes/2.25.x.md
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,8 @@ The version of Python used by Pants itself is now [3.11](https://docs.python.org

The oldest [glibc version](https://www.sourceware.org/glibc/wiki/Glibc%20Timeline) supported by the published Pants wheels is now 2.28. This should have no effect unless you are running on extremely old Linux distributions. See <https://github.com/pypa/manylinux> for background context on Python wheels and C libraries.

Added `PexKeyringConfigurationRequest`, a new API for plugins to supply credentials to Pex/Pip for when they access secured Python package indexes.


## Full Changelog

Expand Down
222 changes: 213 additions & 9 deletions src/python/pants/backend/python/util_rules/pex_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,27 +6,34 @@
import dataclasses
import logging
import os.path
import shlex
from dataclasses import dataclass
from typing import Iterable, List, Mapping, Optional, Tuple
from io import StringIO
from pathlib import Path
from typing import ClassVar, Iterable, Mapping, Optional, Tuple

from pants.backend.python.subsystems.python_native_code import PythonNativeCodeSubsystem
from pants.backend.python.subsystems.setup import PythonSetup
from pants.backend.python.util_rules import pex_environment
from pants.backend.python.util_rules.pex_environment import PexEnvironment, PexSubsystem
from pants.base.build_root import BuildRoot
from pants.core.goals.resolves import ExportableTool
from pants.core.util_rules import adhoc_binaries, external_tool
from pants.core.util_rules import adhoc_binaries, external_tool, system_binaries
from pants.core.util_rules.adhoc_binaries import PythonBuildStandaloneBinary
from pants.core.util_rules.environments import EnvironmentTarget
from pants.core.util_rules.external_tool import (
DownloadedExternalTool,
ExternalToolRequest,
TemplatedExternalTool,
)
from pants.engine.fs import CreateDigest, Digest, Directory, MergeDigests
from pants.core.util_rules.system_binaries import BashBinary
from pants.engine.fs import CreateDigest, Digest, Directory, FileContent, MergeDigests
from pants.engine.internals.selectors import MultiGet
from pants.engine.internals.session import RunId
from pants.engine.platform import Platform
from pants.engine.process import Process, ProcessCacheScope
from pants.engine.rules import Get, collect_rules, rule
from pants.engine.unions import UnionRule
from pants.engine.rules import Get, _uncacheable_rule, collect_rules, rule
from pants.engine.unions import UnionMembership, UnionRule, union
from pants.option.global_options import GlobalOptions, ca_certs_path_to_file_content
from pants.option.option_types import ArgsListOption
from pants.util.frozendict import FrozenDict
Expand Down Expand Up @@ -120,6 +127,73 @@ def __post_init__(self) -> None:
raise ValueError("`--pex-root` flag not allowed. We set its value for you.")


@union
@dataclass(frozen=True)
class PexKeyringConfigurationRequest:
"""Request sent to keyring plugins to request credentials to expose to Pex/Pip."""

name: ClassVar[str]


@dataclass(frozen=True)
class PexKeyringConfigurationResponse:
"""Response containing credentials to expose to Pex/Pip via simulating the `keyring` package's
binary."""

# Nested map from SITE -> (USER, PASSWORD).
credentials: FrozenDict[str, tuple[str, str]] | None


# The following script is injected into any Pex CLI process where credentials will be exposed via
# the `keyring` package. Pip will be configured to use its `subprocess` mode which will obtain
# credentials by invoking this "keyring" binary from the PATH.
#
# Credentials are stored in the file stored in the `__PANTS_KEYRING_DATA` environment variable.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would emphasize (via name and comment) that the value in the env var is a path, not actual data content.

# The file is sourced to define a `pants_keyring_credentials` associative array.
_KEYRING_SCRIPT = """\
#!__BASH_PATH__
if [ "$1" != "get" ]; then
echo "ERROR: The `keyring` trampoline script only supports the `get` subcommand." 1>&2
exit 111
fi
shift
if [ -z "$__PANTS_KEYRING_DATA" ]; then
echo "ERROR: __PANTS_KEYRING_DATA env var was not set." 1>&2
exit 111
fi
if [ ! -e "$__PANTS_KEYRING_DATA" ]; then
echo "ERROR: __PANTS_KEYRING_DATA file '${__PANTS_KEYRING_DATA}' does not exist." 1>&2
exit 111
fi
source "$__PANTS_KEYRING_DATA"
if [ -z "$1" ]; then
echo "ERROR: Site not provided on command-line." 1>&2
exit 111
fi
site="$1"
shift
if [ -z "$1" ]; then
echo "ERROR: User not provided on command-line." 1>&2
exit 111
fi
user="$1"
shift
password="${pants_keyring_credentials[$site:$user]}"
if [ -z "$password" ]; then
# If the password is not set, then ordinary `keyring` exits with no output and code 1.
exit 1
fi
echo "$password"
exit 0
"""


class PexPEX(DownloadedExternalTool):
"""The Pex PEX binary."""

Expand All @@ -130,6 +204,114 @@ async def download_pex_pex(pex_cli: PexCli, platform: Platform) -> PexPEX:
return PexPEX(digest=pex_pex.digest, exe=pex_pex.exe)


@dataclass(frozen=True)
class _KeyringScript:
digest: Digest
path: str


@rule
async def setup_keyring_script(bash: BashBinary) -> _KeyringScript:
keyring_path = ".keyring/keyring"
digest = await Get(
Digest,
CreateDigest(
[
FileContent(
path=keyring_path,
content=_KEYRING_SCRIPT.replace("__BASH_PATH__", bash.path).encode(),
is_executable=True,
)
]
),
)
return _KeyringScript(digest=digest, path=keyring_path)


@dataclass(frozen=True)
class _KeyringState:
keyring_data_path: Path | None


def _always_shlex_quote(str: str) -> str:
shlexed_str = shlex.quote(str)
if shlexed_str != str:
return shlexed_str
return f"'{str}'"


@_uncacheable_rule
async def setup_keyring_state(
union_membership: UnionMembership,
env_tgt: EnvironmentTarget,
build_root: BuildRoot,
run_id: RunId,
) -> _KeyringState:
# TODO: Consider how to enable credential injection in remote execution and other contexts
# where Pants rule code cannot directly control where the credentials are stored.
if not env_tgt.can_access_local_system_paths:
return _KeyringState(keyring_data_path=None)

keyring_plugin_request_types = union_membership.get(PexKeyringConfigurationRequest)
if not keyring_plugin_request_types:
return _KeyringState(keyring_data_path=None)

credentials_responses: dict[str, list[tuple[str, tuple[str, str]]]] = {}
for keyring_plugin_request_type in keyring_plugin_request_types:
keyring_plugin_request = keyring_plugin_request_type()
keyring_plugin_response = await Get( # noqa: PNT30: Only one provider expected.
PexKeyringConfigurationResponse, PexKeyringConfigurationRequest, keyring_plugin_request
)

keyring_plugin_credentials = keyring_plugin_response.credentials
if not keyring_plugin_credentials:
continue

for site, user_password in keyring_plugin_credentials.items():
if site not in credentials_responses:
credentials_responses[site] = []
credentials_responses[site].append((keyring_plugin_request_type.name, user_password))

# Short circuit if the providers did not supply any credentials.
if not credentials_responses:
return _KeyringState(keyring_data_path=None)

# Check for multiple responses for a single site. Keyring and our simulation of it only supports
# a single user/password per site.
credentials: list[tuple[str, str, str]] = []
for site, user_password_creds_by_provider in credentials_responses.items():
if len(user_password_creds_by_provider) > 1:
providers = [x[0] for x in user_password_creds_by_provider]
raise Exception(
f"Multiple keyring plugins returned responses for `{site}`. "
"The keyring support only supports a single user/password per site. "
f"The keyring plugins in conflict are: {', '.join(providers)}."
)
user_password_creds = user_password_creds_by_provider[0][1]
credentials.append((site, user_password_creds[0], user_password_creds[1]))
credentials.sort()

# Write the credentials to a file based on the current run ID.
content = StringIO()
content.write("declare -A pants_keyring_credentials\n")
for site, user, password in credentials:
content.write(
f"pants_keyring_credentials[{_always_shlex_quote(site)}]={_always_shlex_quote(user)}\n"
)
site_user = f"{site}:{user}"
content.write(
f"pants_keyring_credentials[{_always_shlex_quote(site_user)}]={_always_shlex_quote(password)}\n"
)

# TODO: Consider removing the run ID so that this value does not contribute to the cache key
# and cause unneeded invalidation of build actions as it changes for each run.
Comment on lines +306 to +307
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is probably a main issue of this PR. Where do we store the credentials out of band in a way that does not cause unnecessary invalidation of processes across different Pants runs?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The concern is that the credentials file gets rewritten on concurrent Pants runs. Maybe it should be stored per-pantsd process?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One alternative solution might install the keyring package somewhere (whether manually by the user or Pants doing it) and then set the password by calling keyring set SITE USER PASSWORD. Pants would no longer use a shim binary in that case.

An advantages is that we would no longer need an environment variable to point at the data file. This would prevent invalidating for changes to that filename (e.g., for different run/session IDs).

It assumes that updating the password is idempotent in the sense that any caller of keyring get SITE USER will be able to use that password. This should in fact be the case given how keyring works.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could also have the shim binary be for the entire workspace and have the data file name embedded in it. This would also avoid having the data file name in an environment variable for a Process.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can pants not depend on keyring and use it programmatically in-process?

Copy link
Contributor Author

@tdyas tdyas Jan 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So the trade off made was, we cannot assume the in-process Pip support works under Pex and we cannot assume the user has installed a global keyring binary, so with this PR, Pants will try to inject a keyring binary for Pip to see and Pip will need to be in --keyring-provider=subprocess.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That said, maybe the trade off of requiring an installed keyring provider will actually be the better solution?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, I see. Yeah, requiring keyring as a system dependency outside of Pants seems reasonable for a first cut at least?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If that is the case, then this PR would reduce to just a --pex-cli-keyring-provider option instead of a plugin API? The PR stacked on top of this one to add AWS CodeArtifact token renewal is the driving force for this PR.

What are your thoughts on such token auto-renewal being driven from within a Pants plugin (which can be invoked on a per-session basis)?

An alternative solution would give users the ability to invoke a runnable during session startup as configured by a new target type. Then the user could define the aws token logic in a BUiLD file. Then there would be no need for most of the current solution direction.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the simplest possible solution, that punts installing, locking down and configuring a keychain binary to the user, is fine...

keyring_data_path = build_root.pathlib_path / ".pants.d" / "keyring" / str(run_id) / "data.sh"
keyring_data_path.parent.mkdir(parents=True, exist_ok=True)
keyring_data_path.write_bytes(content.getvalue().encode())
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we not ensure that this file has no greater than 0600 perms?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And should we not delete it when the process returns?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See my comment at #21852 (comment) about concurrent runs. Would love your input on that question.


return _KeyringState(keyring_data_path=keyring_data_path)


@rule
async def setup_pex_cli_process(
request: PexCliProcess,
Expand All @@ -143,16 +325,26 @@ async def setup_pex_cli_process(
python_setup: PythonSetup,
) -> Process:
tmpdir = ".tmp"
gets: List[Get] = [Get(Digest, CreateDigest([Directory(tmpdir)]))]

digests_to_merge = [pex_pex.digest]
digest_gets: list[Get] = [Get(Digest, CreateDigest([Directory(tmpdir)]))]

cert_args = []
if global_options.ca_certs_path:
ca_certs_fc = ca_certs_path_to_file_content(global_options.ca_certs_path)
gets.append(Get(Digest, CreateDigest((ca_certs_fc,))))
digest_gets.append(Get(Digest, CreateDigest((ca_certs_fc,))))
cert_args = ["--cert", ca_certs_fc.path]

digests_to_merge = [pex_pex.digest]
digests_to_merge.extend(await MultiGet(gets))
keyring_state = await Get(_KeyringState)
keyring_args: list[str] = []
keyring_data_path = keyring_state.keyring_data_path
keyring_script: _KeyringScript | None = None
if keyring_data_path:
keyring_script = await Get(_KeyringScript)
digests_to_merge.append(keyring_script.digest)
keyring_args.append("--keyring-provider=subprocess")

digests_to_merge.extend(await MultiGet(digest_gets))
if request.additional_input_digest:
digests_to_merge.append(request.additional_input_digest)
input_digest = await Get(Digest, MergeDigests(digests_to_merge))
Expand Down Expand Up @@ -191,6 +383,7 @@ async def setup_pex_cli_process(
pip_version_args = [] if request.subcommand else ["--pip-version", python_setup.pip_version]
args = [
*request.subcommand,
*keyring_args,
*global_args,
*verbosity_args,
*warnings_args,
Expand All @@ -212,6 +405,16 @@ async def setup_pex_cli_process(
**({"PEX_SCRIPT": "pex3"} if request.subcommand else {}),
}

keyring_data_path = keyring_state.keyring_data_path
if keyring_data_path:
assert keyring_script is not None
keyring_script_parent_path = os.path.dirname(keyring_script.path)
env["__PANTS_KEYRING_DATA"] = str(keyring_data_path)
if "PATH" in env:
env["PATH"] = f"{{chroot}}/{keyring_script_parent_path}:{env['PATH']}"
else:
env["PATH"] = f"{{chroot}}/{keyring_script_parent_path}"

return Process(
normalized_argv,
description=request.description,
Expand Down Expand Up @@ -241,5 +444,6 @@ def rules():
*external_tool.rules(),
*pex_environment.rules(),
*adhoc_binaries.rules(),
*system_binaries.rules(),
UnionRule(ExportableTool, PexCli),
]
75 changes: 72 additions & 3 deletions src/python/pants/backend/python/util_rules/pex_cli_test.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,23 @@
# Copyright 2020 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

import textwrap
from pathlib import Path

import pytest

from pants.backend.python.util_rules import pex_cli
from pants.backend.python.util_rules.pex_cli import PexCliProcess
from pants.backend.python.util_rules.pex_cli import (
PexCliProcess,
PexKeyringConfigurationRequest,
PexKeyringConfigurationResponse,
)
from pants.engine.fs import DigestContents
from pants.engine.process import Process
from pants.engine.rules import QueryRule
from pants.engine.process import Process, ProcessResult
from pants.engine.rules import QueryRule, rule
from pants.engine.unions import UnionRule
from pants.testutil.rule_runner import RuleRunner
from pants.util.frozendict import FrozenDict


@pytest.fixture
Expand Down Expand Up @@ -52,3 +59,65 @@ def test_pass_global_args_to_pex_cli_subsystem(tmp_path: Path, rule_runner: Rule
[PexCliProcess(subcommand=(), extra_args=(), description="")],
)
assert "--foo=bar --baz --spam=eggs" in proc.argv


class DummyKeyringConfigRequest(PexKeyringConfigurationRequest):
name = "test-keyring-provider"


@rule
async def setup_dummy_keyring_provider(
_request: DummyKeyringConfigRequest,
) -> PexKeyringConfigurationResponse:
return PexKeyringConfigurationResponse(
credentials=FrozenDict({"a-site": ("some-user", "xyzzy")})
)


def test_keyring_support() -> None:
rule_runner = RuleRunner(
rules=[
*pex_cli.rules(),
setup_dummy_keyring_provider,
QueryRule(ProcessResult, (PexCliProcess,)),
QueryRule(Process, (PexCliProcess,)),
UnionRule(PexKeyringConfigurationRequest, DummyKeyringConfigRequest),
]
)
proc = rule_runner.request(
Process,
[PexCliProcess(subcommand=(), extra_args=("some", "--args"), description="")],
)

assert "--keyring-provider=subprocess" in proc.argv

# Ensure that the expected username/password was written to the data file on disk.
keyring_data_path = proc.env.get("__PANTS_KEYRING_DATA")
assert keyring_data_path is not None
keyring_data = Path(keyring_data_path).read_text()
assert "pants_keyring_credentials['a-site']='some-user'\n" in keyring_data
assert "pants_keyring_credentials['a-site:some-user']='xyzzy'\n" in keyring_data

# Now invoke the `keyring` script to verify that it works.
proc2 = rule_runner.request(
ProcessResult,
[
PexCliProcess(
subcommand=(),
extra_args=(
"--",
"-c",
textwrap.dedent(
"""\
import subprocess
import sys
result = subprocess.run(["keyring", "get", "a-site", "some-user"])
sys.exit(result.returncode)
"""
),
),
description="",
)
],
)
assert proc2.stdout.decode().strip() == "xyzzy"
Loading