Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Yarn 1.x: Parse packages from yarn.lock #693

Merged
merged 8 commits into from
Nov 19, 2024
Merged
24 changes: 14 additions & 10 deletions cachi2/core/package_managers/yarn_classic/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
run_yarn_cmd,
)
from cachi2.core.package_managers.yarn_classic.project import Project
from cachi2.core.package_managers.yarn_classic.workspaces import extract_workspace_metadata
from cachi2.core.package_managers.yarn_classic.resolver import resolve_packages
from cachi2.core.rooted_path import RootedPath

log = logging.getLogger(__name__)
Expand All @@ -26,22 +26,26 @@ def _ensure_mirror_dir_exists(output_dir: RootedPath) -> None:
output_dir.join_within_root(MIRROR_DIR).path.mkdir(parents=True, exist_ok=True)

for package in request.yarn_classic_packages:
path = request.source_dir.join_within_root(package.path)
package_path = request.source_dir.join_within_root(package.path)
eskultety marked this conversation as resolved.
Show resolved Hide resolved
_ensure_mirror_dir_exists(request.output_dir)
prefetch_env = _get_prefetch_environment_variables(request.output_dir)
_verify_corepack_yarn_version(path, prefetch_env)
_fetch_dependencies(path, prefetch_env)
# Workspaces metadata is not used at the moment, but will
# eventually be converted into components. Using a noop assertion
# to prevent linters from complaining.
workspaces = extract_workspace_metadata(package, request.source_dir)
assert workspaces is not None # nosec -- see comment above
_resolve_yarn_project(Project.from_source_dir(package_path), request.output_dir)

return RequestOutput.from_obj_list(
components, _generate_build_environment_variables(), project_files=[]
)


def _resolve_yarn_project(project: Project, output_dir: RootedPath) -> None:
    """Verify one yarn project, fetch its dependencies and resolve its packages.

    :param project: the yarn project to process
    :param output_dir: directory used to build the prefetch environment variables
    """
    log.info(f"Fetching the yarn dependencies at the subpath {project.source_dir}")

    _verify_repository(project)

    # The same environment is shared by the version check and the actual fetch.
    env = _get_prefetch_environment_variables(output_dir)
    _verify_corepack_yarn_version(project.source_dir, env)
    _fetch_dependencies(project.source_dir, env)

    # The resolved packages are not consumed here; the return value is discarded.
    resolve_packages(project)


def _fetch_dependencies(source_dir: RootedPath, env: dict[str, str]) -> None:
"""Fetch dependencies using 'yarn install'.

Expand Down
3 changes: 2 additions & 1 deletion cachi2/core/package_managers/yarn_classic/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ def from_file(cls, path: RootedPath) -> "PackageJson":
return cls(path, package_json_data)


@dataclass
class YarnLock(_CommonConfigFile):
"""A yarn.lock file.

Expand Down Expand Up @@ -131,7 +132,7 @@ def from_file(cls, path: RootedPath) -> "YarnLock":
solution="Please verify the content of the file.",
)

return cls(path, yarn_lockfile.data)
return cls(path, yarn_lockfile.data, yarn_lockfile)


ConfigFile = Union[PackageJson, YarnLock]
Expand Down
237 changes: 237 additions & 0 deletions cachi2/core/package_managers/yarn_classic/resolver.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,237 @@
import re
from itertools import chain
from pathlib import Path
from typing import Iterable, Optional, Union
from urllib.parse import urlparse

from pyarn.lockfile import Package as PYarnPackage
from pydantic import BaseModel

from cachi2.core.errors import PackageRejected, UnexpectedFormat
from cachi2.core.package_managers.npm import NPM_REGISTRY_CNAMES
from cachi2.core.package_managers.yarn_classic.project import PackageJson, Project, YarnLock
from cachi2.core.package_managers.yarn_classic.workspaces import (
Workspace,
extract_workspace_metadata,
)
from cachi2.core.rooted_path import RootedPath

# https://github.com/yarnpkg/yarn/blob/7cafa512a777048ce0b666080a24e80aae3d66a9/src/resolvers/exotics/git-resolver.js#L15-L17
GIT_HOSTS = frozenset(("github.com", "gitlab.com", "bitbucket.com", "bitbucket.org"))
eskultety marked this conversation as resolved.
Show resolved Hide resolved
# Regular expressions that mark a package URL as a git URL. `_is_git_url`
# returns True as soon as any one of them matches at the start of the URL.
GIT_PATTERN_MATCHERS = (
    re.compile(r"^git:"),  # "git:" prefix
    re.compile(r"^git\+.+:"),  # "git+<something>:" prefix
    re.compile(r"^ssh:"),  # "ssh:" prefix
    re.compile(r"^https?:.+\.git$"),  # http(s) URL ending in ".git"
    re.compile(r"^https?:.+\.git#.+"),  # http(s) ".git" URL followed by "#<something>"
)


class _BasePackage(BaseModel):
    """A base Yarn 1.x package.

    Declares the fields shared by every concrete package model in this module.
    """

    name: str
    # May be None, e.g. when a package.json has no "version" key.
    version: Optional[str] = None
    # Filled from the lockfile entry's checksum for the package types that pass one.
    integrity: Optional[str] = None
    # NOTE(review): nothing in this module sets this flag; presumably it marks
    # dev dependencies -- confirm against the callers.
    dev: bool = False


class _UrlMixin(BaseModel):
    """Mixin that adds a required ``url`` field to a package model."""

    url: str


class _RelpathMixin(BaseModel):
    """Mixin that adds a required ``relpath`` field to a package model."""

    # Callers in this module pass a path relative to a root or source directory.
    relpath: Path


class RegistryPackage(_BasePackage, _UrlMixin):
    """A Yarn 1.x package from the registry.

    The package factory picks this type when the hostname of the lockfile
    entry's URL is listed in NPM_REGISTRY_CNAMES.
    """


class GitPackage(_BasePackage, _UrlMixin):
    """A Yarn 1.x package from a git repo.

    The package factory picks this type when ``_is_git_url`` accepts the
    entry's URL; it does not pass an integrity value for these packages.
    """


class UrlPackage(_BasePackage, _UrlMixin):
    """A Yarn 1.x package from a http/https URL.

    The package factory picks this type when ``_is_tarball_url`` accepts the
    entry's URL.
    """


class FilePackage(_BasePackage, _RelpathMixin):
    """A Yarn 1.x package from a local file path.

    The package factory picks this type for lockfile entries that have both a
    local path and a URL.
    """


class WorkspacePackage(_BasePackage, _RelpathMixin):
    """A Yarn 1.x local workspace package.

    Built from package.json contents: this module uses it both for the main
    package and for each workspace.
    """


class LinkPackage(_BasePackage, _RelpathMixin):
    """A Yarn 1.x local link package.

    The package factory picks this type for lockfile entries that have a local
    path but no URL; it does not pass an integrity value for these packages.
    """


# Union of every concrete package model defined in this module.
YarnClassicPackage = Union[
    FilePackage,
    GitPackage,
    LinkPackage,
    RegistryPackage,
    UrlPackage,
    WorkspacePackage,
]


class _YarnClassicPackageFactory:
    """Turn parsed yarn.lock entries into typed YarnClassicPackage models.

    Local paths found in lockfile entries are joined onto the source directory
    given at construction time.
    """

    def __init__(self, source_dir: RootedPath):
        self._source_dir = source_dir

    def create_package_from_pyarn_package(self, package: PYarnPackage) -> YarnClassicPackage:
        """Return the package model matching a single yarn.lock entry.

        The entry is classified in this order: registry URL, local path (file
        or link), git URL, tarball URL.

        :param package: the lockfile entry as parsed by pyarn
        :raises PackageRejected: if the entry has an absolute local path
        :raises UnexpectedFormat: if the entry matches none of the known package types
        """

        def assert_package_has_relative_path(package: PYarnPackage) -> None:
            if package.path and Path(package.path).is_absolute():
                raise PackageRejected(
                    (
                        f"The package {package.name}@{package.version} has an absolute path "
                        f"({package.path}), which is not permitted."
                    ),
                    solution="Ensure that file/link packages in yarn.lock do not have absolute paths.",
                )

        # Link packages carry no URL, and a malformed entry may lack one as well.
        # Only hand real strings to the URL classifiers: their regex matching
        # raises TypeError on None, which would mask the UnexpectedFormat error below.
        url = package.url

        if url and _is_from_npm_registry(url):
            return RegistryPackage(
                name=package.name,
                version=package.version,
                integrity=package.checksum,
                url=url,
            )
        elif package.path is not None:
            # Ensure path is not absolute
            assert_package_has_relative_path(package)
            # Ensure path is within the repository root
            path = self._source_dir.join_within_root(package.path)
            # File packages have a url, whereas link packages do not
            if url:
                return FilePackage(
                    name=package.name,
                    version=package.version,
                    relpath=path.subpath_from_root,
                    integrity=package.checksum,
                )
            return LinkPackage(
                name=package.name,
                version=package.version,
                relpath=path.subpath_from_root,
            )
        elif url and _is_git_url(url):
            return GitPackage(
                name=package.name,
                version=package.version,
                url=url,
            )
        elif url and _is_tarball_url(url):
            return UrlPackage(
                name=package.name,
                version=package.version,
                url=url,
                integrity=package.checksum,
            )
        else:
            raise UnexpectedFormat(
                (
                    "Cachi2 could not determine the package type for the following package in "
                    f"yarn.lock: {vars(package)}"
                ),
                solution=(
                    "Ensure yarn.lock is well-formed and if so, report this error to the Cachi2 team"
                ),
            )


def _is_tarball_url(url: str) -> bool:
"""Return True if a package URL is a tarball URL."""
# Parse the URL to extract components
parsed_url = urlparse(url)

# https://github.com/yarnpkg/yarn/blob/7cafa512a777048ce0b666080a24e80aae3d66a9/src/resolvers/exotics/tarball-resolver.js#L34
if parsed_url.scheme not in {"http", "https"}:
return False

# https://github.com/yarnpkg/yarn/blob/7cafa512a777048ce0b666080a24e80aae3d66a9/src/resolvers/exotics/tarball-resolver.js#L40
# https://github.com/yarnpkg/yarn/blob/7cafa512a777048ce0b666080a24e80aae3d66a9/src/resolvers/exotics/bitbucket-resolver.js#L11
# https://github.com/yarnpkg/yarn/blob/7cafa512a777048ce0b666080a24e80aae3d66a9/src/resolvers/exotics/gitlab-resolver.js#L10C10-L10C23
eskultety marked this conversation as resolved.
Show resolved Hide resolved
if parsed_url.path.endswith((".tar", ".tar.gz", ".tgz")):
return True

# https://github.com/yarnpkg/yarn/blob/7cafa512a777048ce0b666080a24e80aae3d66a9/src/resolvers/exotics/github-resolver.js#L24
if parsed_url.hostname == "codeload.github.com" and "tar.gz" in parsed_url.path:
return True

return False


def _is_git_url(url: str) -> bool:
    """Tell whether a package URL refers to a git repository."""
    # https://github.com/yarnpkg/yarn/blob/7cafa512a777048ce0b666080a24e80aae3d66a9/src/resolvers/exotics/git-resolver.js#L32
    for pattern in GIT_PATTERN_MATCHERS:
        if pattern.match(url):
            return True

    # https://github.com/yarnpkg/yarn/blob/7cafa512a777048ce0b666080a24e80aae3d66a9/src/resolvers/exotics/git-resolver.js#L39
    parts = urlparse(url)
    if parts.hostname not in GIT_HOSTS:
        return False

    # On a known git host only a bare "org/repo" path counts as a repository;
    # anything longer (e.g. org/repo/file.tar.gz) does not.
    non_empty_segments = list(filter(None, parts.path.split("/")))
    return len(non_empty_segments) == 2


def _is_from_npm_registry(url: str) -> bool:
    """Tell whether a package URL is served by the NPM or Yarn registry."""
    hostname = urlparse(url).hostname
    return hostname in NPM_REGISTRY_CNAMES


def _get_packages_from_lockfile(
    source_dir: RootedPath, yarn_lock: YarnLock
) -> list[YarnClassicPackage]:
    """Convert every dependency listed in yarn.lock into a package model."""
    lockfile_entries: list[PYarnPackage] = yarn_lock.yarn_lockfile.packages()
    to_package = _YarnClassicPackageFactory(source_dir).create_package_from_pyarn_package
    return list(map(to_package, lockfile_entries))


def _get_main_package(package_json: PackageJson) -> WorkspacePackage:
    """Return a WorkspacePackage for the main package in package.json.

    The package's relpath is the directory that contains package.json,
    expressed relative to the root.

    :param package_json: the parsed package.json of the main package
    :raises PackageRejected: if package.json has no "name" field
    """
    # Use the public ``data`` accessor for both the presence check and the
    # reads, rather than mixing it with the private ``_data`` attribute.
    data = package_json.data
    if "name" not in data:
        raise PackageRejected(
            f"The package.json file located at {package_json.path.path} is missing the name field",
            solution="Ensure the package.json file has a valid name.",
        )
    return WorkspacePackage(
        name=data["name"],
        version=data.get("version"),
        relpath=package_json.path.subpath_from_root.parent,
    )


def _get_workspace_packages(
    source_dir: RootedPath, workspaces: list[Workspace]
) -> list[WorkspacePackage]:
    """Build one WorkspacePackage per workspace, with its path relative to source_dir."""
    packages: list[WorkspacePackage] = []
    for workspace in workspaces:
        contents = workspace.package_contents
        packages.append(
            WorkspacePackage(
                name=contents["name"],
                version=contents.get("version"),
                relpath=workspace.path.relative_to(source_dir.path),
            )
        )
    return packages


def resolve_packages(project: Project) -> Iterable[YarnClassicPackage]:
    """Return a list of Packages corresponding to all project dependencies.

    The list holds, in order: the main package from package.json, one package
    per workspace, and every package found in yarn.lock.

    :param project: the yarn project whose packages should be resolved
    """
    workspaces = extract_workspace_metadata(project.source_dir)
    yarn_lock = YarnLock.from_file(project.source_dir.join_within_root("yarn.lock"))
    # Return a real list, as the docstring promises: a chain iterator could be
    # consumed only once, silently yielding nothing on a second pass.
    return [
        _get_main_package(project.package_json),
        *_get_workspace_packages(project.source_dir, workspaces),
        *_get_packages_from_lockfile(project.source_dir, yarn_lock),
    ]
12 changes: 5 additions & 7 deletions cachi2/core/package_managers/yarn_classic/workspaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import pydantic

from cachi2.core.errors import PackageRejected
from cachi2.core.models.input import YarnClassicPackageInput
from cachi2.core.rooted_path import PathOutsideRoot, RootedPath


Expand Down Expand Up @@ -93,21 +92,20 @@ def _read_package_from(path: RootedPath) -> dict[str, Any]:


def extract_workspace_metadata(
package: YarnClassicPackageInput,
source_dir: RootedPath,
package_path: RootedPath,
) -> list[Workspace]:
"""Extract workspace metadata from a package."""
processed_package = _read_package_from(source_dir.join_within_root(package.path))
processed_package = _read_package_from(package_path)
workspaces_globs = _extract_workspaces_globs(processed_package)
workspaces_paths = _get_workspace_paths(workspaces_globs, source_dir)
ensure_no_path_leads_out(workspaces_paths, source_dir)
workspaces_paths = _get_workspace_paths(workspaces_globs, package_path)
ensure_no_path_leads_out(workspaces_paths, package_path)
_ensure_workspaces_are_well_formed(workspaces_paths)
parsed_workspaces = []
for wp in workspaces_paths:
parsed_workspaces.append(
Workspace(
path=wp,
package_contents=_read_package_from(source_dir.join_within_root(wp)),
package_contents=_read_package_from(package_path.join_within_root(wp)),
)
)
return parsed_workspaces
6 changes: 6 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ dependencies = [
"gitpython",
"packageurl-python",
"packaging",
"pyarn",
"pydantic",
"pypi-simple",
"pyarn",
Expand Down Expand Up @@ -81,6 +82,11 @@ disallow_untyped_defs = true
disallow_incomplete_defs = true
disallow_untyped_decorators = true

# TODO: pyarn does not currently have type annotations
eskultety marked this conversation as resolved.
Show resolved Hide resolved
[[tool.mypy.overrides]]
module = "pyarn.*"
ignore_missing_imports = true

[tool.coverage.report]
skip_covered = true
show_missing = true
Expand Down
6 changes: 3 additions & 3 deletions requirements-extras.txt
Original file line number Diff line number Diff line change
Expand Up @@ -786,9 +786,9 @@ propcache==0.2.0 \
--hash=sha256:fc2db02409338bf36590aa985a461b2c96fce91f8e7e0f14c50c5fcc4f229016 \
--hash=sha256:ffcad6c564fe6b9b8916c1aefbb37a362deebf9394bd2974e9d84232e3e08504
# via yarl
pyarn==0.2.0 \
--hash=sha256:542ff739af2b81a1200776eff2b4d2566a330846decbd0f815999b196d7b067d \
--hash=sha256:d06e8b79bb830f142187b57ee664dc0104f658efdb2b2bae7ed99eaf7746eb1a
pyarn==0.3.0 \
--hash=sha256:8f799d94a9b2fd4bd54185149d8745187c7ededcde98576ff70bf0823fd8a710 \
--hash=sha256:e8eaed3ae80c1891a8ea7f6b7538c049a79dd7e7d3b4279f275699b3477b38c2
# via cachi2 (pyproject.toml)
pycodestyle==2.12.1 \
--hash=sha256:46f0fb92069a7c28ab7bb558f05bfc0110dac69a0cd23c61ea0040283a9d78b3 \
Expand Down
6 changes: 3 additions & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -593,9 +593,9 @@ propcache==0.2.0 \
--hash=sha256:fc2db02409338bf36590aa985a461b2c96fce91f8e7e0f14c50c5fcc4f229016 \
--hash=sha256:ffcad6c564fe6b9b8916c1aefbb37a362deebf9394bd2974e9d84232e3e08504
# via yarl
pyarn==0.2.0 \
--hash=sha256:542ff739af2b81a1200776eff2b4d2566a330846decbd0f815999b196d7b067d \
--hash=sha256:d06e8b79bb830f142187b57ee664dc0104f658efdb2b2bae7ed99eaf7746eb1a
pyarn==0.3.0 \
--hash=sha256:8f799d94a9b2fd4bd54185149d8745187c7ededcde98576ff70bf0823fd8a710 \
--hash=sha256:e8eaed3ae80c1891a8ea7f6b7538c049a79dd7e7d3b4279f275699b3477b38c2
# via cachi2 (pyproject.toml)
pydantic==2.9.1 \
--hash=sha256:1363c7d975c7036df0db2b4a61f2e062fbc0aa5ab5f2772e0ffc7191a4f4bce2 \
Expand Down
2 changes: 1 addition & 1 deletion tests/integration/test_yarn_classic.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def test_yarn_classic_packages(
pytest.param(
utils.TestParameters(
repo="https://github.com/cachito-testing/cachi2-yarn.git",
ref="67679cb740122cfa6c17238c7a1ff9cfcdb875f3",
ref="valid_yarn_all_dependency_types",
packages=({"path": ".", "type": "yarn-classic"},),
flags=["--dev-package-managers"],
check_vendor_checksums=False,
Expand Down
Loading
Loading