Skip to content

Commit

Permalink
Filter out non-project resources (#38)
Browse files Browse the repository at this point in the history
  • Loading branch information
kokorin authored Jul 10, 2024
1 parent b75bc54 commit 888e195
Show file tree
Hide file tree
Showing 2 changed files with 135 additions and 55 deletions.
23 changes: 14 additions & 9 deletions dbt_pumpkin/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,11 +78,15 @@ def _do_select_resource_ids(self) -> dict[ResourceType, set[ResourceID]]:

result: dict[ResourceType, set[ResourceID]] = {}
resource_counter = Counter()
# TODO: after dropping DBT 1.5 support we can get project name from Manifest
# self.load_manifest().metadata.project_name
project_name = self._load_project_yml()["name"]

for raw_resource in res.result:
resource = json.loads(raw_resource)
resource_type_str = resource["resource_type"]
if resource_type_str in ResourceType.values():
resource_package_name = resource["package_name"]
if resource_type_str in ResourceType.values() and resource_package_name == project_name:
res_type = ResourceType(resource_type_str)
res_id = ResourceID(resource["unique_id"])

Expand Down Expand Up @@ -133,7 +137,7 @@ def _do_select_resources(self) -> list[Resource]:
else:
path = Path(raw_resource.original_file_path)
if raw_resource.patch_path:
# path_path starts with "project_name://", we just remove it
# patch_path starts with "project_name://", we just remove it
# DBT 1.5 has no manifest.metadata.project_name, so we use resource FQN which starts with project name
# patch_path_prefix = self.manifest.metadata.project_name + "://"
patch_path_prefix = raw_resource.fqn[0] + "://"
Expand Down Expand Up @@ -193,13 +197,7 @@ def _create_pumpkin_project(self, project_vars: dict[str, any]) -> Path:
msg = f"Macros directory is not found or doesn't exist: {src_macros_path}"
raise PumpkinError(msg)

project_yml_path = self.locate_project_dir() / "dbt_project.yml"

if not project_yml_path.exists() or not project_yml_path.is_file():
msg = f"dbt_project.yml is not found or doesn't exist: {project_yml_path}"
raise PumpkinError(msg)

project_yml = self._yaml.load(project_yml_path)
project_yml = self._load_project_yml()

pumpkin_yml = {
"name": "dbt_pumpkin",
Expand All @@ -225,6 +223,13 @@ def _create_pumpkin_project(self, project_vars: dict[str, any]) -> Path:

return pumpkin_dir

def _load_project_yml(self):
    """Load and return the parsed ``dbt_project.yml`` of the located project directory.

    Raises:
        PumpkinError: if ``dbt_project.yml`` does not exist or is not a regular file.
    """
    yml_file = self.locate_project_dir() / "dbt_project.yml"

    # Happy path first: the file is present and is a regular file.
    if yml_file.exists() and yml_file.is_file():
        return self._yaml.load(yml_file)

    msg = f"dbt_project.yml is not found or doesn't exist: {yml_file}"
    raise PumpkinError(msg)

def _run_operation(
self, operation_name: str, project_vars: dict[str, any] | None, result_callback: Callable[[any], None]
):
Expand Down
167 changes: 121 additions & 46 deletions tests/test_loader.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from __future__ import annotations

import shutil
import textwrap
from dataclasses import dataclass
from pathlib import Path
from tempfile import mkdtemp
from typing import Any
Expand Down Expand Up @@ -54,7 +56,15 @@ def loader_only_models() -> ResourceLoader:
yaml = YAML(typ="safe")


def fake_dbt_project_loader(project_yml: dict, project_files: dict[str, Any]) -> ResourceLoader:
# Declarative description of a throwaway dbt project that fake_dbt_project_loader writes to disk.
@dataclass
class Project:
# Content dumped to dbt_project.yml; its "name" is also used as the directory name of a local package.
project_yml: dict[str, Any]
# Maps a path relative to the project root to the text content written to that file.
project_files: dict[str, Any]
# NOTE(review): not read by the visible part of fake_dbt_project_loader, which dumps its own
# default profiles - confirm the intended use.
profiles_yml: dict[str, Any] | None = None
# Nested projects created under "sub_packages/<name>" and listed as "local" entries in packages.yml.
local_packages: list[Project] | None = None


def fake_dbt_project_loader(project: Project) -> ResourceLoader:
project_dir = Path(mkdtemp(prefix="test_pumpkin_"))

default_profiles = {
Expand All @@ -71,13 +81,38 @@ def fake_dbt_project_loader(project_yml: dict, project_files: dict[str, Any]) ->
}
}

yaml.dump(project_yml, project_dir / "dbt_project.yml")
yaml.dump(default_profiles, project_dir / "profiles.yml")
def create_project(root: Path, project: Project):
project_yaml = {"packages-install-path": str(root / "dbt_packages"), **project.project_yml.copy()}
yaml.dump(project_yaml, root / "dbt_project.yml")

for path_str, content in project.project_files.items():
path = root / path_str
path.parent.mkdir(exist_ok=True)
path.write_text(content, encoding="utf-8")

if project.local_packages:
packages_yml = {}

for package in project.local_packages:
package_name = package.project_yml["name"]
package_root = root / "sub_packages" / package_name
package_root.mkdir(parents=True, exist_ok=True)

create_project(package_root, package)

packages_yml.setdefault("packages", []).append({"local": str(package_root)})

yaml.dump(packages_yml, root / "packages.yml")
# DBT 1.5 can't install local deps on Windows, so we just copy the packages.
# Besides that, DBT 1.8 and earlier change the CWD when executing `dbt deps`
# (https://github.com/dbt-labs/dbt-core/issues/8997),
# so copying the file tree is the easiest fix.

for path_str, content in project_files.items():
path = project_dir / path_str
path.parent.mkdir(exist_ok=True)
path.write_text(content, encoding="utf-8")
shutil.copytree(root / "sub_packages", root / "dbt_packages")

create_project(project_dir, project)

yaml.dump(default_profiles, project_dir / "profiles.yml")

return ResourceLoader(
project_params=ProjectParams(project_dir=project_dir, profiles_dir=project_dir),
Expand All @@ -88,46 +123,47 @@ def fake_dbt_project_loader(project_yml: dict, project_files: dict[str, Any]) ->
@pytest.fixture
def loader_multiple_roots():
return fake_dbt_project_loader(
project_yml={
"name": "test_pumpkin",
"version": "0.1.0",
"profile": "test_pumpkin",
"model-paths": ["models", "models_{{ var('absent_var', 'extra') }}"],
"seed-paths": ["seeds", "seeds_{{ var('absent_var', 'extra') }}"],
"snapshot-paths": ["snapshots", "snapshots_{{ var('absent_var', 'extra') }}"],
},
project_files={
"models/customers.sql": "select 1 as id",
"models/customers.yml": textwrap.dedent("""\
Project(
project_yml={
"name": "test_pumpkin",
"version": "0.1.0",
"profile": "test_pumpkin",
"model-paths": ["models", "models_{{ var('absent_var', 'extra') }}"],
"seed-paths": ["seeds", "seeds_{{ var('absent_var', 'extra') }}"],
"snapshot-paths": ["snapshots", "snapshots_{{ var('absent_var', 'extra') }}"],
},
project_files={
"models/customers.sql": "select 1 as id",
"models/customers.yml": textwrap.dedent("""\
version: 2
models:
- name: customers
"""),
"models_extra/extra_customers.sql": "select 1 as id",
"models_extra/extra_customers.yml": textwrap.dedent("""\
"models_extra/extra_customers.sql": "select 1 as id",
"models_extra/extra_customers.yml": textwrap.dedent("""\
version: 2
models:
- name: extra_customers
"""),
"seeds/seed_customers.csv": textwrap.dedent("""\
"seeds/seed_customers.csv": textwrap.dedent("""\
id,name
42,John
"""),
"seeds/seed_customers.yml": textwrap.dedent("""\
"seeds/seed_customers.yml": textwrap.dedent("""\
version: 2
seeds:
- name: seed_customers
"""),
"seeds_extra/seed_extra_customers.csv": textwrap.dedent("""\
"seeds_extra/seed_extra_customers.csv": textwrap.dedent("""\
id,name
42,John
"""),
"seeds_extra/seed_extra_customers.yml": textwrap.dedent("""\
"seeds_extra/seed_extra_customers.yml": textwrap.dedent("""\
version: 2
seeds:
- name: seed_extra_customers
"""),
"models/sources.yml": textwrap.dedent("""\
"models/sources.yml": textwrap.dedent("""\
version: 2
sources:
- name: pumpkin
Expand All @@ -136,7 +172,7 @@ def loader_multiple_roots():
- name: customers
identifier: seed_customers
"""),
"models_extra/sources.yml": textwrap.dedent("""\
"models_extra/sources.yml": textwrap.dedent("""\
version: 2
sources:
- name: extra_pumpkin
Expand All @@ -145,51 +181,53 @@ def loader_multiple_roots():
- name: customers
identifier: seed_customers
"""),
"snapshots/customers_snapshot.sql": textwrap.dedent("""\
"snapshots/customers_snapshot.sql": textwrap.dedent("""\
{% snapshot customers_snapshot %}
{{ config(unique_key='id', target_schema='snapshots', strategy='check', check_cols='all') }}
select * from {{ source('pumpkin', 'customers') }}
{% endsnapshot %}
"""),
"snapshots/customers_snapshot.yml": textwrap.dedent("""\
"snapshots/customers_snapshot.yml": textwrap.dedent("""\
version: 2
snapshots:
- name: customers_snapshot
"""),
"snapshots_extra/extra_customers_snapshot.sql": textwrap.dedent("""\
"snapshots_extra/extra_customers_snapshot.sql": textwrap.dedent("""\
{% snapshot extra_customers_snapshot %}
{{ config(unique_key='id', target_schema='extra_snapshots', strategy='check', check_cols='all') }}
select * from {{ source('extra_pumpkin', 'customers') }}
{% endsnapshot %}
"""),
"snapshots_extra/extra_customers_snapshot.yml": textwrap.dedent("""\
"snapshots_extra/extra_customers_snapshot.yml": textwrap.dedent("""\
version: 2
snapshots:
- name: extra_customers_snapshot
"""),
},
},
)
)


@pytest.fixture
def loader_configured_paths():
return fake_dbt_project_loader(
project_yml={
"name": "test_pumpkin",
"version": "0.1.0",
"profile": "test_pumpkin",
"seeds": {"test_pumpkin": {"+dbt-pumpkin-path": "_seeds.yml"}},
"models": {"test_pumpkin": {"+dbt-pumpkin-path": "_models.yml"}},
"snapshots": {"test_pumpkin": {"+dbt-pumpkin-path": "_snapshots.yml"}},
"sources": {"test_pumpkin": {"+dbt-pumpkin-path": "_sources.yml"}},
},
project_files={
"models/customers.sql": "select 1 as id",
"seeds/seed_customers.csv": textwrap.dedent("""\
Project(
project_yml={
"name": "test_pumpkin",
"version": "0.1.0",
"profile": "test_pumpkin",
"seeds": {"test_pumpkin": {"+dbt-pumpkin-path": "_seeds.yml"}},
"models": {"test_pumpkin": {"+dbt-pumpkin-path": "_models.yml"}},
"snapshots": {"test_pumpkin": {"+dbt-pumpkin-path": "_snapshots.yml"}},
"sources": {"test_pumpkin": {"+dbt-pumpkin-path": "_sources.yml"}},
},
project_files={
"models/customers.sql": "select 1 as id",
"seeds/seed_customers.csv": textwrap.dedent("""\
id,name
42,John
"""),
"models/sources.yml": textwrap.dedent("""\
"models/sources.yml": textwrap.dedent("""\
version: 2
sources:
- name: pumpkin
Expand All @@ -198,13 +236,42 @@ def loader_configured_paths():
- name: customers
- name: orders
"""),
"snapshots/customers_snapshot.sql": textwrap.dedent("""\
"snapshots/customers_snapshot.sql": textwrap.dedent("""\
{% snapshot customers_snapshot %}
{{ config(unique_key='id', target_schema='snapshots', strategy='check', check_cols='all') }}
select * from {{ source('pumpkin', 'customers') }}
{% endsnapshot %}
"""),
},
},
)
)


@pytest.fixture
def loader_with_deps():
    """Loader for a root project that depends on one local package.

    The root project ("test_pumpkin") and the local package ("extra") each define
    a single model, so tests can tell project resources apart from package resources.
    """
    extra_package = Project(
        project_yml={
            "name": "extra",
            "version": "0.1.0",
            "profile": "test_pumpkin",
        },
        project_files={
            "models/extra_customers.sql": "select 1 as id",
        },
    )

    root_project = Project(
        project_yml={
            "name": "test_pumpkin",
            "version": "0.1.0",
            "profile": "test_pumpkin",
        },
        project_files={
            "models/customers.sql": "select 1 as id",
        },
        local_packages=[extra_package],
    )

    return fake_dbt_project_loader(root_project)


Expand Down Expand Up @@ -268,6 +335,14 @@ def test_selected_resource_ids_only_models(loader_only_models: ResourceLoader):
}


def test_selected_resources_non_project_resources_excluded(loader_with_deps):
    """Only the root project's model is selected; the model of the local package is left out."""
    selected = loader_with_deps.select_resource_ids()

    expected = {
        ResourceType.MODEL: {ResourceID("model.test_pumpkin.customers")},
    }
    assert selected == expected


def test_selected_resources(loader_all):
def sort_order(res: Resource):
return str(res.unique_id)
Expand Down

0 comments on commit 888e195

Please sign in to comment.