Skip to content

Commit

Permalink
Merge branch 'unit_testing_feature_branch' into support-complex-types…
Browse files Browse the repository at this point in the history
…-unit-testing
  • Loading branch information
MichelleArk committed Nov 21, 2023
2 parents f1d68f4 + e001991 commit 827e35e
Show file tree
Hide file tree
Showing 14 changed files with 570 additions and 50 deletions.
6 changes: 6 additions & 0 deletions .changes/unreleased/Fixes-20231013-130943.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Fixes
body: For packages installed with tarball method, fetch metadata to resolve nested dependencies
time: 2023-10-13T13:09:43.188308-04:00
custom:
Author: adamlopez
Issue: "8621"
6 changes: 6 additions & 0 deletions .changes/unreleased/Fixes-20231113-154535.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Fixes
body: Use seed file from disk for unit testing if rows not specified in YAML config
time: 2023-11-13T15:45:35.008565Z
custom:
Author: aranke
Issue: "8652"
6 changes: 6 additions & 0 deletions .changes/unreleased/Under the Hood-20231107-191546.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Under the Hood
body: Cache dbt plugin modules to improve integration test performance
time: 2023-11-07T19:15:46.170151-05:00
custom:
Author: peterallenwebb
Issue: "9029"
34 changes: 34 additions & 0 deletions core/dbt/contracts/README.md
Original file line number Diff line number Diff line change
@@ -1 +1,35 @@
# Contracts README


## Artifacts

### Generating JSON schemas
A helper script, `scripts/collect-artifact-schema.py`, is available to generate json schemas corresponding to versioned artifacts (`ArtifactMixin`s).

This script must be run whenever a new artifact schema version is created, or when changes are made to an existing artifact version; it writes the json schema to `schema/dbt/<artifact>/v<version>.json`.

Schemas in `schema/dbt` power the rendering in https://schemas.getdbt.com/ via https://github.com/dbt-labs/schemas.getdbt.com/

#### Example Usage

Available arguments:
```sh
❯ scripts/collect-artifact-schema.py --help
usage: Collect and write dbt arfifact schema [-h] [--path PATH] [--artifact {manifest,sources,run-results,catalog}]

options:
-h, --help show this help message and exit
--path PATH The dir to write artifact schema
--artifact {manifest,sources,run-results,catalog}
The name of the artifact to update
```

Generate latest version of schemas of all artifacts to `schema/dbt/<artifact>/v<version>.json`
```sh
> scripts/collect-artifact-schema.py --path schemas
```

Generate latest version of schemas of manifest to `schema/dbt/manifest/v<version>.json`
```sh
> scripts/collect-artifact-schema.py --path schemas --artifact manifest
```
59 changes: 41 additions & 18 deletions core/dbt/deps/tarball.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@
import functools
import os
from pathlib import Path
from typing import Dict

from dbt.contracts.project import RegistryPackageMetadata, TarballPackage
from dbt.deps.base import PinnedPackage, UnpinnedPackage
from dbt.clients import system
from dbt.config.project import PartialProject
from dbt.contracts.project import TarballPackage
from dbt.deps.base import PinnedPackage, UnpinnedPackage, get_downloads_path
from dbt.exceptions import DependencyError
from dbt.utils import _connection_exception_retry as connection_exception_retry


class TarballPackageMixin:
Expand All @@ -20,9 +27,10 @@ def source_type(self) -> str:
class TarballPinnedPackage(TarballPackageMixin, PinnedPackage):
def __init__(self, tarball: str, package: str) -> None:
super().__init__(tarball)
# setup to recycle RegistryPinnedPackage fns
self.package = package
self.version = "tarball"
self.tar_path = os.path.join(Path(get_downloads_path()), self.package)
self.untarred_path = f"{self.tar_path}_untarred"

@property
def name(self):
Expand All @@ -41,23 +49,38 @@ def nice_version_name(self):
return f"tarball (url: {self.tarball})"

def _fetch_metadata(self, project, renderer):
"""
recycle RegistryPackageMetadata so that we can use the install and
download_and_untar from RegistryPinnedPackage next.
build RegistryPackageMetadata from info passed via packages.yml since no
'metadata' service exists in this case.
"""

dct = {
"name": self.package,
"packages": [], # note: required by RegistryPackageMetadata
"downloads": {"tarball": self.tarball},
}

return RegistryPackageMetadata.from_dict(dct)
"""Download and untar the project and parse metadata from the project folder."""
download_untar_fn = functools.partial(
self.download_and_untar, self.tarball, self.tar_path, self.untarred_path, self.name
)
connection_exception_retry(download_untar_fn, 5)

tar_contents = os.listdir(self.untarred_path)
if len(tar_contents) != 1:
raise DependencyError(
f"Incorrect structure for package extracted from {self.tarball}."
f"The extracted package needs to follow the structure {self.name}/<package_contents>."
)
child_folder = os.listdir(self.untarred_path)[0]

self.untarred_path = os.path.join(self.untarred_path, child_folder)
partial = PartialProject.from_project_root(self.untarred_path)
metadata = partial.render_package_metadata(renderer)
metadata.name = self.package if self.package else metadata.name
return metadata

def install(self, project, renderer):
self._install(project, renderer)
download_untar_fn = functools.partial(
self.download_and_untar, self.tarball, self.tar_path, self.untarred_path, self.name
)
connection_exception_retry(download_untar_fn, 5)
dest_path = self.get_installation_path(project, renderer)
if os.path.exists(dest_path):
if system.path_is_symlink(dest_path):
system.remove_file(dest_path)
else:
system.rmdir(dest_path)
system.move(self.untarred_path, dest_path)


class TarballUnpinnedPackage(TarballPackageMixin, UnpinnedPackage[TarballPinnedPackage]):
Expand Down
4 changes: 1 addition & 3 deletions core/dbt/parser/schema_yaml_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,9 +308,7 @@ def _get_metric_type_params(self, type_params: UnparsedMetricTypeParams) -> Metr
window=self._get_time_window(type_params.window),
grain_to_date=grain_to_date,
metrics=self._get_metric_inputs(type_params.metrics),
# TODO This is a compiled list of measure/numerator/denominator as
# well as the `input_measures` of included metrics. We're planning
# on doing this as part of CT-2707
# input measures are calculated via metric processing post parsing
# input_measures=?,
)

Expand Down
38 changes: 36 additions & 2 deletions core/dbt/parser/unit_tests.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
from csv import DictReader
from pathlib import Path
from typing import List, Set, Dict, Any

from dbt_extractor import py_extract_from_source, ExtractionError # type: ignore

from dbt.config import RuntimeConfig
from dbt.context.context_config import ContextConfig
from dbt.context.providers import generate_parse_exposure, get_rendered
Expand Down Expand Up @@ -28,7 +32,6 @@
ParseResult,
)
from dbt.utils import get_pseudo_test_path
from dbt_extractor import py_extract_from_source, ExtractionError # type: ignore


class UnitTestManifestLoader:
Expand Down Expand Up @@ -130,7 +133,7 @@ def parse_unit_test_case(self, test_case: UnitTestDefinition):
),
}

if original_input_node.resource_type == NodeType.Model:
if original_input_node.resource_type in (NodeType.Model, NodeType.Seed):
input_name = f"{unit_test_node.name}__{original_input_node.name}"
input_node = ModelNode(
**common_fields,
Expand Down Expand Up @@ -219,6 +222,35 @@ def __init__(self, schema_parser: SchemaParser, yaml: YamlBlock) -> None:
self.schema_parser = schema_parser
self.yaml = yaml

def _load_rows_from_seed(self, ref_str: str) -> List[Dict[str, Any]]:
"""Read rows from seed file on disk if not specified in YAML config. If seed file doesn't exist, return empty list."""
ref = py_extract_from_source("{{ " + ref_str + " }}")["refs"][0]

rows: List[Dict[str, Any]] = []

seed_name = ref["name"]
package_name = ref.get("package", self.project.project_name)

seed_node = self.manifest.ref_lookup.find(seed_name, package_name, None, self.manifest)

if not seed_node or seed_node.resource_type != NodeType.Seed:
# Seed not found in custom package specified
if package_name != self.project.project_name:
raise ParsingError(
f"Unable to find seed '{package_name}.{seed_name}' for unit tests in '{package_name}' package"
)
else:
raise ParsingError(
f"Unable to find seed '{package_name}.{seed_name}' for unit tests in directories: {self.project.seed_paths}"
)

seed_path = Path(seed_node.root_path) / seed_node.original_file_path
with open(seed_path, "r") as f:
for row in DictReader(f):
rows.append(row)

return rows

def parse(self) -> ParseResult:
for data in self.get_key_dicts():
unit_test = self._get_unit_test(data)
Expand All @@ -232,6 +264,8 @@ def parse(self) -> ParseResult:

# Check that format and type of rows matches for each given input
for input in unit_test.given:
if input.rows is None and input.fixture is None:
input.rows = self._load_rows_from_seed(input.input)
input.validate_fixture("input", unit_test.name)
unit_test.expect.validate_fixture("expected", unit_test.name)

Expand Down
21 changes: 15 additions & 6 deletions core/dbt/plugins/manager.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import functools
import importlib
import pkgutil
from typing import Dict, List, Callable
from types import ModuleType
from typing import Dict, List, Callable, Mapping

from dbt.contracts.graph.manifest import Manifest
from dbt.exceptions import DbtRuntimeError
Expand Down Expand Up @@ -63,6 +65,17 @@ def get_manifest_artifacts(self, manifest: Manifest) -> PluginArtifacts:
raise NotImplementedError(f"get_manifest_artifacts hook not implemented for {self.name}")


@functools.lru_cache(maxsize=None)
def _get_dbt_modules() -> Mapping[str, ModuleType]:
# This is an expensive function, especially in the context of testing, when
# it is called repeatedly, so we break it out and cache the result globally.
return {
name: importlib.import_module(name)
for _, name, _ in pkgutil.iter_modules()
if name.startswith(PluginManager.PLUGIN_MODULE_PREFIX)
}


class PluginManager:
PLUGIN_MODULE_PREFIX = "dbt_"
PLUGIN_ATTR_NAME = "plugins"
Expand Down Expand Up @@ -91,11 +104,7 @@ def __init__(self, plugins: List[dbtPlugin]) -> None:

@classmethod
def from_modules(cls, project_name: str) -> "PluginManager":
discovered_dbt_modules = {
name: importlib.import_module(name)
for _, name, _ in pkgutil.iter_modules()
if name.startswith(cls.PLUGIN_MODULE_PREFIX)
}
discovered_dbt_modules = _get_dbt_modules()

plugins = []
for name, module in discovered_dbt_modules.items():
Expand Down
2 changes: 1 addition & 1 deletion dev-requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
black==23.3.0
bumpversion
ddtrace
ddtrace==2.1.7
docutils
flake8
flaky
Expand Down
Loading

0 comments on commit 827e35e

Please sign in to comment.