diff --git a/.github/workflows/integration-tests-sqlmesh.yaml b/.github/workflows/integration-tests-sqlmesh.yaml index a95208bb..f0eb494c 100644 --- a/.github/workflows/integration-tests-sqlmesh.yaml +++ b/.github/workflows/integration-tests-sqlmesh.yaml @@ -4,16 +4,12 @@ on: push: branches: - main - paths: - - "recce/**" pull_request: branches: - main - paths: - - "recce/**" jobs: - build: + smoke-test: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 @@ -23,6 +19,7 @@ jobs: with: python-version: 3.11 cache: "pip" # caching pip dependencies + cache-dependency-path: setup.py - run: | python -m pip install --upgrade pip pip install sqlmesh diff --git a/.github/workflows/integration-tests.yaml b/.github/workflows/integration-tests.yaml index 42d4df02..2bbdf9ea 100644 --- a/.github/workflows/integration-tests.yaml +++ b/.github/workflows/integration-tests.yaml @@ -4,28 +4,36 @@ on: push: branches: - main - paths: - - "recce/**" pull_request: branches: - main - paths: - - "recce/**" jobs: - build: + smoke-test: runs-on: ubuntu-latest + strategy: + matrix: + include: + - python-version: "3.9" + dbt-version: "1.6" + - python-version: "3.11" + dbt-version: "latest" steps: - uses: actions/checkout@v3 - name: Set up Python uses: actions/setup-python@v5 with: - python-version: 3.11 + python-version: ${{ matrix.python-version }} cache: "pip" # caching pip dependencies + cache-dependency-path: setup.py - run: | python -m pip install --upgrade pip - pip install -r ./integration_tests/dbt/requirements.txt + if [ "${{ matrix.dbt-version }}" = "latest" ]; then + pip install dbt-duckdb + else + pip install dbt-duckdb==${{ matrix.dbt-version }} + fi - name: Install Recce run: | diff --git a/.github/workflows/eslint.yaml b/.github/workflows/tests-js.yaml similarity index 98% rename from .github/workflows/eslint.yaml rename to .github/workflows/tests-js.yaml index 8087d0ed..1267dcff 100644 --- a/.github/workflows/eslint.yaml +++ b/.github/workflows/tests-js.yaml @@ -1,4 +1,4 @@ -name: Lint by ESLint +name: Test JS on: push: diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests-python.yaml similarity index 87% rename from .github/workflows/tests.yaml rename to .github/workflows/tests-python.yaml index dc74d9d1..fdb5132d 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests-python.yaml @@ -1,4 +1,4 @@ -name: Run Python Flake8 and Unit Tests +name: Test Python on: push: @@ -11,9 +11,10 @@ on: - main paths: - "recce/**" + - "setup.py" jobs: - build: + test-python: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 @@ -23,6 +24,7 @@ jobs: with: python-version: 3.11 cache: "pip" # caching pip dependencies + cache-dependency-path: setup.py - name: Install flake8 run: pip install flake8 @@ -41,4 +43,4 @@ jobs: - name: Run tests run: | - make test + make test-tox diff --git a/Makefile b/Makefile index f60bf3ee..07facd39 100644 --- a/Makefile +++ b/Makefile @@ -17,7 +17,9 @@ flake8: test: install-dev @python3 -m pytest tests - @tox + +test-tox: install-dev + @tox install-frontend-requires: # Install pnpm if not installed diff --git a/integration_tests/dbt/requirements.txt b/integration_tests/dbt/requirements.txt deleted file mode 100644 index 92872c7e..00000000 --- a/integration_tests/dbt/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -dbt-core -dbt-duckdb \ No newline at end of file diff --git a/integration_tests/dbt/smoke_test.sh b/integration_tests/dbt/smoke_test.sh index 09f35205..26a45708 100755 --- a/integration_tests/dbt/smoke_test.sh +++ b/integration_tests/dbt/smoke_test.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +set -euxo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" cd "$SCRIPT_DIR" diff --git a/integration_tests/sqlmesh/prep_env.sh b/integration_tests/sqlmesh/prep_env.sh index b526f14d..767da7cb 100755 --- a/integration_tests/sqlmesh/prep_env.sh +++ b/integration_tests/sqlmesh/prep_env.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +set -euxo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" cd "$SCRIPT_DIR" diff --git a/integration_tests/sqlmesh/test_server.sh b/integration_tests/sqlmesh/test_server.sh index 5d75ce6e..0af02dbe 100755 --- a/integration_tests/sqlmesh/test_server.sh +++ b/integration_tests/sqlmesh/test_server.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +set -euxo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" cd "$SCRIPT_DIR" diff --git a/recce/core.py b/recce/core.py index 65cbd7e4..3dbd681c 100644 --- a/recce/core.py +++ b/recce/core.py @@ -4,9 +4,7 @@ import os from dataclasses import dataclass from pathlib import Path -from typing import Callable, Dict, Optional, Tuple - -import agate +from typing import Callable, Dict, Optional from recce.adapter.base import BaseAdapter from recce.models import RunDAO, CheckDAO, Check @@ -65,15 +63,6 @@ def get_node_name_by_id(self, unique_id: object) -> object: def generate_sql(self, sql_template: str, base: bool = False, context: Dict = {}): self.adapter.generate_sql(sql_template, base=base, context=context) - def execute( - self, - sql: str, - auto_begin: bool = False, - fetch: bool = False, - limit: Optional[int] = None - ) -> Tuple[any, agate.Table]: - return self.adapter.execute(sql, auto_begin=auto_begin, fetch=fetch, limit=limit) - def get_lineage(self, base: Optional[bool] = False): return self.adapter.get_lineage(base=base) diff --git a/recce/tasks/histogram.py b/recce/tasks/histogram.py index 766c8c82..19872ab7 100644 --- a/recce/tasks/histogram.py +++ b/recce/tasks/histogram.py @@ -4,7 +4,6 @@ from dateutil.relativedelta import relativedelta -from recce.adapter.dbt_adapter import DbtAdapter from recce.core import default_context from recce.tasks import Task from recce.tasks.core import TaskResultDiffer @@ -293,6 +292,7 @@ def __init__(self, params: HistogramDiffParams): self.connection = None def execute(self): + from recce.adapter.dbt_adapter import DbtAdapter result = {} dbt_adapter: DbtAdapter = default_context().adapter diff --git a/recce/tasks/profile.py b/recce/tasks/profile.py index 380f7e9c..ac44027f 100644 --- a/recce/tasks/profile.py +++ b/recce/tasks/profile.py @@ -1,9 +1,7 @@ from typing import TypedDict, List -import agate from pydantic import BaseModel -from recce.adapter.dbt_adapter import DbtAdapter, merge_tables from .core import Task, TaskResultDiffer from .dataframe import DataFrame from ..core import default_context @@ -27,6 +25,8 @@ def __init__(self, params: ProfileParams): self.connection = None def execute(self): + import agate + from recce.adapter.dbt_adapter import DbtAdapter, merge_tables dbt_adapter: DbtAdapter = default_context().adapter model: str = self.params['model'] @@ -72,7 +72,7 @@ def _verify_dbt_profiler(self, dbt_adapter): raise RecceException( r"Package 'dbt_profiler' not found. Please refer to the link to install: https://hub.getdbt.com/data-mie/dbt_profiler/") - def _profile_column(self, dbt_adapter: DbtAdapter, relation, column): + def _profile_column(self, dbt_adapter, relation, column): sql_template = r""" select @@ -116,24 +116,11 @@ def _profile_column(self, dbt_adapter: DbtAdapter, relation, column): e = RecceException('No profile diff result due to the model is empty.', False) raise e - def _to_dataframe(self, table: agate.Table): - import pandas as pd - import json - - df = pd.DataFrame([row.values() for row in table.rows], columns=table.column_names) - - for column_name, column_type in zip(table.column_names, table.column_types): - if column_name.lower() == 'not_null_proportion': - df[column_name] = df[column_name].astype('float') - if column_name.lower() == 'distinct_proportion': - df[column_name] = df[column_name].astype('float') - result_json = df.to_json(orient='table') - return json.loads(result_json) - def cancel(self): super().cancel() if self.connection: + from recce.adapter.dbt_adapter import DbtAdapter dbt_adapter: DbtAdapter = default_context().adapter with dbt_adapter.connection_named("cancel"): dbt_adapter.cancel(self.connection) diff --git a/recce/tasks/query.py b/recce/tasks/query.py index 3b99ad1f..a06d40f1 100644 --- a/recce/tasks/query.py +++ b/recce/tasks/query.py @@ -1,6 +1,6 @@ +import typing from typing import TypedDict, Optional, Tuple, List -import agate from pydantic import BaseModel from .core import Task, TaskResultDiffer @@ -11,6 +11,9 @@ QUERY_LIMIT = 2000 +if typing.TYPE_CHECKING: + import agate + class QueryMixin: @classmethod @@ -19,7 +22,7 @@ def execute_sql_with_limit( sql_template, base: bool = False, limit: Optional[int] = None - ) -> Tuple[agate.Table, bool]: + ) -> Tuple['agate.Table', bool]: """ Execute a SQL template and return the result as an agate table. :param sql_template: SQL template to execute @@ -45,7 +48,7 @@ def execute_sql_with_limit( raise RecceException(f"Jinja template error: line {e.lineno}: {str(e)}") @classmethod - def execute_sql(cls, sql_template, base: bool = False) -> agate.Table: + def execute_sql(cls, sql_template, base: bool = False) -> 'agate.Table': result, _ = cls.execute_sql_with_limit(sql_template, base) return result diff --git a/recce/tasks/rowcount.py b/recce/tasks/rowcount.py index fc1c73ff..e089d518 100644 --- a/recce/tasks/rowcount.py +++ b/recce/tasks/rowcount.py @@ -1,6 +1,5 @@ from typing import TypedDict, Optional -from recce.adapter.dbt_adapter import DbtAdapter from recce.core import default_context from recce.tasks import Task from recce.tasks.core import TaskResultDiffer @@ -18,7 +17,7 @@ def __init__(self, params: RowCountDiffParams): self.params = params self.connection = None - def _query_row_count(self, dbt_adapter: DbtAdapter, model_name, base=False): + def _query_row_count(self, dbt_adapter, model_name, base=False): node = dbt_adapter.find_node_by_name(model_name, base=base) if node is None: return None diff --git a/recce/tasks/top_k.py b/recce/tasks/top_k.py index a7970e9e..e2f611c6 100644 --- a/recce/tasks/top_k.py +++ b/recce/tasks/top_k.py @@ -1,6 +1,5 @@ from typing import TypedDict -from recce.adapter.dbt_adapter import DbtAdapter from recce.core import default_context from recce.tasks import Task from recce.tasks.core import TaskResultDiffer @@ -20,7 +19,7 @@ def __init__(self, params: TopKDiffParams): self.params = params self.connection = None - def _query_row_count_diff(self, dbt_adapter: DbtAdapter, base_relation, curr_relation, column): + def _query_row_count_diff(self, dbt_adapter, base_relation, curr_relation, column): """ Query the row count of the base and current relations @@ -98,6 +97,7 @@ def _query_top_k(self, dbt_adapter, base_relation, curr_relation, column, k): def execute(self): + from recce.adapter.dbt_adapter import DbtAdapter dbt_adapter: DbtAdapter = default_context().adapter with dbt_adapter.connection_named("query"): diff --git a/recce/tasks/valuediff.py b/recce/tasks/valuediff.py index c01ec34e..2927d78a 100644 --- a/recce/tasks/valuediff.py +++ b/recce/tasks/valuediff.py @@ -1,11 +1,14 @@ +import typing from typing import TypedDict, Optional, List, Union -import agate +if typing.TYPE_CHECKING: + import agate + from pydantic import BaseModel from .core import Task, TaskResultDiffer from .dataframe import DataFrame -from ..adapter.dbt_adapter import DbtAdapter + from ..core import default_context from ..exceptions import RecceException @@ -40,7 +43,7 @@ def _verify_dbt_packages_deps(self, dbt_adapter): self.legacy_surrogate_key = False break - def _verify_primary_key(self, dbt_adapter: DbtAdapter, primary_key: Union[str, List[str]], model: str): + def _verify_primary_key(self, dbt_adapter, primary_key: Union[str, List[str]], model: str): self.update_progress(message=f"Verify primary key: {primary_key}") composite = True if isinstance(primary_key, List) else False @@ -85,7 +88,7 @@ def __init__(self, params: ValueDiffParams): self.connection = None self.legacy_surrogate_key = True - def _query_value_diff(self, dbt_adpter: DbtAdapter, primary_key: Union[str, List[str]], model: str, + def _query_value_diff(self, dbt_adpter, primary_key: Union[str, List[str]], model: str, columns: List[str] = None): column_groups = {} composite = True if isinstance(primary_key, List) else False @@ -232,6 +235,8 @@ def execute(self): return self._query_value_diff(dbt_adapter, primary_key, model, columns=columns) def cancel(self): + from recce.adapter.dbt_adapter import DbtAdapter + if self.connection: adapter: DbtAdapter = default_context().adapter with adapter.connection_named("cancel"): @@ -290,8 +295,7 @@ def __init__(self, params: ValueDiffParams): self.connection = None self.legacy_surrogate_key = True - def _query_value_diff(self, dbt_adapter: DbtAdapter, primary_key: Union[str, List[str]], model: str, - columns: List[str] = None): + def _query_value_diff(self, dbt_adapter, primary_key: Union[str, List[str]], model: str, columns: List[str] = None): composite = True if isinstance(primary_key, List) else False @@ -356,6 +360,7 @@ def _query_value_diff(self, dbt_adapter: DbtAdapter, primary_key: Union[str, Lis def execute(self): + from recce.adapter.dbt_adapter import DbtAdapter dbt_adapter: DbtAdapter = default_context().adapter with dbt_adapter.connection_named("value diff"): @@ -374,6 +379,7 @@ def execute(self): return self._query_value_diff(dbt_adapter, primary_key, model, columns) def cancel(self): + from recce.adapter.dbt_adapter import DbtAdapter if self.connection: adapter: DbtAdapter = default_context().adapter with adapter.connection_named("cancel"): diff --git a/setup.py b/setup.py index 2568a5d8..75d581bc 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,6 @@ def _get_version(): 'ruamel.yaml<0.18.0', 'click>=7.1', 'deepdiff', - 'dbt-core>=1.5', 'portalocker', 'fastapi', 'itsdangerous',