diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 8b0bd2f..2e7972e 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -7,33 +7,66 @@ on:
     branches:
       - main
 jobs:
-  test:
+  # Lint once via pre-commit, independently of the dbt/database matrix.
+  pre-commit:
     runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v4
+        with:
+          python-version: "3.10"
+      - uses: pre-commit/action@v3.0.0
+  # Run the integration tests for every (dbt-core, adapter) combination.
+  integration-tests:
+    runs-on: ubuntu-latest
+    services:
+      # Database for the dbt-postgres matrix entries. The credentials here
+      # must stay in sync with the POSTGRES_* env of the Test step.
+      postgres:
+        # Pinned major version so a new upstream release cannot change CI.
+        image: postgres:15
+        env:
+          POSTGRES_USER: postgres
+          POSTGRES_PASSWORD: postgres
+          POSTGRES_DB: dbt_linreg
+        ports:
+          - "5432:5432"
+        options: >-
+          --health-cmd pg_isready
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 5
     strategy:
       matrix:
-        dbt_core: [1.3.*, 1.4.*, 1.5.*, 1.6.*]
+        dbt_core: ["1.4.*", "1.6.*"]
+        # Used both as the pip package to install and as the `./run test` arg.
+        db_target: [dbt-duckdb, dbt-postgres]
     steps:
-      - uses: actions/checkout@v1
-      - uses: actions/setup-python@v1
-        with:
-          python-version: "3.10"
-          architecture: x64
-      - name: Install Poetry
-        uses: snok/install-poetry@v1
-        with:
-          version: 1.4.0
-          virtualenvs-create: true
-          virtualenvs-in-project: true
-      - name: Install dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install
-          chmod +x ./run
-          ./run setup
-          pip install -U "dbt-core==$DBT_CORE_VERSION" "dbt-duckdb"
-        env:
-          DBT_CORE_VERSION: ${{ matrix.dbt_core }}
-      - name: Lint
-        run: ./run lint
-      - name: Test
-        run: ./run test
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v4
+        with:
+          python-version: "3.10"
+      - name: Install Poetry
+        uses: snok/install-poetry@v1
+        with:
+          version: 1.4.0
+          virtualenvs-create: true
+          virtualenvs-in-project: true
+      - name: Install dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install
+          chmod +x ./run
+          ./run setup
+          pip install -U "dbt-core==$DBT_CORE_VERSION" "${DBT_PROVIDER_PACKAGE}"
+        env:
+          DBT_CORE_VERSION: ${{ matrix.dbt_core }}
+          DBT_PROVIDER_PACKAGE: ${{ matrix.db_target }}
+      - name: Test
+        run: ./run test "${DBT_TARGET}"
+        env:
+          DBT_TARGET: ${{ matrix.db_target }}
+          POSTGRES_HOST: localhost
+ POSTGRES_USER: postgres + POSTGRES_PASSWORD: postgres + POSTGRES_DB: dbt_linreg diff --git a/.gitignore b/.gitignore index c076aed..0ce0028 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ dbt.duckdb +dbt.duckdb.wal .user.yml docs/site/ integration_tests/seeds/*.csv diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 077bbb0..e659793 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -17,3 +17,8 @@ repos: hooks: - id: shellcheck args: [-x, run] + +- repo: https://github.com/rhysd/actionlint + rev: v1.6.26 + hooks: + - id: actionlint diff --git a/CHANGELOG.md b/CHANGELOG.md index f372f48..e53a321 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +### `0.2.3` + +- Added Postgres support in integration tests + fixed bugs that prevented Postgres from working. + ### `0.2.2` - Added dbt documentation of the `ols()` macro. diff --git a/README.md b/README.md index 6727e0c..5889d0e 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ Add this the `packages:` list your dbt project's `packages.yml`: ```yaml - package: "dwreeves/dbt_linreg" - version: "0.2.2" + version: "0.2.3" ``` The full file will look something like this: @@ -41,7 +41,7 @@ packages: # Other packages here # ... - package: "dwreeves/dbt_linreg" - version: "0.2.2" + version: "0.2.3" ``` # Examples @@ -64,7 +64,7 @@ select * from {{ format='long', format_options={'round': 5} ) -}} +}} as linreg ``` Output: @@ -169,9 +169,12 @@ group by - Snowflake - DuckDB +- Postgres\* If `dbt_linreg` does not work in your database tool, please let me know in a bug report and I can make sure it is supported. +> _* Minimal support. Postgres is syntactically supported, but is not performant under certain circumstances._ + # API The only function available in the public API is the `dbt_linreg.ols()` macro. 
@@ -255,7 +258,7 @@ This method calculates regression coefficients using the Moore-Penrose pseudo-in Specify these in a dict using the `method_options=` kwarg: - **safe** (default = `True`): If True, returns null coefficients instead of an error when X is perfectly multicollinear. If False, a negative value will be passed into a SQRT(), and most SQL engines will raise an error when this happens. -- **subquery_optimization** (default = `True`): If True, nested subqueries are used during some of the steps to optimize the query speed. If false, the query is flattened. Note that turning this off can significantly degrade performance. +- **subquery_optimization** (default = `True`): If True, nested subqueries are used during some of the steps to optimize the query speed. If False, the query is flattened. ## `fwl` method @@ -269,10 +272,12 @@ Ridge regression is implemented using the augmentation technique described in Ex There are a few reasons why this method is discouraged over the `chol` method: -- 🐌 It tends to be much slower, and struggles to efficiently calculate large number of columns. +- 🐌 It tends to be much slower in OLAP systems, and struggles to efficiently calculate a large number of columns. - 📊 It does not calculate standard errors. - 😕 For ridge regression, coefficients are not accurate; they tend to be off by a magnitude of ~0.01%. +So when should you use `fwl`? The main use case is in OLTP systems (e.g. Postgres) for unregularized coefficient estimation. Long story short, the `chol` method relies on subquery optimization to be more performant than `fwl`; however, OLTP systems do not benefit at all from subquery optimization. This means that `fwl` is slightly more performant in this context. # Notes - ⚠️ **If your coefficients are null, it does not mean dbt_linreg is broken, it most likely means your feature columns are perfectly multicollinear.** If you are 100% sure that is not the issue, please file a bug report with a minimally reproducible example. 
@@ -282,6 +287,11 @@ There are a few reasons why this method is discouraged over the `chol` method: - An array input (e.g. `alpha=[0.01, 0.02, 0.03, 0.04, 0.05]`) will apply an alpha of `0.01` to the first column, `0.02` to the second column, etc. - `alpha` is equivalent to what TEoSL refers to as "lambda," times the sample size N. That is to say: `α ≡ λ * N`. +- Regularization as currently implemented for the `chol` method tends to be very slow in OLTP systems (e.g. Postgres), but is very performant in OLAP systems (e.g. Snowflake, DuckDB, BigQuery, Redshift). As dbt is more commonly used in OLAP contexts, the code base is optimized for the OLAP use case. + - That said, it may be possible to make regularization in OLTP more performant (e.g. with augmentation of the design matrix), so PRs are welcome. + +- Regression coefficients in Postgres are always `numeric` types. + ### Possible future features Some things I am thinking about working on down the line: diff --git a/dbt_project.yml b/dbt_project.yml index 79c08b3..59a675e 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -1,5 +1,5 @@ name: "dbt_linreg" -version: "0.2.2" +version: "0.2.3" # 1.2 is required because of modules.itertools. 
require-dbt-version: [">=1.2.0", "<2.0.0"] diff --git a/integration_tests/dbt_project.yml b/integration_tests/dbt_project.yml index c85ee09..88472f4 100644 --- a/integration_tests/dbt_project.yml +++ b/integration_tests/dbt_project.yml @@ -1,5 +1,5 @@ name: "dbt_linreg_tests" -version: "0.2.1" +version: "0.2.3" require-dbt-version: [">=1.0.0", "<2.0.0"] diff --git a/integration_tests/models/collinear_matrix_1var_without_const.sql b/integration_tests/models/collinear_matrix_1var_without_const.sql index 1c8e8e7..23809b8 100644 --- a/integration_tests/models/collinear_matrix_1var_without_const.sql +++ b/integration_tests/models/collinear_matrix_1var_without_const.sql @@ -11,4 +11,4 @@ select * from {{ format='long', add_constant=False ) -}} +}} as linreg diff --git a/integration_tests/models/collinear_matrix_1var_without_const_ridge.sql b/integration_tests/models/collinear_matrix_1var_without_const_ridge.sql index 59d9c5a..3eb2e64 100644 --- a/integration_tests/models/collinear_matrix_1var_without_const_ridge.sql +++ b/integration_tests/models/collinear_matrix_1var_without_const_ridge.sql @@ -12,4 +12,4 @@ select * from {{ format='long', add_constant=False ) -}} +}} as linreg diff --git a/integration_tests/models/collinear_matrix_2var_without_const.sql b/integration_tests/models/collinear_matrix_2var_without_const.sql index a5b4b5f..9b293b0 100644 --- a/integration_tests/models/collinear_matrix_2var_without_const.sql +++ b/integration_tests/models/collinear_matrix_2var_without_const.sql @@ -11,4 +11,4 @@ select * from {{ format='long', add_constant=False ) -}} +}} as linreg diff --git a/integration_tests/models/collinear_matrix_3var_without_const.sql b/integration_tests/models/collinear_matrix_3var_without_const.sql index 56d2c99..11c642e 100644 --- a/integration_tests/models/collinear_matrix_3var_without_const.sql +++ b/integration_tests/models/collinear_matrix_3var_without_const.sql @@ -11,4 +11,4 @@ select * from {{ format='long', add_constant=False ) -}} +}} as 
linreg diff --git a/integration_tests/models/collinear_matrix_4var_without_const.sql b/integration_tests/models/collinear_matrix_4var_without_const.sql index 6ed0249..54144c4 100644 --- a/integration_tests/models/collinear_matrix_4var_without_const.sql +++ b/integration_tests/models/collinear_matrix_4var_without_const.sql @@ -11,4 +11,4 @@ select * from {{ format='long', add_constant=False ) -}} +}} as linreg diff --git a/integration_tests/models/collinear_matrix_5var_without_const.sql b/integration_tests/models/collinear_matrix_5var_without_const.sql index 1b3056b..f3187e6 100644 --- a/integration_tests/models/collinear_matrix_5var_without_const.sql +++ b/integration_tests/models/collinear_matrix_5var_without_const.sql @@ -11,4 +11,4 @@ select * from {{ format='long', add_constant=False ) -}} +}} as linreg diff --git a/integration_tests/models/collinear_matrix_5var_without_const_ridge.sql b/integration_tests/models/collinear_matrix_5var_without_const_ridge.sql index 109ffed..dfe9a9b 100644 --- a/integration_tests/models/collinear_matrix_5var_without_const_ridge.sql +++ b/integration_tests/models/collinear_matrix_5var_without_const_ridge.sql @@ -1,6 +1,7 @@ {{ config( - materialized="table" + materialized="table", + tags=["skip-postgres"] ) }} select * from {{ @@ -12,4 +13,4 @@ select * from {{ format='long', add_constant=False ) -}} +}} as linreg diff --git a/integration_tests/models/collinear_matrix_regression_chol.sql b/integration_tests/models/collinear_matrix_regression_chol.sql index 2c47592..0874ed7 100644 --- a/integration_tests/models/collinear_matrix_regression_chol.sql +++ b/integration_tests/models/collinear_matrix_regression_chol.sql @@ -1,6 +1,7 @@ {{ config( - materialized="table" + materialized="table", + tags=["skip-postgres"] ) }} select * from {{ @@ -11,4 +12,4 @@ select * from {{ format='long', method='chol' ) -}} +}} as linreg diff --git a/integration_tests/models/collinear_matrix_regression_chol_unoptimized.sql 
b/integration_tests/models/collinear_matrix_regression_chol_unoptimized.sql index c85c9aa..ae8a0da 100644 --- a/integration_tests/models/collinear_matrix_regression_chol_unoptimized.sql +++ b/integration_tests/models/collinear_matrix_regression_chol_unoptimized.sql @@ -1,6 +1,7 @@ {{ config( - materialized="table" + materialized="table", + tags=["skip-postgres"] ) }} select * from {{ @@ -12,4 +13,4 @@ select * from {{ method='chol', method_options={'subquery_optimization': False} ) -}} +}} as linreg diff --git a/integration_tests/models/collinear_matrix_regression_fwl.sql b/integration_tests/models/collinear_matrix_regression_fwl.sql index 7e104db..e427c76 100644 --- a/integration_tests/models/collinear_matrix_regression_fwl.sql +++ b/integration_tests/models/collinear_matrix_regression_fwl.sql @@ -11,4 +11,4 @@ select * from {{ format='long', method='fwl' ) -}} +}} as linreg diff --git a/integration_tests/models/collinear_matrix_ridge_regression_chol.sql b/integration_tests/models/collinear_matrix_ridge_regression_chol.sql index a73d4e1..ccb622e 100644 --- a/integration_tests/models/collinear_matrix_ridge_regression_chol.sql +++ b/integration_tests/models/collinear_matrix_ridge_regression_chol.sql @@ -1,6 +1,7 @@ {{ config( - materialized="table" + materialized="table", + tags=["skip-postgres"] ) }} select * from {{ @@ -12,4 +13,4 @@ select * from {{ alpha=0.01, method='chol' ) -}} +}} as linreg diff --git a/integration_tests/models/collinear_matrix_ridge_regression_chol_unoptimized.sql b/integration_tests/models/collinear_matrix_ridge_regression_chol_unoptimized.sql index 63888e7..e0e5160 100644 --- a/integration_tests/models/collinear_matrix_ridge_regression_chol_unoptimized.sql +++ b/integration_tests/models/collinear_matrix_ridge_regression_chol_unoptimized.sql @@ -1,6 +1,7 @@ {{ config( - materialized="table" + materialized="table", + tags=["skip-postgres"] ) }} select * from {{ @@ -13,4 +14,4 @@ select * from {{ method='chol', 
method_options={'subquery_optimization': False} ) -}} +}} as linreg diff --git a/integration_tests/models/collinear_matrix_ridge_regression_fwl.sql b/integration_tests/models/collinear_matrix_ridge_regression_fwl.sql index e4fbcad..1ebc482 100644 --- a/integration_tests/models/collinear_matrix_ridge_regression_fwl.sql +++ b/integration_tests/models/collinear_matrix_ridge_regression_fwl.sql @@ -12,4 +12,4 @@ select * from {{ alpha=0.01, method='fwl' ) -}} +}} as linreg diff --git a/integration_tests/models/groups_matrix_regression_chol.sql b/integration_tests/models/groups_matrix_regression_chol_optimized.sql similarity index 60% rename from integration_tests/models/groups_matrix_regression_chol.sql rename to integration_tests/models/groups_matrix_regression_chol_optimized.sql index 2b081e8..0b6a836 100644 --- a/integration_tests/models/groups_matrix_regression_chol.sql +++ b/integration_tests/models/groups_matrix_regression_chol_optimized.sql @@ -1,6 +1,7 @@ {{ config( - materialized="table" + materialized="table", + tags=["skip-postgres"] ) }} select * from {{ @@ -10,7 +11,8 @@ select * from {{ exog=['x1', 'x2', 'x3'], group_by=['gb_var'], format='long', - method='chol' + method='chol', + method_options={'subquery_optimization': True} ) -}} +}} as linreg order by gb_var, variable_name diff --git a/integration_tests/models/groups_matrix_regression_chol_unoptimized.sql b/integration_tests/models/groups_matrix_regression_chol_unoptimized.sql index e1026d8..0a6e718 100644 --- a/integration_tests/models/groups_matrix_regression_chol_unoptimized.sql +++ b/integration_tests/models/groups_matrix_regression_chol_unoptimized.sql @@ -1,6 +1,7 @@ {{ config( - materialized="table" + materialized="table", + tags=["skip-postgres"] ) }} select * from {{ @@ -13,5 +14,5 @@ select * from {{ method='chol', method_options={'subquery_optimization': False} ) -}} +}} as linreg order by gb_var, variable_name diff --git a/integration_tests/models/groups_matrix_regression_fwl.sql 
b/integration_tests/models/groups_matrix_regression_fwl.sql index b6ab07c..eda7dd6 100644 --- a/integration_tests/models/groups_matrix_regression_fwl.sql +++ b/integration_tests/models/groups_matrix_regression_fwl.sql @@ -12,5 +12,5 @@ select * from {{ format='long', method='fwl' ) -}} +}} as linreg order by gb_var, variable_name diff --git a/integration_tests/models/long_format_options.sql b/integration_tests/models/long_format_options.sql index 3e2c681..2fd436f 100644 --- a/integration_tests/models/long_format_options.sql +++ b/integration_tests/models/long_format_options.sql @@ -18,7 +18,7 @@ select 'strip_quotes': True } ) - }} + }} as linreg1 union all @@ -37,4 +37,4 @@ select 'strip_quotes': False } ) - }} + }} as linreg2 diff --git a/integration_tests/models/perfectly_multicollinear_model.sql b/integration_tests/models/perfectly_multicollinear_model.sql index c831846..2f3fb9e 100644 --- a/integration_tests/models/perfectly_multicollinear_model.sql +++ b/integration_tests/models/perfectly_multicollinear_model.sql @@ -17,4 +17,4 @@ select * from {{ endog='y', exog=['xa', 'xb'] ) -}} +}} as linreg diff --git a/integration_tests/models/simple_0var_regression_long_chol.sql b/integration_tests/models/simple_0var_regression_long_chol.sql index f3bc907..fa84aea 100644 --- a/integration_tests/models/simple_0var_regression_long_chol.sql +++ b/integration_tests/models/simple_0var_regression_long_chol.sql @@ -11,4 +11,4 @@ select * from {{ format='long', format_options={'round': 5} ) -}} +}} as linreg diff --git a/integration_tests/models/simple_0var_regression_long_fwl.sql b/integration_tests/models/simple_0var_regression_long_fwl.sql index f3bc907..fa84aea 100644 --- a/integration_tests/models/simple_0var_regression_long_fwl.sql +++ b/integration_tests/models/simple_0var_regression_long_fwl.sql @@ -11,4 +11,4 @@ select * from {{ format='long', format_options={'round': 5} ) -}} +}} as linreg diff --git a/integration_tests/models/simple_0var_regression_wide.sql 
b/integration_tests/models/simple_0var_regression_wide.sql index 66956b0..dc2ca1f 100644 --- a/integration_tests/models/simple_0var_regression_wide.sql +++ b/integration_tests/models/simple_0var_regression_wide.sql @@ -11,4 +11,4 @@ select * from {{ format='wide', format_options={'round': 5} ) -}} +}} as linreg diff --git a/integration_tests/models/simple_10var_regression_long.sql b/integration_tests/models/simple_10var_regression_long.sql index e90fc38..e1d16c2 100644 --- a/integration_tests/models/simple_10var_regression_long.sql +++ b/integration_tests/models/simple_10var_regression_long.sql @@ -1,7 +1,8 @@ {{ config( materialized="view", - enabled=False + enabled=False, + tags=["skip-postgres"] ) }} select * from {{ @@ -12,4 +13,4 @@ select * from {{ format='long', format_options={'round': 5} ) -}} +}} as linreg diff --git a/integration_tests/models/simple_1var_regression_long_chol.sql b/integration_tests/models/simple_1var_regression_long_chol.sql index 5e7d765..5922702 100644 --- a/integration_tests/models/simple_1var_regression_long_chol.sql +++ b/integration_tests/models/simple_1var_regression_long_chol.sql @@ -11,4 +11,4 @@ select * from {{ format='long', format_options={'round': 5} ) -}} +}} as linreg diff --git a/integration_tests/models/simple_1var_regression_long_fwl.sql b/integration_tests/models/simple_1var_regression_long_fwl.sql index 5e7d765..5922702 100644 --- a/integration_tests/models/simple_1var_regression_long_fwl.sql +++ b/integration_tests/models/simple_1var_regression_long_fwl.sql @@ -11,4 +11,4 @@ select * from {{ format='long', format_options={'round': 5} ) -}} +}} as linreg diff --git a/integration_tests/models/simple_1var_regression_wide.sql b/integration_tests/models/simple_1var_regression_wide.sql index 10bcfb4..e75df44 100644 --- a/integration_tests/models/simple_1var_regression_wide.sql +++ b/integration_tests/models/simple_1var_regression_wide.sql @@ -11,4 +11,4 @@ select * from {{ format='wide', format_options={'round': 5} ) -}} 
+}} as linreg diff --git a/integration_tests/models/simple_2var_regression_long.sql b/integration_tests/models/simple_2var_regression_long.sql index aefec5a..96d6266 100644 --- a/integration_tests/models/simple_2var_regression_long.sql +++ b/integration_tests/models/simple_2var_regression_long.sql @@ -11,4 +11,4 @@ select * from {{ format='long', format_options={'round': 5} ) -}} +}} as linreg diff --git a/integration_tests/models/simple_2var_regression_wide.sql b/integration_tests/models/simple_2var_regression_wide.sql index 7689460..c259e6f 100644 --- a/integration_tests/models/simple_2var_regression_wide.sql +++ b/integration_tests/models/simple_2var_regression_wide.sql @@ -11,4 +11,4 @@ select * from {{ format='wide', format_options={'round': 5} ) -}} +}} as linreg diff --git a/integration_tests/models/simple_3var_regression_long.sql b/integration_tests/models/simple_3var_regression_long.sql index 1851953..9230a2c 100644 --- a/integration_tests/models/simple_3var_regression_long.sql +++ b/integration_tests/models/simple_3var_regression_long.sql @@ -11,4 +11,4 @@ select * from {{ format='long', format_options={'round': 5} ) -}} +}} as linreg diff --git a/integration_tests/models/simple_3var_regression_wide.sql b/integration_tests/models/simple_3var_regression_wide.sql index e10d477..53f81c4 100644 --- a/integration_tests/models/simple_3var_regression_wide.sql +++ b/integration_tests/models/simple_3var_regression_wide.sql @@ -1,6 +1,7 @@ {{ config( - materialized="table" + materialized="table", + tags=["skip-postgres"] ) }} select * from {{ @@ -11,4 +12,4 @@ select * from {{ format='wide', format_options={'round': 5} ) -}} +}} as linreg diff --git a/integration_tests/models/simple_4var_regression_long.sql b/integration_tests/models/simple_4var_regression_long.sql index 80f543c..7e26eeb 100644 --- a/integration_tests/models/simple_4var_regression_long.sql +++ b/integration_tests/models/simple_4var_regression_long.sql @@ -11,4 +11,4 @@ select * from {{ 
format='long', format_options={'round': 5} ) -}} +}} as linreg diff --git a/integration_tests/models/simple_4var_regression_wide.sql b/integration_tests/models/simple_4var_regression_wide.sql index 72af960..8c6cedb 100644 --- a/integration_tests/models/simple_4var_regression_wide.sql +++ b/integration_tests/models/simple_4var_regression_wide.sql @@ -1,6 +1,7 @@ {{ config( - materialized="table" + materialized="table", + tags=["skip-postgres"] ) }} select * from {{ @@ -11,4 +12,4 @@ select * from {{ format='wide', format_options={'round': 5} ) -}} +}} as linreg diff --git a/integration_tests/models/simple_5var_regression_long.sql b/integration_tests/models/simple_5var_regression_long.sql index 14a3dc8..466fbfc 100644 --- a/integration_tests/models/simple_5var_regression_long.sql +++ b/integration_tests/models/simple_5var_regression_long.sql @@ -1,6 +1,7 @@ {{ config( - materialized="table" + materialized="table", + tags=["skip-postgres"] ) }} select * from {{ @@ -11,4 +12,4 @@ select * from {{ format='long', format_options={'round': 5} ) -}} +}} as linreg diff --git a/integration_tests/models/simple_5var_regression_wide.sql b/integration_tests/models/simple_5var_regression_wide.sql index 929dd66..9c21289 100644 --- a/integration_tests/models/simple_5var_regression_wide.sql +++ b/integration_tests/models/simple_5var_regression_wide.sql @@ -1,6 +1,7 @@ {{ config( - materialized="table" + materialized="table", + tags=["skip-postgres"] ) }} select * from {{ @@ -11,4 +12,4 @@ select * from {{ format='wide', format_options={'round': 5} ) -}} +}} as linreg diff --git a/integration_tests/models/simple_8var_regression_wide.sql b/integration_tests/models/simple_8var_regression_wide.sql index fbe2c76..3b0f9b4 100644 --- a/integration_tests/models/simple_8var_regression_wide.sql +++ b/integration_tests/models/simple_8var_regression_wide.sql @@ -1,8 +1,8 @@ {{ config( materialized="view", - tags=["perftest"], - enabled=False + tags=["perftest", "skip-postgres"], + enabled=False, 
) }} select * from {{ @@ -12,4 +12,4 @@ select * from {{ exog=['xa', 'xb', 'xc', 'xd', 'xe', 'xf', 'xg', 'xh'], format='wide', ) -}} +}} as linreg diff --git a/integration_tests/models/wide_format_options.sql b/integration_tests/models/wide_format_options.sql index 23eab5c..6ea13c2 100644 --- a/integration_tests/models/wide_format_options.sql +++ b/integration_tests/models/wide_format_options.sql @@ -17,4 +17,4 @@ select 'constant_name': 'constant_term' } ) - }} + }} as linreg diff --git a/integration_tests/profiles/profiles.yml b/integration_tests/profiles/profiles.yml index 43d9557..a2b454b 100644 --- a/integration_tests/profiles/profiles.yml +++ b/integration_tests/profiles/profiles.yml @@ -1,9 +1,14 @@ dbt_linreg_profile: - target: dev + target: dbt-duckdb outputs: - dev: - type: duckdb - path: dbt.duckdb - dev-memory: - type: duckdb - path: ":memory:" + dbt-duckdb: + type: duckdb + path: dbt.duckdb + dbt-postgres: + type: postgres + user: '{{ env_var("POSTGRES_USER") }}' + password: '{{ env_var("POSTGRES_PASSWORD") }}' + host: '{{ env_var("POSTGRES_HOST", "localhost") }}' + port: '{{ env_var("POSTGRES_PORT", "5432") | as_number }}' + dbname: '{{ env_var("POSTGRES_DB", "dbt_linreg") }}' + schema: '{{ env_var("POSTGRES_SCHEMA", "public") }}' diff --git a/integration_tests/selectors.yml b/integration_tests/selectors.yml new file mode 100644 index 0000000..37e3554 --- /dev/null +++ b/integration_tests/selectors.yml @@ -0,0 +1,11 @@ +selectors: + - name: dbt-duckdb-selector + definition: 'fqn:*' + - name: dbt-postgres-selector + # Postgres runs into memory / performance issues for some of these queries. + # Resolving this and making Postgres more performant is a TODO. 
+ definition: + union: + - 'fqn:*' + - exclude: + - '@tag:skip-postgres' diff --git a/integration_tests/tests/test_groups_matrix_regression_chol.sql b/integration_tests/tests/test_groups_matrix_regression_chol_optimized.sql similarity index 96% rename from integration_tests/tests/test_groups_matrix_regression_chol.sql rename to integration_tests/tests/test_groups_matrix_regression_chol_optimized.sql index 3bd35cd..22075ae 100644 --- a/integration_tests/tests/test_groups_matrix_regression_chol.sql +++ b/integration_tests/tests/test_groups_matrix_regression_chol_optimized.sql @@ -21,7 +21,7 @@ expected as ( ) select base.variable_name -from {{ ref('groups_matrix_regression_chol') }} as base +from {{ ref('groups_matrix_regression_chol_optimized') }} as base full outer join expected on base.gb_var = expected.gb_var diff --git a/integration_tests/tests/test_long_format_options.sql b/integration_tests/tests/test_long_format_options.sql index 5b35bbb..1d8f12b 100644 --- a/integration_tests/tests/test_long_format_options.sql +++ b/integration_tests/tests/test_long_format_options.sql @@ -10,8 +10,8 @@ base as ( find_unstripped_quotes as ( select - max(vname = '"xa"') as should_be_true, - max(vname = 'xa') as should_be_false + cast(max(cast(vname = '"xa"' as integer)) as boolean) as should_be_true, + cast(max(cast(vname = 'xa' as integer)) as boolean) as should_be_false from base where not strip_quotes @@ -20,8 +20,8 @@ find_unstripped_quotes as ( dodge_unstripped_quotes as ( select - max(vname = 'xa') as should_be_true, - max(vname = '"xa"') as should_be_false + cast(max(cast(vname = 'xa' as integer)) as boolean) as should_be_true, + cast(max(cast(vname = '"xa"' as integer)) as boolean) as should_be_false from base where strip_quotes @@ -30,8 +30,8 @@ dodge_unstripped_quotes as ( coef_col_name as ( select - max(vname = 'constant_term') as should_be_true, - max(vname = 'const') as should_be_false + cast(max(cast(vname = 'constant_term' as integer)) as boolean) as 
should_be_true, + cast(max(cast(vname = 'const' as integer)) as boolean) as should_be_false from base ) diff --git a/macros/linear_regression/ols_impl_chol/_ols_impl_chol.sql b/macros/linear_regression/ols_impl_chol/_ols_impl_chol.sql index ecb3123..c9d9046 100644 --- a/macros/linear_regression/ols_impl_chol/_ols_impl_chol.sql +++ b/macros/linear_regression/ols_impl_chol/_ols_impl_chol.sql @@ -1,23 +1,24 @@ {# In some warehouses, you can reference newly created column aliases in the query you wrote. If that's not available, the previous calc will be in the dict. #} -{% macro _cell_or_alias(i, j, d) %} + +{% macro _cell_or_alias(i, j, d, prefix=none) %} {{ return( adapter.dispatch('_cell_or_alias', 'dbt_linreg') - (i, j, d) + (i, j, d, prefix) ) }} {% endmacro %} -{% macro default___cell_or_alias(i, j, d) %} +{% macro default___cell_or_alias(i, j, d, prefix=none) %} {{ return(d[(i, j)]) }} {% endmacro %} -{% macro snowflake___cell_or_alias(i, j, d) %} - {{ return('i' ~ i ~ 'j' ~ j) }} +{% macro snowflake___cell_or_alias(i, j, d, prefix=none) %} + {{ return((prefix if prefix is not none else '') ~ 'i' ~ i ~ 'j' ~ j) }} {% endmacro %} -{% macro duckdb___cell_or_alias(i, j, d) %} - {{ return('i' ~ i ~ 'j' ~ j) }} +{% macro duckdb___cell_or_alias(i, j, d, prefix=none) %} + {{ return((prefix if prefix is not none else '') ~ 'i' ~ i ~ 'j' ~ j) }} {% endmacro %} {% macro _safe_sqrt(x, safe=True) %} @@ -60,7 +61,11 @@ {% if i == j %} {% do d.update({(i, j): dbt_linreg._safe_sqrt(x=ns.s, safe=safe)}) %} {% else %} - {% do d.update({(i, j): '('~ns.s~')/'~dbt_linreg._cell_or_alias(i=j, j=j, d=d)}) %} + {% if adapter.type() == "postgres" %} + {% do d.update({(i, j): '('~ns.s~')/nullif('~dbt_linreg._cell_or_alias(i=j, j=j, d=d) ~ ', 0)'}) %} + {% else %} + {% do d.update({(i, j): '('~ns.s~')/'~dbt_linreg._cell_or_alias(i=j, j=j, d=d)}) %} + {% endif %} {% endif %} {% endif %} {% endfor %} @@ -77,11 +82,15 @@ {% else %} {% set ns.numerator = '(' %} {% for k in range(i, j) %} - {% 
set ns.numerator = ns.numerator~'-i'~j~'j'~k~'*inv_'~dbt_linreg._cell_or_alias(i=i, j=k, d=d) %} + {% set ns.numerator = ns.numerator~'-i'~j~'j'~k~'*'~dbt_linreg._cell_or_alias(i=i, j=k, d=d, prefix="inv_") %} {% endfor %} {% set ns.numerator = ns.numerator~')' %} {% endif %} - {% do d.update({(i, j): '('~ns.numerator~'/i'~j~'j'~j~')'}) %} + {% if adapter.type() == "postgres" %} + {% do d.update({(i, j): '('~ns.numerator~'/nullif(i'~j~'j'~j~', 0))'}) %} + {% else %} + {% do d.update({(i, j): '('~ns.numerator~'/i'~j~'j'~j~')'}) %} + {% endif %} {% endfor %} {{ return(d) }} {% endmacro %} @@ -122,6 +131,7 @@ {% set xmin = 1 %} {%- endif %} {%- set xcols = (range(xmin, (exog | length) + 1) | list) %} +{%- set upto = (xcols | length) %} {%- set exog_aliased = dbt_linreg._alias_exog(exog) %} (with _dbt_linreg_base as ( @@ -175,7 +185,7 @@ _dbt_linreg_chol as ( {%- if not loop.last %} from ( {%- else %} - from _dbt_linreg_xtx{{ ')' * ((xcols | length) - 1) }} + from _dbt_linreg_xtx{% for close_ct in range(upto - 1) %}) as ic{{ close_ct }}{% endfor %} {%- endif %} {%- endfor %} {%- else %} @@ -194,7 +204,6 @@ _dbt_linreg_inverse_chol as ( {#- The optimal way to calculate is to do each diagonal at a time. 
#} {%- set d = dbt_linreg._forward_substitution(li=xcols) %} {%- if subquery_optimization %} - {%- set upto = (xcols | length) %} {%- for gap in (range(0, upto) | reverse) %} select *, {%- for j in range(gap + xmin, upto + xmin) %} @@ -207,7 +216,7 @@ _dbt_linreg_inverse_chol as ( {%- if not loop.last %} from ( {%- else %} - from _dbt_linreg_chol{{ ')' * (upto - 1) }} + from _dbt_linreg_chol{% for close_ct in range(upto - 1) %}) as ic{{ close_ct }}{% endfor %} {%- endif %} {%- endfor %} {%- else %} @@ -226,7 +235,6 @@ _dbt_linreg_inverse_xtx as ( select {{ dbt_linreg._gb_cols(group_by, trailing_comma=True) | indent(4) }} {%- for i, j in modules.itertools.combinations_with_replacement(xcols, 2) %} - {%- set upto = (xcols | length) %} {%- if not add_constant %} {%- set upto = upto + 1 %} {%- endif %} @@ -287,7 +295,7 @@ _dbt_linreg_stderrs as ( select {{ dbt_linreg._gb_cols(group_by, trailing_comma=True, prefix='b') | indent(4) }} {%- for x in xcols %} - sqrt(inv_x{{ x }}x{{ x }} * resid_square_mean * n / (n - {{ xcols | length }})) as x{{ x }}_stderr + sqrt(inv_x{{ x }}x{{ x }} * resid_square_mean * n / (n - {{ upto }})) as x{{ x }}_stderr {%- if not loop.last -%} , {%- endif %} diff --git a/macros/linear_regression/ols_impl_fwl/_ols_impl_fwl.sql b/macros/linear_regression/ols_impl_fwl/_ols_impl_fwl.sql index 755e693..59c24ff 100644 --- a/macros/linear_regression/ols_impl_fwl/_ols_impl_fwl.sql +++ b/macros/linear_regression/ols_impl_fwl/_ols_impl_fwl.sql @@ -80,7 +80,7 @@ Doing this keeps the compiled SQL cleaner, and for large regressions can slightly improve the query planner speed (albeit not the execution). 
#} {% macro default___regress_or_alias(y, x, add_constant=True) %} - {{ return(regress(y, x, add_constant=add_constant)) }} + {{ return(dbt_linreg.regress(y, x, add_constant=add_constant)) }} {% endmacro %} {% macro snowflake___regress_or_alias(y, x, add_constant=True) %} diff --git a/macros/linear_regression/utils.sql b/macros/linear_regression/utils.sql index e3f66eb..d5cbaab 100644 --- a/macros/linear_regression/utils.sql +++ b/macros/linear_regression/utils.sql @@ -43,7 +43,7 @@ {%- if add_constant %} select {{ dbt_linreg._unalias_gb_cols(group_by, prefix='b') | indent(2) }} - '{{ format_options.get('constant_name', 'const') }}' as {{ format_options.get('variable_column_name', 'variable_name') }}, + {{ dbt.string_literal(format_options.get('constant_name', 'const')) }} as {{ format_options.get('variable_column_name', 'variable_name') }}, {{ dbt_linreg._maybe_round('x0_coef', format_options.get('round')) }} as {{ format_options.get('coefficient_column_name', 'coefficient') }}{% if calculate_standard_error %}, {{ dbt_linreg._maybe_round('x0_stderr', format_options.get('round')) }} as {{ format_options.get('standard_error_column_name', 'standard_error') }}, {{ dbt_linreg._maybe_round('x0_coef/x0_stderr', format_options.get('round')) }} as {{ format_options.get('t_statistic_column_name', 't_statistic') }} @@ -59,7 +59,7 @@ union all {%- for i in exog_aliased %} select {{ dbt_linreg._unalias_gb_cols(group_by, prefix='b') | indent(2) }} - '{{ dbt_linreg._strip_quotes(exog[loop.index0], format_options) }}' as {{ format_options.get('variable_column_name', 'variable_name') }}, + {{ dbt.string_literal(dbt_linreg._strip_quotes(exog[loop.index0], format_options)) }} as {{ format_options.get('variable_column_name', 'variable_name') }}, {{ dbt_linreg._maybe_round(i~'_coef', format_options.get('round')) }} as {{ format_options.get('coefficient_column_name', 'coefficient') }}{% if calculate_standard_error %}, {{ dbt_linreg._maybe_round(i~'_stderr', format_options.get('round')) 
}} as {{ format_options.get('standard_error_column_name', 'standard_error') }}, {{ dbt_linreg._maybe_round(i~'_coef/'~i~'_stderr', format_options.get('round')) }} as {{ format_options.get('t_statistic_column_name', 't_statistic') }} @@ -154,12 +154,27 @@ gb{{ loop.index }} as {{ gb }}, {# Round the final coefficient if the user specifies the `round` format option. Otherwise, keep as is. #} + {% macro _maybe_round(x, round_) %} -{% if round_ is not none %} - {{ return('round(' ~ x ~ ', ' ~ round_ ~ ')') }} -{% else %} - {{ return(x) }} -{% endif %} + {{ return( + adapter.dispatch('_maybe_round', 'dbt_linreg')(x, round_) + ) }} +{% endmacro %} + +{% macro default___maybe_round(x, round_) %} + {% if round_ is not none %} + {{ return('round(' ~ x ~ ', ' ~ round_ ~ ')') }} + {% else %} + {{ return(x) }} + {% endif %} +{% endmacro %} + +{% macro postgres___maybe_round(x, round_) %} + {% if round_ is not none %} + {{ return('round((' ~ x ~ ')::numeric, ' ~ round_ ~ ')') }} + {% else %} + {{ return('(' ~ x ~ ')::numeric') }} + {% endif %} {% endmacro %} {# Alias and write group by columns in a standard way. #} diff --git a/macros/schema.yml b/macros/schema.yml index 79b69f2..e31f3d7 100644 --- a/macros/schema.yml +++ b/macros/schema.yml @@ -28,6 +28,8 @@ macros: ``` {% endraw %} + The macro renders a subquery; in some database engines, such as Postgres, it is required to alias all subqueries. + Please see the README / full documentation for more information: [https://dwreeves.github.io/dbt_linreg/](https://dwreeves.github.io/dbt_linreg/) arguments: - name: table diff --git a/poetry.lock b/poetry.lock index beee4e1..7214cd0 100644 --- a/poetry.lock +++ b/poetry.lock @@ -338,14 +338,14 @@ toml = ["tomli"] [[package]] name = "dbt-core" -version = "1.6.0" +version = "1.6.6" description = "With dbt, data analysts and engineers can build analytics the way engineers build applications." 
category = "main" optional = false python-versions = ">=3.8" files = [ - {file = "dbt-core-1.6.0.tar.gz", hash = "sha256:51da239af02ed449562d3c5caea903edee7b5427c47efd5a6439634c59858087"}, - {file = "dbt_core-1.6.0-py3-none-any.whl", hash = "sha256:494018fef1254e6169f754c9ea32986d3851f1e5a8214d02f4e75fdd4a6c6c50"}, + {file = "dbt-core-1.6.6.tar.gz", hash = "sha256:6a38a5ac0dcbff59a3b388bc76cec301a6fc0f076694c31d878ffb0dc89b5c7d"}, + {file = "dbt_core-1.6.6-py3-none-any.whl", hash = "sha256:4ae40ae4c663898b76bb14d053daed256847da54d0d496a943811bc10133a557"}, ] [package.dependencies] @@ -418,6 +418,23 @@ files = [ {file = "dbt_extractor-0.4.1.tar.gz", hash = "sha256:75b1c665699ec0f1ffce1ba3d776f7dfce802156f22e70a7b9c8f0b4d7e80f42"}, ] +[[package]] +name = "dbt-postgres" +version = "1.6.6" +description = "The postgres adapter plugin for dbt (data build tool)" +category = "main" +optional = false +python-versions = ">=3.8" +files = [ + {file = "dbt-postgres-1.6.6.tar.gz", hash = "sha256:e87d417c4b7abc84fbedbca78ff53661eb10f041e2a93bc0d692a196015d632f"}, + {file = "dbt_postgres-1.6.6-py3-none-any.whl", hash = "sha256:a5d9c43e402f438a8353781f616e922a5d2d4800a10261057872c19cffa15d32"}, +] + +[package.dependencies] +agate = "*" +dbt-core = "1.6.6" +psycopg2-binary = ">=2.8,<3.0" + [[package]] name = "dbt-semantic-interfaces" version = "0.2.0" @@ -1205,6 +1222,87 @@ files = [ {file = "protobuf-4.24.0.tar.gz", hash = "sha256:5d0ceb9de6e08311832169e601d1fc71bd8e8c779f3ee38a97a78554945ecb85"}, ] +[[package]] +name = "psycopg2-binary" +version = "2.9.9" +description = "psycopg2 - Python-PostgreSQL Database Adapter" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "psycopg2-binary-2.9.9.tar.gz", hash = "sha256:7f01846810177d829c7692f1f5ada8096762d9172af1b1a28d4ab5b77c923c1c"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c2470da5418b76232f02a2fcd2229537bb2d5a7096674ce61859c3229f2eb202"}, + {file = 
"psycopg2_binary-2.9.9-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c6af2a6d4b7ee9615cbb162b0738f6e1fd1f5c3eda7e5da17861eacf4c717ea7"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:75723c3c0fbbf34350b46a3199eb50638ab22a0228f93fb472ef4d9becc2382b"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:83791a65b51ad6ee6cf0845634859d69a038ea9b03d7b26e703f94c7e93dbcf9"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0ef4854e82c09e84cc63084a9e4ccd6d9b154f1dbdd283efb92ecd0b5e2b8c84"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ed1184ab8f113e8d660ce49a56390ca181f2981066acc27cf637d5c1e10ce46e"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d2997c458c690ec2bc6b0b7ecbafd02b029b7b4283078d3b32a852a7ce3ddd98"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:b58b4710c7f4161b5e9dcbe73bb7c62d65670a87df7bcce9e1faaad43e715245"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:0c009475ee389757e6e34611d75f6e4f05f0cf5ebb76c6037508318e1a1e0d7e"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8dbf6d1bc73f1d04ec1734bae3b4fb0ee3cb2a493d35ede9badbeb901fb40f6f"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-win32.whl", hash = "sha256:3f78fd71c4f43a13d342be74ebbc0666fe1f555b8837eb113cb7416856c79682"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-win_amd64.whl", hash = "sha256:876801744b0dee379e4e3c38b76fc89f88834bb15bf92ee07d94acd06ec890a0"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ee825e70b1a209475622f7f7b776785bd68f34af6e7a46e2e42f27b659b5bc26"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-macosx_11_0_arm64.whl", hash = 
"sha256:1ea665f8ce695bcc37a90ee52de7a7980be5161375d42a0b6c6abedbf0d81f0f"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:143072318f793f53819048fdfe30c321890af0c3ec7cb1dfc9cc87aa88241de2"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c332c8d69fb64979ebf76613c66b985414927a40f8defa16cf1bc028b7b0a7b0"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f7fc5a5acafb7d6ccca13bfa8c90f8c51f13d8fb87d95656d3950f0158d3ce53"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:977646e05232579d2e7b9c59e21dbe5261f403a88417f6a6512e70d3f8a046be"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:b6356793b84728d9d50ead16ab43c187673831e9d4019013f1402c41b1db9b27"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:bc7bb56d04601d443f24094e9e31ae6deec9ccb23581f75343feebaf30423359"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:77853062a2c45be16fd6b8d6de2a99278ee1d985a7bd8b103e97e41c034006d2"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:78151aa3ec21dccd5cdef6c74c3e73386dcdfaf19bced944169697d7ac7482fc"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-win32.whl", hash = "sha256:dc4926288b2a3e9fd7b50dc6a1909a13bbdadfc67d93f3374d984e56f885579d"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-win_amd64.whl", hash = "sha256:b76bedd166805480ab069612119ea636f5ab8f8771e640ae103e05a4aae3e417"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:8532fd6e6e2dc57bcb3bc90b079c60de896d2128c5d9d6f24a63875a95a088cf"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:8f8544b092a29a6ddd72f3556a9fcf249ec412e10ad28be6a0c0d948924f2212"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2d423c8d8a3c82d08fe8af900ad5b613ce3632a1249fd6a223941d0735fce493"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2e5afae772c00980525f6d6ecf7cbca55676296b580c0e6abb407f15f3706996"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e6f98446430fdf41bd36d4faa6cb409f5140c1c2cf58ce0bbdaf16af7d3f119"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c77e3d1862452565875eb31bdb45ac62502feabbd53429fdc39a1cc341d681ba"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:cb16c65dcb648d0a43a2521f2f0a2300f40639f6f8c1ecbc662141e4e3e1ee07"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:911dda9c487075abd54e644ccdf5e5c16773470a6a5d3826fda76699410066fb"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:57fede879f08d23c85140a360c6a77709113efd1c993923c59fde17aa27599fe"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-win32.whl", hash = "sha256:64cf30263844fa208851ebb13b0732ce674d8ec6a0c86a4e160495d299ba3c93"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-win_amd64.whl", hash = "sha256:81ff62668af011f9a48787564ab7eded4e9fb17a4a6a74af5ffa6a457400d2ab"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:2293b001e319ab0d869d660a704942c9e2cce19745262a8aba2115ef41a0a42a"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:03ef7df18daf2c4c07e2695e8cfd5ee7f748a1d54d802330985a78d2a5a6dca9"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:0a602ea5aff39bb9fac6308e9c9d82b9a35c2bf288e184a816002c9fae930b77"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8359bf4791968c5a78c56103702000105501adb557f3cf772b2c207284273984"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:275ff571376626195ab95a746e6a04c7df8ea34638b99fc11160de91f2fef503"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:f9b5571d33660d5009a8b3c25dc1db560206e2d2f89d3df1cb32d72c0d117d52"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:420f9bbf47a02616e8554e825208cb947969451978dceb77f95ad09c37791dae"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:4154ad09dac630a0f13f37b583eae260c6aa885d67dfbccb5b02c33f31a6d420"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:a148c5d507bb9b4f2030a2025c545fccb0e1ef317393eaba42e7eabd28eb6041"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-win32.whl", hash = "sha256:68fc1f1ba168724771e38bee37d940d2865cb0f562380a1fb1ffb428b75cb692"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-win_amd64.whl", hash = "sha256:281309265596e388ef483250db3640e5f414168c5a67e9c665cafce9492eda2f"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:60989127da422b74a04345096c10d416c2b41bd7bf2a380eb541059e4e999980"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:246b123cc54bb5361588acc54218c8c9fb73068bf227a4a531d8ed56fa3ca7d6"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:34eccd14566f8fe14b2b95bb13b11572f7c7d5c36da61caf414d23b91fcc5d94"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:18d0ef97766055fec15b5de2c06dd8e7654705ce3e5e5eed3b6651a1d2a9a152"}, + {file = 
"psycopg2_binary-2.9.9-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d3f82c171b4ccd83bbaf35aa05e44e690113bd4f3b7b6cc54d2219b132f3ae55"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ead20f7913a9c1e894aebe47cccf9dc834e1618b7aa96155d2091a626e59c972"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:ca49a8119c6cbd77375ae303b0cfd8c11f011abbbd64601167ecca18a87e7cdd"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:323ba25b92454adb36fa425dc5cf6f8f19f78948cbad2e7bc6cdf7b0d7982e59"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:1236ed0952fbd919c100bc839eaa4a39ebc397ed1c08a97fc45fee2a595aa1b3"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:729177eaf0aefca0994ce4cffe96ad3c75e377c7b6f4efa59ebf003b6d398716"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-win32.whl", hash = "sha256:804d99b24ad523a1fe18cc707bf741670332f7c7412e9d49cb5eab67e886b9b5"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-win_amd64.whl", hash = "sha256:a6cdcc3ede532f4a4b96000b6362099591ab4a3e913d70bcbac2b56c872446f7"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:72dffbd8b4194858d0941062a9766f8297e8868e1dd07a7b36212aaa90f49472"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:30dcc86377618a4c8f3b72418df92e77be4254d8f89f14b8e8f57d6d43603c0f"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:31a34c508c003a4347d389a9e6fcc2307cc2150eb516462a7a17512130de109e"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:15208be1c50b99203fe88d15695f22a5bed95ab3f84354c494bcb1d08557df67"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:1873aade94b74715be2246321c8650cabf5a0d098a95bab81145ffffa4c13876"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a58c98a7e9c021f357348867f537017057c2ed7f77337fd914d0bedb35dace7"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:4686818798f9194d03c9129a4d9a702d9e113a89cb03bffe08c6cf799e053291"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:ebdc36bea43063116f0486869652cb2ed7032dbc59fbcb4445c4862b5c1ecf7f"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:ca08decd2697fdea0aea364b370b1249d47336aec935f87b8bbfd7da5b2ee9c1"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ac05fb791acf5e1a3e39402641827780fe44d27e72567a000412c648a85ba860"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-win32.whl", hash = "sha256:9dba73be7305b399924709b91682299794887cbbd88e38226ed9f6712eabee90"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-win_amd64.whl", hash = "sha256:f7ae5d65ccfbebdfa761585228eb4d0df3a8b15cfb53bd953e713e09fbb12957"}, +] + [[package]] name = "pycparser" version = "2.21" @@ -1809,4 +1907,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "23d01c70dc58530b7335d6390a0bcbd43ce43b4ba457c80c6e04fe8f56033c96" +content-hash = "2d824f9fbc44cc3746f30e66e6781b42c970bf277c6e10581eda4bc980d05466" diff --git a/pyproject.toml b/pyproject.toml index f66c392..29747fc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "dbt_linreg" description = "" -version = "0.2.1" +version = "0.2.3" authors = ["Daniel Reeves"] [tool.poetry.dependencies] @@ -24,6 +24,7 @@ rich_click = "*" ruff = "*" statsmodels = "*" tabulate = "*" +dbt-postgres = "^1.6.6" [tool.ruff] line-length = 100 diff --git a/run b/run 
index 010e263..3562fc6 100755 --- a/run +++ b/run @@ -2,26 +2,48 @@ set -eo pipefail +if [ -f .env ]; then + # shellcheck disable=SC2002,SC2046 + export $(cat .env | xargs) +fi + function setup { poetry install poetry run pre-commit install } -function testloc { - # rm -f integration_tests/dbt.duckdb - export DBT_PROFILES_DIR=./integration_tests/profiles - poetry run dbt deps --project-dir ./integration_tests - # poetry run dbt compile --project-dir ./integration_tests --select tag:perftest - poetry run dbt run --project-dir ./integration_tests --select tag:perftest -} - function test { - rm -f dbt.duckdb - poetry run python scripts.py gen-test-cases # --skip-if-exists - poetry run dbt deps --project-dir ./integration_tests --profiles-dir ./integration_tests/profiles - poetry run dbt seed --project-dir ./integration_tests --profiles-dir ./integration_tests/profiles - poetry run dbt run --project-dir ./integration_tests --profiles-dir ./integration_tests/profiles - poetry run dbt test --project-dir ./integration_tests --profiles-dir ./integration_tests/profiles + local target="${1-"dbt-duckdb"}" + + if [ -z "${GITHUB_ACTIONS}" ] && [ "${target}" = "dbt-postgres" ]; + then + createdb "${POSTGRES_DB-"dbt_linreg"}" || true + fi + + if [ -z "${GITHUB_ACTIONS}" ] && [ "${target}" = "dbt-duckdb" ]; + then + rm -f dbt.duckdb + fi + + poetry run python scripts.py gen-test-cases --skip-if-exists + poetry run dbt deps \ + --project-dir ./integration_tests \ + --profiles-dir ./integration_tests/profiles \ + --target "${target}" + poetry run dbt seed \ + --project-dir ./integration_tests \ + --profiles-dir ./integration_tests/profiles \ + --target "${target}" + poetry run dbt run \ + --project-dir ./integration_tests \ + --profiles-dir ./integration_tests/profiles \ + --target "${target}" \ + --selector "${target}-selector" + poetry run dbt test \ + --project-dir ./integration_tests \ + --profiles-dir ./integration_tests/profiles \ + --target "${target}" \ + --selector 
"${target}-selector" } function lint {