From 4950ce71b2139f88c3b0005ea4cab5068756b372 Mon Sep 17 00:00:00 2001 From: Damian Owsianny Date: Wed, 13 Sep 2023 11:51:23 +0200 Subject: [PATCH] Add support for Trino --- .circleci/config.yml | 15 +++++++++++++++ .github/ISSUE_TEMPLATE/bug_report.md | 1 + .github/pull_request_template.md | 1 + README.md | 1 + dbt_project.yml | 4 ++-- docs/index.md | 1 + integration_tests/ci/sample.profiles.yml | 15 ++++++++++++++- integration_tests/seeds/docs/docs_seeds.yml | 8 ++++---- integration_tests/seeds/tests/tests_seeds.yml | 10 +++++----- macros/recursive_dag.sql | 13 ++++++++++--- models/marts/dag/fct_duplicate_sources.sql | 2 +- models/marts/dag/fct_model_fanout.sql | 2 +- models/marts/dag/fct_multiple_sources_joined.sql | 2 +- models/marts/dag/fct_source_fanout.sql | 2 +- .../documentation/fct_documentation_coverage.sql | 4 ++-- .../structure/fct_model_naming_conventions.sql | 2 +- models/marts/tests/fct_test_coverage.sql | 4 ++-- seeds/seeds.yml | 8 ++++---- 18 files changed, 67 insertions(+), 28 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index b1e80893..19147aca 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -82,6 +82,17 @@ jobs: - store_artifacts: path: ./integration_tests/logs + integration-trino: + docker: + - image: cimg/python:3.9.9 + steps: + - checkout + - run: + name: "Run Tests - Trino" + command: ./run_test.sh trino + - store_artifacts: + path: ./integration_tests/logs + workflows: version: 2 test-all: @@ -104,3 +115,7 @@ workflows: context: profile-databricks requires: - integration-postgres + - integration-trino: + context: profile-trino + requires: + - integration-postgres diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index a1e6bd36..9d19d5c2 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -40,6 +40,7 @@ If applicable, add screenshots or log output to help explain your problem. 
- [ ] redshift - [ ] bigquery - [ ] snowflake +- [ ] trino/starburst - [ ] other (specify: ____________) diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index bf2878fd..56682e16 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -28,5 +28,6 @@ Screenshot of passing integration tests locally - [ ] Snowflake - [ ] Databricks - [ ] DuckDB + - [ ] Trino/Starburst - [ ] I have updated the README.md (if applicable) - [ ] I have added tests & descriptions to my models (and macros if applicable) \ No newline at end of file diff --git a/README.md b/README.md index 47ea620e..2f247e5b 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,7 @@ Currently, the following adapters are supported: - Redshift - Snowflake - DuckDB +- Trino (tested with Iceberg connector) ## Using This Package diff --git a/dbt_project.yml b/dbt_project.yml index e66cfcbf..19a8b730 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -79,8 +79,8 @@ vars: other_prefixes: ['rpt_'] # -- Performance variables -- - chained_views_threshold: 5 + chained_views_threshold: "{{ 5 if target.type != 'trino' else 4 }}" # -- Execution variables -- insert_batch_size: "{{ 500 if target.type == 'bigquery' else 10000 }}" - max_depth_dag: "{{ 9 if target.type in ['bigquery', 'spark', 'databricks'] else -1 }}" + max_depth_dag: "{{ 9 if target.type in ['bigquery', 'spark', 'databricks'] else 4 if target.type == 'trino' else -1 }}" diff --git a/docs/index.md b/docs/index.md index a01c80dc..da818c86 100644 --- a/docs/index.md +++ b/docs/index.md @@ -21,6 +21,7 @@ Currently, the following adapters are supported: - Redshift - Snowflake - DuckDB +- Trino (tested with Iceberg connector) ## Using This Package diff --git a/integration_tests/ci/sample.profiles.yml b/integration_tests/ci/sample.profiles.yml index 2fd230c2..183a3e4d 100644 --- a/integration_tests/ci/sample.profiles.yml +++ b/integration_tests/ci/sample.profiles.yml @@ -58,4 +58,17 @@ 
integration_tests: duckdb: type: duckdb - path: ./duck.db \ No newline at end of file + path: ./duck.db + + trino: + type: trino + host: "{{ env_var('TRINO_TEST_HOST') }}" + port: "{{ env_var('TRINO_TEST_PORT') | as_number }}" + method: ldap + user: "{{ env_var('TRINO_TEST_USER') }}" + password: "{{ env_var('TRINO_TEST_PASS') }}" + catalog: "{{ env_var('TRINO_TEST_CATALOG_NAME') }}" + schema: dbt_project_evaluator_integration_tests_trino + threads: 5 + session_properties: + query_max_stage_count: 200 diff --git a/integration_tests/seeds/docs/docs_seeds.yml b/integration_tests/seeds/docs/docs_seeds.yml index 643b0e08..25b5e75d 100644 --- a/integration_tests/seeds/docs/docs_seeds.yml +++ b/integration_tests/seeds/docs/docs_seeds.yml @@ -13,10 +13,10 @@ seeds: - name: test_fct_documentation_coverage config: column_types: - staging_documentation_coverage_pct: "{{ 'float' if target.type not in ['spark','databricks','duckdb'] else 'decimal(10,2)' }}" - intermediate_documentation_coverage_pct: "{{ 'float' if target.type not in ['spark','databricks','duckdb'] else 'decimal(10,2)' }}" - marts_documentation_coverage_pct: "{{ 'float' if target.type not in ['spark','databricks','duckdb'] else 'decimal(10,2)' }}" - other_documentation_coverage_pct: "{{ 'float' if target.type not in ['spark','databricks','duckdb'] else 'decimal(10,2)' }}" + staging_documentation_coverage_pct: "{{ 'float' if target.type not in ['spark','databricks','duckdb','trino'] else 'decimal(10,2)' }}" + intermediate_documentation_coverage_pct: "{{ 'float' if target.type not in ['spark','databricks','duckdb','trino'] else 'decimal(10,2)' }}" + marts_documentation_coverage_pct: "{{ 'float' if target.type not in ['spark','databricks','duckdb','trino'] else 'decimal(10,2)' }}" + other_documentation_coverage_pct: "{{ 'float' if target.type not in ['spark','databricks','duckdb','trino'] else 'decimal(10,2)' }}" tags: - docs tests: diff --git a/integration_tests/seeds/tests/tests_seeds.yml 
b/integration_tests/seeds/tests/tests_seeds.yml index 9d3545a5..1f86ab85 100644 --- a/integration_tests/seeds/tests/tests_seeds.yml +++ b/integration_tests/seeds/tests/tests_seeds.yml @@ -10,11 +10,11 @@ seeds: - name: test_fct_test_coverage config: column_types: - test_coverage_pct: float - staging_test_coverage_pct: float - intermediate_test_coverage_pct: float - marts_test_coverage_pct: float - other_test_coverage_pct: float + test_coverage_pct: "{{ 'float' if target.type != 'trino' else 'double' }}" + staging_test_coverage_pct: "{{ 'float' if target.type != 'trino' else 'double' }}" + intermediate_test_coverage_pct: "{{ 'float' if target.type != 'trino' else 'double' }}" + marts_test_coverage_pct: "{{ 'float' if target.type != 'trino' else 'double' }}" + other_test_coverage_pct: "{{ 'float' if target.type != 'trino' else 'double' }}" tests: - dbt_utils.equality: name: equality_fct_test_coverage diff --git a/macros/recursive_dag.sql b/macros/recursive_dag.sql index e8772b6c..f0911da9 100644 --- a/macros/recursive_dag.sql +++ b/macros/recursive_dag.sql @@ -120,7 +120,7 @@ all_relationships ( on all_relationships.child_id = direct_relationships.direct_parent_id {% if var('max_depth_dag') | int > 0 %} - {% if var('max_depth_dag') | int < 2 or var('max_depth_dag') | int < var('chained_views_threshold')%} + {% if var('max_depth_dag') | int < 2 or var('max_depth_dag') | int < var('chained_views_threshold') | int %} {% do exceptions.raise_compiler_error( 'Variable max_depth_dag must be at least 2 and must be greater or equal to than chained_views_threshold.' 
) %} @@ -138,7 +138,7 @@ all_relationships ( -- as of Feb 2022 BigQuery doesn't support with recursive in the same way as other DWs {% set max_depth = var('max_depth_dag') | int %} -{% if max_depth < 2 or max_depth < var('chained_views_threshold') %} +{% if max_depth < 2 or max_depth < var('chained_views_threshold') | int %} {% do exceptions.raise_compiler_error( 'Variable max_depth_dag must be at least 2 and must be greater or equal to than chained_views_threshold.' ) %} @@ -260,4 +260,11 @@ with direct_relationships as ( {% macro spark__recursive_dag() %} -- as of June 2022 databricks SQL doesn't support "with recursive" in the same way as other DWs {{ return(bigquery__recursive_dag()) }} -{% endmacro %} \ No newline at end of file +{% endmacro %} + + +{% macro trino__recursive_dag() %} +{#-- Although Trino supports recursive WITH queries, +-- they are less performant than creating CTEs with loops and unioning them --#} + {{ return(bigquery__recursive_dag()) }} +{% endmacro %} diff --git a/models/marts/dag/fct_duplicate_sources.sql b/models/marts/dag/fct_duplicate_sources.sql index b0fd5d54..0168649a 100644 --- a/models/marts/dag/fct_duplicate_sources.sql +++ b/models/marts/dag/fct_duplicate_sources.sql @@ -19,7 +19,7 @@ source_duplicates as ( {{ dbt.listagg( measure = 'resource_name', delimiter_text = "', '", - order_by_clause = 'order by resource_name' if target.type in ['snowflake','redshift','duckdb']) + order_by_clause = 'order by resource_name' if target.type in ['snowflake','redshift','duckdb','trino']) }} as source_names from sources group by source_db_location diff --git a/models/marts/dag/fct_model_fanout.sql b/models/marts/dag/fct_model_fanout.sql index 8688ddc8..72285d17 100644 --- a/models/marts/dag/fct_model_fanout.sql +++ b/models/marts/dag/fct_model_fanout.sql @@ -39,7 +39,7 @@ model_fanout_agg as ( {{ dbt.listagg( measure = 'child', delimiter_text = "', '", - order_by_clause = 'order by child' if target.type in ['snowflake','redshift','duckdb']) + 
order_by_clause = 'order by child' if target.type in ['snowflake','redshift','duckdb','trino']) }} as leaf_children from model_fanout group by 1, 2 diff --git a/models/marts/dag/fct_multiple_sources_joined.sql b/models/marts/dag/fct_multiple_sources_joined.sql index 1a6c877a..7bf8b812 100644 --- a/models/marts/dag/fct_multiple_sources_joined.sql +++ b/models/marts/dag/fct_multiple_sources_joined.sql @@ -18,7 +18,7 @@ multiple_sources_joined as ( {{ dbt.listagg( measure='parent', delimiter_text="', '", - order_by_clause='order by parent' if target.type in ['snowflake','redshift','duckdb']) + order_by_clause='order by parent' if target.type in ['snowflake','redshift','duckdb','trino']) }} as source_parents from direct_source_relationships group by 1 diff --git a/models/marts/dag/fct_source_fanout.sql b/models/marts/dag/fct_source_fanout.sql index 4c4eda99..3ec34c9f 100644 --- a/models/marts/dag/fct_source_fanout.sql +++ b/models/marts/dag/fct_source_fanout.sql @@ -18,7 +18,7 @@ source_fanout as ( {{ dbt.listagg( measure='child', delimiter_text="', '", - order_by_clause='order by child' if target.type in ['snowflake','redshift','duckdb']) + order_by_clause='order by child' if target.type in ['snowflake','redshift','duckdb','trino']) }} as model_children from direct_source_relationships group by 1 diff --git a/models/marts/documentation/fct_documentation_coverage.sql b/models/marts/documentation/fct_documentation_coverage.sql index 39dd9ce3..d5bfbfd3 100644 --- a/models/marts/documentation/fct_documentation_coverage.sql +++ b/models/marts/documentation/fct_documentation_coverage.sql @@ -20,10 +20,10 @@ conversion as ( final as ( select - current_timestamp as measured_at, + {{ 'current_timestamp' if target.type != 'trino' else 'current_timestamp(6)' }} as measured_at, count(*) as total_models, sum(is_described_model) as documented_models, - round(sum(is_described_model) * 100.0 / count(*), 2) as documentation_coverage_pct, + round(sum(is_described_model) * 100.00 / 
count(*), 2) as documentation_coverage_pct, {% for model_type in var('model_types') %} round( {{ dbt_utils.safe_divide( diff --git a/models/marts/structure/fct_model_naming_conventions.sql b/models/marts/structure/fct_model_naming_conventions.sql index d12b7c6a..a3d7403d 100644 --- a/models/marts/structure/fct_model_naming_conventions.sql +++ b/models/marts/structure/fct_model_naming_conventions.sql @@ -20,7 +20,7 @@ appropriate_prefixes as ( {{ dbt.listagg( measure='prefix_value', delimiter_text="', '", - order_by_clause='order by prefix_value' if target.type in ['snowflake','redshift','duckdb']) + order_by_clause='order by prefix_value' if target.type in ['snowflake','redshift','duckdb','trino']) }} as appropriate_prefixes from naming_convention_prefixes group by model_type diff --git a/models/marts/tests/fct_test_coverage.sql b/models/marts/tests/fct_test_coverage.sql index 504aa22a..70a2e2f0 100644 --- a/models/marts/tests/fct_test_coverage.sql +++ b/models/marts/tests/fct_test_coverage.sql @@ -19,7 +19,7 @@ conversion as ( final as ( select - current_timestamp as measured_at, + {{ 'current_timestamp' if target.type != 'trino' else 'current_timestamp(6)' }} as measured_at, count(*) as total_models, sum(number_of_tests_on_model) as total_tests, sum(is_tested_model) as tested_models, @@ -32,7 +32,7 @@ final as ( ) }} , 2) as {{ model_type }}_test_coverage_pct, {% endfor %} - round(sum(number_of_tests_on_model) * 1.0 / count(*), 4) as test_to_model_ratio + round(sum(number_of_tests_on_model) * 1.0000 / count(*), 4) as test_to_model_ratio from test_counts left join conversion diff --git a/seeds/seeds.yml b/seeds/seeds.yml index 1570bed0..a5a28535 100644 --- a/seeds/seeds.yml +++ b/seeds/seeds.yml @@ -6,10 +6,10 @@ seeds: config: column_types: - fct_name: "{{ 'varchar' if target.type in ['redshift', 'postgres', 'snowflake'] else 'string' }}" - column_name: "{{ 'varchar' if target.type in ['redshift', 'postgres', 'snowflake'] else 'string' }}" - id_to_exclude: "{{ 
'varchar' if target.type in ['redshift', 'postgres', 'snowflake'] else 'string' }}" - comment: "{{ 'varchar' if target.type in ['redshift', 'postgres', 'snowflake'] else 'string' }}" + fct_name: "{{ 'varchar' if target.type in ['redshift', 'postgres', 'snowflake', 'trino'] else 'string' }}" + column_name: "{{ 'varchar' if target.type in ['redshift', 'postgres', 'snowflake', 'trino'] else 'string' }}" + id_to_exclude: "{{ 'varchar' if target.type in ['redshift', 'postgres', 'snowflake', 'trino'] else 'string' }}" + comment: "{{ 'varchar' if target.type in ['redshift', 'postgres', 'snowflake', 'trino'] else 'string' }}" columns: - name: fct_name