
Commit

Merge branch 'main' into epapineau/undocumented-sources
Elize Papineau authored Sep 20, 2023
2 parents a415a79 + 1fbc682 commit f60c4cf
Showing 18 changed files with 67 additions and 28 deletions.
15 changes: 15 additions & 0 deletions .circleci/config.yml
@@ -82,6 +82,17 @@ jobs:
- store_artifacts:
path: ./integration_tests/logs

integration-trino:
docker:
- image: cimg/python:3.9.9
steps:
- checkout
- run:
name: "Run Tests - Trino"
command: ./run_test.sh trino
- store_artifacts:
path: ./integration_tests/logs

workflows:
version: 2
test-all:
@@ -104,3 +115,7 @@ workflows:
context: profile-databricks
requires:
- integration-postgres
- integration-trino:
context: profile-trino
requires:
- integration-postgres
1 change: 1 addition & 0 deletions .github/ISSUE_TEMPLATE/bug_report.md
@@ -40,6 +40,7 @@ If applicable, add screenshots or log output to help explain your problem.
- [ ] redshift
- [ ] bigquery
- [ ] snowflake
- [ ] trino/starburst
- [ ] other (specify: ____________)


1 change: 1 addition & 0 deletions .github/pull_request_template.md
@@ -28,5 +28,6 @@ Screenshot of passing integration tests locally
- [ ] Snowflake
- [ ] Databricks
- [ ] DuckDB
- [ ] Trino/Starburst
- [ ] I have updated the README.md (if applicable)
- [ ] I have added tests & descriptions to my models (and macros if applicable)
1 change: 1 addition & 0 deletions README.md
@@ -20,6 +20,7 @@ Currently, the following adapters are supported:
- Redshift
- Snowflake
- DuckDB
- Trino (tested with Iceberg connector)

## Using This Package

4 changes: 2 additions & 2 deletions dbt_project.yml
@@ -79,8 +79,8 @@ vars:
other_prefixes: ['rpt_']

# -- Performance variables --
chained_views_threshold: 5
chained_views_threshold: "{{ 5 if target.type != 'trino' else 4 }}"

# -- Execution variables --
insert_batch_size: "{{ 500 if target.type == 'bigquery' else 10000 }}"
max_depth_dag: "{{ 9 if target.type in ['bigquery', 'spark', 'databricks'] else -1 }}"
max_depth_dag: "{{ 9 if target.type in ['bigquery', 'spark', 'databricks'] else 4 if target.type == 'trino' else -1 }}"
1 change: 1 addition & 0 deletions docs/index.md
@@ -21,6 +21,7 @@ Currently, the following adapters are supported:
- Redshift
- Snowflake
- DuckDB
- Trino (tested with Iceberg connector)

## Using This Package

15 changes: 14 additions & 1 deletion integration_tests/ci/sample.profiles.yml
@@ -58,4 +58,17 @@ integration_tests:

duckdb:
type: duckdb
path: ./duck.db
path: ./duck.db

trino:
type: trino
host: "{{ env_var('TRINO_TEST_HOST') }}"
port: "{{ env_var('TRINO_TEST_PORT') | as_number }}"
method: ldap
user: "{{ env_var('TRINO_TEST_USER') }}"
password: "{{ env_var('TRINO_TEST_PASS') }}"
catalog: "{{ env_var('TRINO_TEST_CATALOG_NAME') }}"
schema: dbt_project_evaluator_integration_tests_trino
threads: 5
session_properties:
query_max_stage_count: 200
8 changes: 4 additions & 4 deletions integration_tests/seeds/docs/docs_seeds.yml
@@ -13,10 +13,10 @@ seeds:
- name: test_fct_documentation_coverage
config:
column_types:
staging_documentation_coverage_pct: "{{ 'float' if target.type not in ['spark','databricks','duckdb'] else 'decimal(10,2)' }}"
intermediate_documentation_coverage_pct: "{{ 'float' if target.type not in ['spark','databricks','duckdb'] else 'decimal(10,2)' }}"
marts_documentation_coverage_pct: "{{ 'float' if target.type not in ['spark','databricks','duckdb'] else 'decimal(10,2)' }}"
other_documentation_coverage_pct: "{{ 'float' if target.type not in ['spark','databricks','duckdb'] else 'decimal(10,2)' }}"
staging_documentation_coverage_pct: "{{ 'float' if target.type not in ['spark','databricks','duckdb','trino'] else 'decimal(10,2)' }}"
intermediate_documentation_coverage_pct: "{{ 'float' if target.type not in ['spark','databricks','duckdb','trino'] else 'decimal(10,2)' }}"
marts_documentation_coverage_pct: "{{ 'float' if target.type not in ['spark','databricks','duckdb','trino'] else 'decimal(10,2)' }}"
other_documentation_coverage_pct: "{{ 'float' if target.type not in ['spark','databricks','duckdb','trino'] else 'decimal(10,2)' }}"
tags:
- docs
tests:
10 changes: 5 additions & 5 deletions integration_tests/seeds/tests/tests_seeds.yml
@@ -10,11 +10,11 @@ seeds:
- name: test_fct_test_coverage
config:
column_types:
test_coverage_pct: float
staging_test_coverage_pct: float
intermediate_test_coverage_pct: float
marts_test_coverage_pct: float
other_test_coverage_pct: float
test_coverage_pct: "{{ 'float' if target.type != 'trino' else 'double' }}"
staging_test_coverage_pct: "{{ 'float' if target.type != 'trino' else 'double' }}"
intermediate_test_coverage_pct: "{{ 'float' if target.type != 'trino' else 'double' }}"
marts_test_coverage_pct: "{{ 'float' if target.type != 'trino' else 'double' }}"
other_test_coverage_pct: "{{ 'float' if target.type != 'trino' else 'double' }}"
tests:
- dbt_utils.equality:
name: equality_fct_test_coverage
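Note (illustrative, not part of the commit): Trino's floating-point types are real and double; there is no float type name, hence the switch to double for these seed columns on Trino. A hypothetical query showing the supported spelling:

-- hypothetical example: double is the Trino spelling for a floating-point value
select cast(0.25 as double) as test_coverage_pct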
13 changes: 10 additions & 3 deletions macros/recursive_dag.sql
@@ -120,7 +120,7 @@ all_relationships (
on all_relationships.child_id = direct_relationships.direct_parent_id

{% if var('max_depth_dag') | int > 0 %}
{% if var('max_depth_dag') | int < 2 or var('max_depth_dag') | int < var('chained_views_threshold')%}
{% if var('max_depth_dag') | int < 2 or var('max_depth_dag') | int < var('chained_views_threshold') | int %}
{% do exceptions.raise_compiler_error(
'Variable max_depth_dag must be at least 2 and must be greater than or equal to chained_views_threshold.'
) %}
@@ -138,7 +138,7 @@ all_relationships (

-- as of Feb 2022 BigQuery doesn't support with recursive in the same way as other DWs
{% set max_depth = var('max_depth_dag') | int %}
{% if max_depth < 2 or max_depth < var('chained_views_threshold') %}
{% if max_depth < 2 or max_depth < var('chained_views_threshold') | int %}
{% do exceptions.raise_compiler_error(
'Variable max_depth_dag must be at least 2 and must be greater than or equal to chained_views_threshold.'
) %}
@@ -260,4 +260,11 @@ with direct_relationships as (
{% macro spark__recursive_dag() %}
-- as of June 2022 Databricks SQL doesn't support "with recursive" in the same way as other DWs
{{ return(bigquery__recursive_dag()) }}
{% endmacro %}
{% endmacro %}


{% macro trino__recursive_dag() %}
{#-- Although Trino supports recursive WITH queries,
-- they are less performant than creating CTEs with loops and unioning them --#}
{{ return(bigquery__recursive_dag()) }}
{% endmacro %}
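Note (illustrative, not part of the commit): the new trino__recursive_dag is picked up through dbt's adapter dispatch, so call sites don't change. Assuming the package's entry-point macro follows the usual dispatch pattern (it is not shown in this diff), it would look roughly like:

{% macro recursive_dag() %}
    {# dbt resolves this to trino__recursive_dag() when target.type == 'trino',
       and to another adapter-prefixed or default implementation otherwise #}
    {{ return(adapter.dispatch('recursive_dag', 'dbt_project_evaluator')()) }}
{% endmacro %}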
2 changes: 1 addition & 1 deletion models/marts/dag/fct_duplicate_sources.sql
@@ -19,7 +19,7 @@ source_duplicates as (
{{ dbt.listagg(
measure = 'resource_name',
delimiter_text = "', '",
order_by_clause = 'order by resource_name' if target.type in ['snowflake','redshift','duckdb'])
order_by_clause = 'order by resource_name' if target.type in ['snowflake','redshift','duckdb','trino'])
}} as source_names
from sources
group by source_db_location
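Note (illustrative, not part of the commit): adding 'trino' here enables the ordered form of the aggregation, since Trino's listagg takes a within group ordering clause. On Trino the call above would compile to roughly the following (a sketch, not the verified output of the dbt-trino listagg macro):

listagg(resource_name, ', ') within group (order by resource_name) as source_names

The same change is applied to the other listagg call sites in this commit (fct_model_fanout, fct_multiple_sources_joined, fct_source_fanout, fct_model_naming_conventions).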
2 changes: 1 addition & 1 deletion models/marts/dag/fct_model_fanout.sql
@@ -39,7 +39,7 @@ model_fanout_agg as (
{{ dbt.listagg(
measure = 'child',
delimiter_text = "', '",
order_by_clause = 'order by child' if target.type in ['snowflake','redshift','duckdb'])
order_by_clause = 'order by child' if target.type in ['snowflake','redshift','duckdb','trino'])
}} as leaf_children
from model_fanout
group by 1, 2
2 changes: 1 addition & 1 deletion models/marts/dag/fct_multiple_sources_joined.sql
@@ -18,7 +18,7 @@ multiple_sources_joined as (
{{ dbt.listagg(
measure='parent',
delimiter_text="', '",
order_by_clause='order by parent' if target.type in ['snowflake','redshift','duckdb'])
order_by_clause='order by parent' if target.type in ['snowflake','redshift','duckdb','trino'])
}} as source_parents
from direct_source_relationships
group by 1
2 changes: 1 addition & 1 deletion models/marts/dag/fct_source_fanout.sql
@@ -18,7 +18,7 @@ source_fanout as (
{{ dbt.listagg(
measure='child',
delimiter_text="', '",
order_by_clause='order by child' if target.type in ['snowflake','redshift','duckdb'])
order_by_clause='order by child' if target.type in ['snowflake','redshift','duckdb','trino'])
}} as model_children
from direct_source_relationships
group by 1
4 changes: 2 additions & 2 deletions models/marts/documentation/fct_documentation_coverage.sql
@@ -20,10 +20,10 @@ conversion as (

final as (
select
current_timestamp as measured_at,
{{ 'current_timestamp' if target.type != 'trino' else 'current_timestamp(6)' }} as measured_at,
count(*) as total_models,
sum(is_described_model) as documented_models,
round(sum(is_described_model) * 100.0 / count(*), 2) as documentation_coverage_pct,
round(sum(is_described_model) * 100.00 / count(*), 2) as documentation_coverage_pct,
{% for model_type in var('model_types') %}
round(
{{ dbt_utils.safe_divide(
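Note (illustrative, not part of the commit): on Trino, current_timestamp defaults to millisecond precision (timestamp(3) with time zone), while current_timestamp(6) requests microseconds, which lines up with the microsecond timestamps used when testing against the Iceberg connector mentioned in the README. A hypothetical comparison:

select
    current_timestamp    as ts_default,  -- timestamp(3) with time zone on Trino
    current_timestamp(6) as measured_at  -- timestamp(6) with time zone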
2 changes: 1 addition & 1 deletion models/marts/structure/fct_model_naming_conventions.sql
@@ -20,7 +20,7 @@ appropriate_prefixes as (
{{ dbt.listagg(
measure='prefix_value',
delimiter_text="', '",
order_by_clause='order by prefix_value' if target.type in ['snowflake','redshift','duckdb'])
order_by_clause='order by prefix_value' if target.type in ['snowflake','redshift','duckdb','trino'])
}} as appropriate_prefixes
from naming_convention_prefixes
group by model_type
4 changes: 2 additions & 2 deletions models/marts/tests/fct_test_coverage.sql
@@ -19,7 +19,7 @@ conversion as (

final as (
select
current_timestamp as measured_at,
{{ 'current_timestamp' if target.type != 'trino' else 'current_timestamp(6)' }} as measured_at,
count(*) as total_models,
sum(number_of_tests_on_model) as total_tests,
sum(is_tested_model) as tested_models,
@@ -32,7 +32,7 @@ final as (
) }}
, 2) as {{ model_type }}_test_coverage_pct,
{% endfor %}
round(sum(number_of_tests_on_model) * 1.0 / count(*), 4) as test_to_model_ratio
round(sum(number_of_tests_on_model) * 1.0000 / count(*), 4) as test_to_model_ratio

from test_counts
left join conversion
8 changes: 4 additions & 4 deletions seeds/seeds.yml
@@ -6,10 +6,10 @@ seeds:

config:
column_types:
fct_name: "{{ 'varchar' if target.type in ['redshift', 'postgres', 'snowflake'] else 'string' }}"
column_name: "{{ 'varchar' if target.type in ['redshift', 'postgres', 'snowflake'] else 'string' }}"
id_to_exclude: "{{ 'varchar' if target.type in ['redshift', 'postgres', 'snowflake'] else 'string' }}"
comment: "{{ 'varchar' if target.type in ['redshift', 'postgres', 'snowflake'] else 'string' }}"
fct_name: "{{ 'varchar' if target.type in ['redshift', 'postgres', 'snowflake', 'trino'] else 'string' }}"
column_name: "{{ 'varchar' if target.type in ['redshift', 'postgres', 'snowflake', 'trino'] else 'string' }}"
id_to_exclude: "{{ 'varchar' if target.type in ['redshift', 'postgres', 'snowflake', 'trino'] else 'string' }}"
comment: "{{ 'varchar' if target.type in ['redshift', 'postgres', 'snowflake', 'trino'] else 'string' }}"

columns:
- name: fct_name
