diff --git a/README.md b/README.md index 5dbaa13d..0967fc4c 100644 --- a/README.md +++ b/README.md @@ -216,11 +216,12 @@ models: ### SQL helpers #### get_column_values ([source](macros/sql/get_column_values.sql)) This macro returns the unique values for a column in a given table. +It takes an optional `default` argument for compiling when the relation does not already exist. Usage: ``` -- Returns a list of the top 50 states in the `users` table -{% set states = dbt_utils.get_column_values(table=ref('users'), column='state', max_records=50) %} +{% set states = dbt_utils.get_column_values(table=ref('users'), column='state', max_records=50, default=[]) %} {% for state in states %} ... diff --git a/integration_tests/Makefile b/integration_tests/Makefile index 22e9df4b..ce9889ff 100644 --- a/integration_tests/Makefile +++ b/integration_tests/Makefile @@ -1,20 +1,24 @@ test-postgres: + dbt compile --target postgres dbt seed --target postgres --full-refresh dbt run --target postgres --full-refresh --exclude test_insert_by_period dbt test --target postgres --exclude test_insert_by_period test-redshift: + dbt compile --target redshift dbt seed --target redshift --full-refresh dbt run --target redshift --full-refresh dbt test --target redshift test-snowflake: + dbt compile --target snowflake dbt seed --target snowflake --full-refresh dbt run --target snowflake --full-refresh dbt test --target snowflake test-bigquery: + dbt compile --target bigquery dbt seed --target bigquery --full-refresh dbt run --target bigquery --full-refresh dbt test --target bigquery diff --git a/integration_tests/models/sql/test_get_column_values.sql b/integration_tests/models/sql/test_get_column_values.sql index 6fd57e2d..bfe5c486 100644 --- a/integration_tests/models/sql/test_get_column_values.sql +++ b/integration_tests/models/sql/test_get_column_values.sql @@ -1,11 +1,10 @@ -{% set columns = dbt_utils.get_column_values(ref('data_get_column_values'), 'field') %} +{% set columns = 
dbt_utils.get_column_values(ref('data_get_column_values'), 'field', default = []) %} {% if target.type == 'snowflake' %} select - {% set columns = columns if columns is iterable else [] %} {% for column in columns -%} sum(case when field = '{{ column }}' then 1 else 0 end) as count_{{ column }} @@ -18,7 +17,6 @@ from {{ ref('data_get_column_values') }} {% else %} select - {% set columns = columns if columns is iterable else [] %} {% for column in columns -%} {{dbt_utils.safe_cast("sum(case when field = '" ~ column ~ "' then 1 else 0 end)", dbt_utils.type_string()) }} as count_{{ column }} diff --git a/macros/sql/get_column_values.sql b/macros/sql/get_column_values.sql index 36e9897b..cf2277e1 100644 --- a/macros/sql/get_column_values.sql +++ b/macros/sql/get_column_values.sql @@ -1,4 +1,3 @@ - {# This macro fetches the unique values for `column` in the table `table` @@ -11,19 +10,43 @@ Returns: A list of distinct values for the specified columns #} -{% macro get_column_values(table, column, max_records=none) -%} +{% macro get_column_values(table, column, max_records=none, default=none) -%} + +{#-- Prevent querying of db in parsing mode. This works because this macro does not create any new refs. #} + {%- if not execute -%} + {{ return('') }} + {% endif %} +{#-- #} + + {%- set target_relation = adapter.get_relation(database=table.database, + schema=table.schema, + identifier=table.identifier) -%} + + {%- call statement('get_column_values', fetch_result=true) %} + + {%- if not target_relation and default is none -%} + + {{ exceptions.raise_compiler_error("In get_column_values(): relation " ~ table ~ " does not exist and no default value was provided.") }} + + {%- elif not target_relation and default is not none -%} + + {{ log("Relation " ~ table ~ " does not exist. 
Returning the default value: " ~ default) }} + + {{ return(default) }} + + {%- else -%} - {%- call statement('get_column_values', fetch_result=True) %} + select + {{ column }} as value - select - {{ column }} as value + from {{ target_relation }} + group by 1 + order by count(*) desc - from {{ table }} - group by 1 - order by count(*) desc + {% if max_records is not none %} + limit {{ max_records }} + {% endif %} - {% if max_records is not none %} - limit {{ max_records }} {% endif %} {%- endcall -%} @@ -34,7 +57,7 @@ Returns: {%- set values = value_list['data'] | map(attribute=0) | list %} {{ return(values) }} {%- else -%} - {{ return([]) }} + {{ return(default) }} {%- endif -%} {%- endmacro %}