From eb1292268b58c0ba1ab6a2c8ec062a53f3955a62 Mon Sep 17 00:00:00 2001 From: Joel Labes Date: Fri, 2 Dec 2022 08:19:27 +1300 Subject: [PATCH] dbt utils v1.0.0-rc1 (#733) * add safe_divide documentation * add safe_divide macro * add integration test for safe_divide macro * Merge changes from main into utils v1 (#699) * Correct link from README to the CONTRIBUTING guide. (#687) * fix typo (#688) Co-authored-by: Alex Malins <22991362+alexmalins@users.noreply.github.com> * Change `escape_single_quotes` Reference in Pivot Macro (#692) * Update pivot.sql * Changelog Updates Co-authored-by: Liam O'Boyle Co-authored-by: Alex Malins Co-authored-by: Alex Malins <22991362+alexmalins@users.noreply.github.com> Co-authored-by: zachoj10 * Use backwards compatible versions of timestamp macro * moved macro and documentation to new SQL generator section * add tests with expressions * fix syntax errors (#705) * fix syntax errors * remove whitespace in seed file * Restore dbt. prefix for all migrated cross-db macros (#701) * added prefix dbt. on cross db macros * Also prefix for new macro * Adding changelog change * Squashed commit of the following: commit 5eba82b7ac3256d3b8789206fb717ee9377ee342 Author: Deanna Minnick Date: Wed Oct 12 10:30:42 2022 -0400 remove whitespace in seed file commit 7a2a5e3bc0a725b7f642140168becf4e71bc0d98 Author: Deanna Minnick Date: Wed Oct 12 10:22:07 2022 -0400 fix syntax errors Co-authored-by: Joel Labes * Remove obsolete condition argument from expression_is_true (#700) * Remove obsolete condition argument from expression_is_true * Improve docs * Improve docs * Update star.sql to allow for non-quote wrapped column names (#706) * Update star.sql * Update star.sql * feat: add testing to star macro column encased in quotes functionality * chore: update schema.yml * Update star.sql * chore: update star.sql and schema.yml * chore: update star.sql to trim blank space * Update README.md * Update README.md adds example usage of star macro's quote_identifiers argument Co-authored-by: crlough * Switch to dbt.escape_single_quotes * Change deprecation resolution advice * Wrap xdb warnings in if execute block * Slugify for snowflake (#707) * Merge main into utils-v1 (#726) * Feature/safe divide (#697) * add safe_divide documentation * add safe_divide macro * add integration test for safe_divide macro * moved macro and documentation to new SQL generator section Co-authored-by: Grace Goheen * Revert "Feature/safe divide (#697)" (#702) This reverts commit f368cecca34a6488e14029d60d5116d7cc5fca51. * Quick nitpicks (#718) I was doing some studying on these and spotted some stuff.
One verb conjugation and a consistency in macro description Co-authored-by: deanna-minnick <41010575+deanna-minnick@users.noreply.github.com> Co-authored-by: Grace Goheen Co-authored-by: ian-fahey-dbt <107962364+ian-fahey-dbt@users.noreply.github.com> * Feat: add macro get_query_results_as_single_value (#696) * feat: add query_results_as_single_value.sql macro * chore: update the macro definition Current error to work through: "failed to find conversion function from unknown to text" * chore: update test * chore: final edits * chore: remove extra model reference * chore: update return() to handle BigQuery * chore: README.md, macro updates * feat: factoring in first review changes * chore: updates to testing * chore: updates tests * chore: update test for bigquery * chore: update cast for bigquery * Use example with a single record in readme * Add default value when no record found * test when no results are found * Rename test file * Add test definitions * Fix incorrect ref * And another one * Update test_get_query_results_as_single_value.sql * cast strings as strings * Put arg in right place * Update test_get_query_results_as_single_value.sql * switch to limit zero for BQ * Update test_get_query_results_as_single_value.sql * quote column name in arg * snowflake wont let you safe cast something to itself * warning to future readers [skip ci] * Add singular test to check for multi row/multi column setup * forgot to save comment [skip ci] * Rename to get_single_value Co-authored-by: crlough-gitkraken Co-authored-by: Joel Labes * Remove rc1 requirement for utils v1 * Recency truncate date option (#731) * WIP changing recency test * Add tests * cast to timestamp for bq * forgot the curlies * avoid lateral column aliasing * ts not dt * cast source as timestamp * don't cast inside test * cast as date instead of truncate * Update recency.sql * log bq events * store pg artifacts * int tests dir * Correctly store artifacts * try casting to date or datetime * order of operations more like order of ooperations * dt -> ts * Do I really have to cast this? * Revert "Do I really have to cast this?" This reverts commit 21e2c0d50a901551c94f5a29251c455f80bda9dc. 
* Output a warning when star finds no columns, not '*' (#732) * Change star() behaviour when no columns returned * Code review: return a * in compile mode * README changes * Delete xdb_deprecation_warning.sql * Update README.md * Remove from ToC * Update toc * Fix surrogate key variable example Co-authored-by: Deanna Minnick Co-authored-by: Liam O'Boyle Co-authored-by: Alex Malins Co-authored-by: Alex Malins <22991362+alexmalins@users.noreply.github.com> Co-authored-by: zachoj10 Co-authored-by: Grace Goheen Co-authored-by: deanna-minnick <41010575+deanna-minnick@users.noreply.github.com> Co-authored-by: Simon Quvang Co-authored-by: miles Co-authored-by: Connor <61797492+CR-Lough@users.noreply.github.com> Co-authored-by: crlough Co-authored-by: fivetran-catfritz <111930712+fivetran-catfritz@users.noreply.github.com> Co-authored-by: ian-fahey-dbt <107962364+ian-fahey-dbt@users.noreply.github.com> Co-authored-by: crlough-gitkraken --- .circleci/config.yml | 18 +- CHANGELOG.md | 42 +- README.md | 361 +++++------------- dbt_project.yml | 2 +- .../data/sql/data_get_single_value.csv | 2 + .../data/sql/data_safe_divide.csv | 9 + ...ta_safe_divide_denominator_expressions.csv | 7 + ...data_safe_divide_numerator_expressions.csv | 7 + .../data/sql/data_star_quote_identifiers.csv | 2 + integration_tests/dbt_project.yml | 6 + .../generic_tests/recency_time_excluded.sql | 12 + .../generic_tests/recency_time_included.sql | 4 + .../models/generic_tests/schema.yml | 31 +- .../models/generic_tests/test_recency.sql | 16 - integration_tests/models/sql/schema.yml | 48 +++ .../models/sql/test_get_single_value.sql | 42 ++ .../sql/test_get_single_value_default.sql | 28 ++ .../models/sql/test_safe_divide.sql | 38 ++ .../models/sql/test_star_no_columns.sql | 11 + .../sql/test_star_quote_identifiers.sql | 9 + .../tests/jinja_helpers/test_slugify.sql | 16 +- .../test_get_single_value_multiple_rows.sql | 18 + .../deprecated/xdb_deprecation_warning.sql | 4 - macros/generic_tests/cardinality_equality.sql | 4 +- macros/generic_tests/equality.sql | 4 +- macros/generic_tests/expression_is_true.sql | 14 +- macros/generic_tests/recency.sql | 15 +- macros/generic_tests/sequential_values.sql | 2 +- macros/jinja_helpers/slugify.sql | 4 +- macros/sql/date_spine.sql | 6 +- macros/sql/generate_surrogate_key.sql | 4 +- macros/sql/get_single_value.sql | 33 ++ macros/sql/pivot.sql | 2 +- macros/sql/safe_divide.sql | 7 + macros/sql/star.sql | 64 ++-- macros/sql/surrogate_key.sql | 2 +- macros/sql/union.sql | 2 +- macros/sql/unpivot.sql | 4 +- macros/sql/width_bucket.sql | 8 +- macros/web/get_url_host.sql | 12 +- macros/web/get_url_parameter.sql | 2 +- macros/web/get_url_path.sql | 18 +- 42 files changed, 554 insertions(+), 386 deletions(-) create mode 100644 integration_tests/data/sql/data_get_single_value.csv create mode 100644 integration_tests/data/sql/data_safe_divide.csv create mode 100644 integration_tests/data/sql/data_safe_divide_denominator_expressions.csv create mode 100644 integration_tests/data/sql/data_safe_divide_numerator_expressions.csv create mode 100644 integration_tests/data/sql/data_star_quote_identifiers.csv create mode 100644 integration_tests/models/generic_tests/recency_time_excluded.sql create mode 100644 integration_tests/models/generic_tests/recency_time_included.sql delete mode 100644 integration_tests/models/generic_tests/test_recency.sql create mode 100644 integration_tests/models/sql/test_get_single_value.sql create mode 100644 integration_tests/models/sql/test_get_single_value_default.sql create mode 
100644 integration_tests/models/sql/test_safe_divide.sql create mode 100644 integration_tests/models/sql/test_star_no_columns.sql create mode 100644 integration_tests/models/sql/test_star_quote_identifiers.sql create mode 100644 integration_tests/tests/sql/test_get_single_value_multiple_rows.sql delete mode 100644 macros/cross_db_utils/deprecated/xdb_deprecation_warning.sql create mode 100644 macros/sql/get_single_value.sql create mode 100644 macros/sql/safe_divide.sql diff --git a/.circleci/config.yml b/.circleci/config.yml index 5c0daa61..31e4a4d1 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -23,7 +23,9 @@ jobs: name: "Run OG Tests - Postgres" command: ./run_test.sh postgres - store_artifacts: - path: ./logs + path: integration_tests/logs + - store_artifacts: + path: integration_tests/target integration-redshift: docker: @@ -35,7 +37,9 @@ jobs: name: "Run OG Tests - Redshift" command: ./run_test.sh redshift - store_artifacts: - path: ./logs + path: integration_tests/logs + - store_artifacts: + path: integration_tests/target integration-snowflake: docker: @@ -47,8 +51,10 @@ jobs: name: "Run OG Tests - Snowflake" command: ./run_test.sh snowflake - store_artifacts: - path: ./logs - + path: integration_tests/logs + - store_artifacts: + path: integration_tests/target + integration-bigquery: environment: BIGQUERY_SERVICE_KEY_PATH: "/home/circleci/bigquery-service-key.json" @@ -64,7 +70,9 @@ jobs: name: "Run OG Tests - BigQuery" command: ./run_test.sh bigquery - store_artifacts: - path: ./logs + path: integration_tests/logs + - store_artifacts: + path: integration_tests/target workflows: version: 2 diff --git a/CHANGELOG.md b/CHANGELOG.md index e1f6a5a9..d5a9a24f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,30 +11,66 @@ # Unreleased ## New features +- Updated the `slugify` macro to prepend "_" to column names beginning with a number since most databases do not allow names to begin with numbers. - Implemented an optional `group_by_columns` argument across many of the generic testing macros to test for properties that only pertain to group-level or are can be more rigorously conducted at the group level. Property available in `recency`, `at_least_one`, `equal_row_count`, `fewer_rows_than`, `not_constant`, `not_null_proportion`, and `sequential` tests [#633](https://github.com/dbt-labs/dbt-utils/pull/633) - New feature to omit the `source_column_name` column on the `union_relations` macro ([#331](https://github.com/dbt-labs/dbt-utils/issues/331), [#624](https://github.com/dbt-labs/dbt-utils/pull/624)) +- New macro `get_single_value` ([#696](https://github.com/dbt-labs/dbt-utils/pull/696)) - New feature to select fewer columns in `expression_is_true` ([#683](https://github.com/dbt-labs/dbt-utils/issues/683), [#686](https://github.com/dbt-labs/dbt-utils/pull/686)) - Add `not_empty_string` generic test that asserts column values are not an empty string. 
([#632](https://github.com/dbt-labs/dbt-utils/issues/632), [#634](https://github.com/dbt-labs/dbt-utils/pull/634)) ## Under the hood -- Remove deprecated table argument from unpivot ([#671](https://github.com/dbt-labs/dbt-utils/pull/671)) +- Remove deprecated table argument from `unpivot` ([#671](https://github.com/dbt-labs/dbt-utils/pull/671)) - Delete the deprecated identifier macro ([#672](https://github.com/dbt-labs/dbt-utils/pull/672)) - Handle deprecations in deduplicate macro ([#673](https://github.com/dbt-labs/dbt-utils/pull/673)) -- Fully remove varargs usage in surrogate_key and safe_add ([#674](https://github.com/dbt-labs/dbt-utils/pull/674)) +- Fully remove varargs usage in `surrogate_key` and `safe_add` ([#674](https://github.com/dbt-labs/dbt-utils/pull/674)) +- Remove obsolete condition argument from `expression_is_true` ([#699](https://github.com/dbt-labs/dbt-utils/pull/699)) + +## Migration instructions +- If your project uses the `expression_is_true` macro, replace `condition` argument with `where`. + +Before: +```yaml +version: 2 + +models: + - name: model_name + tests: + - dbt_utils.expression_is_true: + expression: "col_a + col_b = total" + condition: "created_at > '2018-12-31'" +``` +After: +```yaml +version: 2 + +models: + - name: model_name + tests: + - dbt_utils.expression_is_true: + expression: "col_a + col_b = total" + config: + where: "created_at > '2018-12-31'" +``` ## Fixes +- Add star macro option to not encase column names in quotes. ([#706](https://github.com/dbt-labs/dbt-utils/pull/706)) +- Explicitly stating the namespace for cross-db macros so that the dispatch logic works correctly by restoring the dbt. prefix for all migrated cross-db macros ([#701](https://github.com/dbt-labs/dbt-utils/pull/701)) - Better handling of whitespaces in the star macro ([#651](https://github.com/dbt-labs/dbt-utils/pull/651)) - Fix to correct behavior in `mutually_exclusive_ranges` test in certain situations when `zero_length_range_allowed: true` and multiple ranges in a partition have the same value for `lower_bound_column`. ([[#659](https://github.com/dbt-labs/dbt-utils/issues/659)], [#660](https://github.com/dbt-labs/dbt-utils/pull/660)) - Fix to utilize dbt Core version of `escape_single_quotes` instead of version from dbt Utils ([[#689](https://github.com/dbt-labs/dbt-utils/issues/689)], [#692](https://github.com/dbt-labs/dbt-utils/pull/692)) ## Contributors: +- [@CR-Lough](https://github.com/CR-Lough) (#706) (#696) +- [@fivetran-catfritz](https://github.com/fivetran-catfritz) +- [@crowemi](https://github.com/crowemi) +- [@SimonQuvang](https://github.com/SimonQuvang) (#701) - [@christineberger](https://github.com/christineberger) (#624) - [@epapineau](https://github.com/epapineau) (#634) - [@courentin](https://github.com/courentin) (#651) - [@sfc-gh-ancoleman](https://github.com/sfc-gh-ancoleman) (#660) - [@zachoj10](https://github.com/zachoj10) (#692) - [@miles170](https://github.com/miles170) -- [@emilyriederer](https://github.com/emilyriederer) +- [@emilyriederer](https://github.com/emilyriederer) # dbt-utils v0.8.6 diff --git a/README.md b/README.md index 8bc754f9..36f069f6 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,11 @@ Check [dbt Hub](https://hub.getdbt.com/dbt-labs/dbt_utils/latest/) for the lates ---- +> **Note** +> This readme reflects dbt utils 1.0, currently in release candidate status. The currently shipping version of dbt utils is [0.9.6](https://github.com/dbt-labs/dbt-utils/tree/0.9.6).
+ +--- + ## Contents **[Generic tests](#generic-tests)** @@ -34,6 +39,7 @@ Check [dbt Hub](https://hub.getdbt.com/dbt-labs/dbt_utils/latest/) for the lates - [get_relations_by_pattern](#get_relations_by_pattern-source) - [get_relations_by_prefix](#get_relations_by_prefix-source) - [get_query_results_as_dict](#get_query_results_as_dict-source) + - [get_single_value](#get_single_value) - [SQL generators](#sql-generators) - [date_spine](#date_spine-source) @@ -43,8 +49,9 @@ Check [dbt Hub](https://hub.getdbt.com/dbt-labs/dbt_utils/latest/) for the lates - [star](#star-source) - [union_relations](#union_relations-source) - [generate_series](#generate_series-source) - - [surrogate_key](#surrogate_key-source) + - [generate_surrogate_key](#generate_surrogate_key-source) - [safe_add](#safe_add-source) + - [safe_divide](#safe_divide-source) - [pivot](#pivot-source) - [unpivot](#unpivot-source) - [width_bucket](#width_bucket-source) @@ -54,16 +61,7 @@ Check [dbt Hub](https://hub.getdbt.com/dbt-labs/dbt_utils/latest/) for the lates - [get_url_host](#get_url_host-source) - [get_url_path](#get_url_path-source) -- [Cross-database macros](#cross-database-macros): - - [dateadd](#dateadd-source) - - [datediff](#datediff-source) - - [split_part](#split_part-source) - - [last_day](#last_day-source) - - [listagg](#listagg-source) - - [array_construct](#array_construct-source) - - [array_append](#array_append-source) - - [array_concat](#array_concat-source) - - [cast_array_to_string](#cast_array_to_string-source) +- [Cross-database macros](#cross-database-macros) - [Jinja Helpers](#jinja-helpers) - [pretty_time](#pretty_time-source) @@ -73,7 +71,7 @@ Check [dbt Hub](https://hub.getdbt.com/dbt-labs/dbt_utils/latest/) for the lates [Materializations](#materializations): -- [insert_by_period](#insert_by_period-source) +- [insert_by_period](#insert_by_period) ---- @@ -156,7 +154,7 @@ models: expression: "col_a + col_b = total" ``` -The macro accepts an optional argument `condition` that allows for asserting +The macro accepts an optional argument `where` that allows for asserting the `expression` on a subset of all records. **Usage:** @@ -169,11 +167,10 @@ models: tests: - dbt_utils.expression_is_true: expression: "col_a + col_b = total" - condition: "created_at > '2018-12-31'" + config: + where: "created_at > '2018-12-31'" ``` -This macro can also be used at the column level. When this is done, the `expression` is evaluated against the column. - ```yaml version: 2 models: @@ -187,7 +184,8 @@ models: tests: - dbt_utils.expression_is_true: expression: '= 1' - condition: col_a = 1 + config: + where: col_a = 1 ``` #### recency ([source](macros/generic_tests/recency.sql)) @@ -816,6 +814,27 @@ select from {{ ref('users') }} ``` +#### get_single_value ([source](macros/sql/get_single_value.sql)) + +This macro returns a single value from a sql query, so that you don't need to interact with the Agate library to operate on the result + +**Usage:** + +``` +{% set sql_statement %} + select max(created_at) from {{ ref('processed_orders') }} +{% endset %} + +{%- set newest_processed_order = dbt_utils.get_single_value(sql_statement) -%} + +select + + *, + last_order_at > '{{ newest_processed_order }}' as has_unprocessed_order + +from {{ ref('users') }} +``` + ### SQL generators These macros generate SQL (either a complete query, or a part of a query). They often implement patterns that should be easy in SQL, but for some reason are much harder than they need to be. @@ -932,6 +951,7 @@ the star macro. 
This macro also has an optional `relation_alias` argument that will prefix all generated fields with an alias (`relation_alias`.`field_name`). The macro also has optional `prefix` and `suffix` arguments. When one or both are provided, they will be concatenated onto each field's alias in the output (`prefix` ~ `field_name` ~ `suffix`). NB: This prevents the output from being used in any context other than a select statement. +This macro also has an optional `quote_identifiers` argument that will encase the selected columns and their aliases in double quotes. **Args:** @@ -940,6 +960,7 @@ in the output (`prefix` ~ `field_name` ~ `suffix`). NB: This prevents the output - `relation_alias` (optional, default=`''`): will prefix all generated fields with an alias (`relation_alias`.`field_name`). - `prefix` (optional, default=`''`): will prefix the output `field_name` (`field_name as prefix_field_name`). - `suffix` (optional, default=`''`): will suffix the output `field_name` (`field_name as field_name_suffix`). +- `quote_identifiers` (optional, default=`True`): will encase selected columns and aliases in double quotes (`"field_name" as "field_name"`). **Usage:** @@ -950,6 +971,13 @@ from {{ ref('my_model') }} ``` +```sql +select + {{ dbt_utils.star(from=ref('my_model'), quote_identifiers=False) }} +from {{ ref('my_model') }} + +``` + ```sql select {{ dbt_utils.star(from=ref('my_model'), except=["exclude_field_1", "exclude_field_2"]) }} @@ -1004,14 +1032,22 @@ This macro implements a cross-database mechanism to generate an arbitrarily long {{ dbt_utils.generate_series(upper_bound=1000) }} ``` -#### surrogate_key ([source](macros/sql/surrogate_key.sql)) +#### generate_surrogate_key ([source](macros/sql/generate_surrogate_key.sql)) This macro implements a cross-database way to generate a hashed surrogate key using the fields specified. **Usage:** ``` -{{ dbt_utils.surrogate_key(['field_a', 'field_b'[,...]]) }} +{{ dbt_utils.generate_surrogate_key(['field_a', 'field_b'[,...]]) }} +``` + +A precursor to this macro, `surrogate_key()`, treated nulls and blank strings the same. If you need to enable this incorrect behaviour for backward compatibility reasons, add the following variable to your `dbt_project.yml`: + +```yaml +#dbt_project.yml +vars: + surrogate_key_treat_nulls_as_empty_strings: true #turn on legacy behaviour ``` #### safe_add ([source](macros/sql/safe_add.sql)) @@ -1024,6 +1060,21 @@ This macro implements a cross-database way to sum nullable fields using the fiel {{ dbt_utils.safe_add('field_a', 'field_b'[,...]) }} ``` +#### safe_divide ([source](macros/sql/safe_divide.sql)) + +This macro performs division but returns null if the denominator is 0. + +**Args:** + +- `numerator` (required): The number or SQL expression you want to divide. +- `denominator` (required): The number or SQL expression you want to divide by. + +**Usage:** + +``` +{{ dbt_utils.safe_divide('numerator', 'denominator') }} +``` + #### pivot ([source](macros/sql/pivot.sql)) This macro pivots values from rows to columns. @@ -1215,173 +1266,7 @@ This macro extracts a page path from a column containing a url. ### Cross-database macros -These macros make it easier for package authors (especially those writing modeling packages) to implement cross-database -compatibility. In general, you should not use these macros in your own dbt project (unless it is a package) - -Note that most of these macros moved to dbt Core as of dbt_utils v0.9.0 and dbt Core v1.2.0, and will soon be removed from `dbt_utils`.
- -To access the version defined in dbt Core, remove the `dbt_utils.` prefix (see [https://docs.getdbt.com/reference/dbt-jinja-functions/cross-database-macros](https://docs.getdbt.com/reference/dbt-jinja-functions/cross-database-macros) for examples). -As highlighted below, some of the cross-database macros are still in the process of being deprecated. - -#### dateadd ([source](macros/cross_db_utils/dateadd.sql)) - -*DEPRECATED: This macro is now provided in dbt Core. It is no longer available in dbt_utils and backwards compatibility will be removed in a future version of the package.* - -This macro adds a time/day interval to the supplied date/timestamp. Note: The `datepart` argument is database-specific. - -**Usage:** - -``` -{{ dbt_utils.dateadd(datepart='day', interval=1, from_date_or_timestamp="'2017-01-01'") }} -``` - -#### datediff ([source](macros/cross_db_utils/datediff.sql)) - -*DEPRECATED: This macro is now provided in dbt Core. It is no longer available in dbt_utils and backwards compatibility will be removed in a future version of the package.* - -This macro calculates the difference between two dates. - -**Usage:** - -``` -{{ dbt_utils.datediff("'2018-01-01'", "'2018-01-20'", 'day') }} -``` - -#### split_part ([source](macros/cross_db_utils/split_part.sql)) - -*DEPRECATED: This macro is now provided in dbt Core. It is no longer available in dbt_utils and backwards compatibility will be removed in a future version of the package.* - -This macro splits a string of text using the supplied delimiter and returns the supplied part number (1-indexed). - -**Args:** - -- `string_text` (required): Text to be split into parts. -- `delimiter_text` (required): Text representing the delimiter to split by. -- `part_number` (required): Requested part of the split (1-based). If the value is negative, the parts are counted backward from the end of the string. - -**Usage:** -When referencing a column, use one pair of quotes. When referencing a string, use single quotes enclosed in double quotes. - -``` -{{ dbt_utils.split_part(string_text='column_to_split', delimiter_text='delimiter_column', part_number=1) }} -{{ dbt_utils.split_part(string_text="'1|2|3'", delimiter_text="'|'", part_number=1) }} -``` - -#### date_trunc ([source](macros/cross_db_utils/date_trunc.sql)) - -*DEPRECATED: This macro is now provided in dbt Core. It is no longer available in dbt_utils and backwards compatibility will be removed in a future version of the package.* - -Truncates a date or timestamp to the specified datepart. Note: The `datepart` argument is database-specific. - -**Usage:** - -``` -{{ dbt_utils.date_trunc(datepart, date) }} -``` - -#### last_day ([source](macros/cross_db_utils/last_day.sql)) - -*DEPRECATED: This macro is now provided in dbt Core. It is no longer available in dbt_utils and backwards compatibility will be removed in a future version of the package.* - -Gets the last day for a given date and datepart. Notes: - -- The `datepart` argument is database-specific. -- This macro currently only supports dateparts of `month` and `quarter`. - -**Usage:** - -``` -{{ dbt_utils.last_day(date, datepart) }} -``` - -#### listagg ([source](macros/cross_db_utils/listagg.sql)) - -*DEPRECATED: This macro is now provided in dbt Core. It is no longer available in dbt_utils and backwards compatibility will be removed in a future version of the package.* - -This macro returns the concatenated input values from a group of rows separated by a specified deliminator. 
- -**Args:** - -- `measure` (required): The expression (typically a column name) that determines the values to be concatenated. To only include distinct values add keyword DISTINCT to beginning of expression (example: 'DISTINCT column_to_agg'). -- `delimiter_text` (required): Text representing the delimiter to separate concatenated values by. -- `order_by_clause` (optional): An expression (typically a column name) that determines the order of the concatenated values. -- `limit_num` (optional): Specifies the maximum number of values to be concatenated. - -Note: If there are instances of `delimiter_text` within your `measure`, you cannot include a `limit_num`. - -**Usage:** - -``` -{{ dbt_utils.listagg(measure='column_to_agg', delimiter_text="','", order_by_clause="order by order_by_column", limit_num=10) }} -``` - -#### array_construct ([source](macros/cross_db_utils/array_construct.sql)) - -*DEPRECATED: This macro is deprecated and will be removed in a future version of the package, once equivalent functionality is implemented in dbt Core.* - -This macro returns an array constructed from a set of inputs. - -**Args:** - -- `inputs` (optional): The list of array contents. If not provided, this macro will create an empty array. All inputs must be the *same data type* in order to match Postgres functionality and *not null* to match Bigquery functionality. -- `data_type` (optional): Specifies the data type of the constructed array. This is only relevant when creating an empty array (will otherwise use the data type of the inputs). If `inputs` are `data_type` are both not provided, this macro will create an empty array of type integer. - -**Usage:** - -``` -{{ dbt_utils.array_construct(['column_1', 'column_2', 'column_3']) }} -{{ dbt_utils.array_construct([],'integer') }} -``` - -#### array_append ([source](macros/cross_db_utils/array_append.sql)) - -*DEPRECATED: This macro is deprecated and will be removed in a future version of the package, once equivalent functionality is implemented in dbt Core.* - -This macro appends an element to the end of an array and returns the appended array. - -**Args:** - -- `array` (required): The array to append to. -- `new_element` (required): The element to be appended. This element must *match the data type of the existing elements* in the array in order to match Postgres functionality and *not null* to match Bigquery functionality. - -**Usage:** - -``` -{{ dbt_utils.array_append('array_column', 'element_column') }} -``` - -#### array_concat ([source](macros/cross_db_utils/array_concat.sql)) - -*DEPRECATED: This macro is deprecated and will be removed in a future version of the package, once equivalent functionality is implemented in dbt Core.* - -This macro returns the concatenation of two arrays. - -**Args:** - -- `array_1` (required): The array to append to. -- `array_2` (required): The array to be appended to `array_1`. This array must match the data type of `array_1` in order to match Postgres functionality. - -**Usage:** - -``` -{{ dbt_utils.array_concat('array_column_1', 'array_column_2') }} -``` - -#### cast_array_to_string ([source](macros/cross_db_utils/cast_array_to_string.sql)) - -*DEPRECATED: This macro is deprecated and will be removed in a future version of the package, once equivalent functionality is implemented in dbt Core.* - -This macro converts an array to a single string value and returns the resulting string. - -**Args:** - -- `array` (required): The array to convert to a string. 
- -**Usage:** - -``` -{{ dbt_utils.cast_array_to_string('array_column') }} -``` +These macros were removed from `dbt_utils` version 1.0, as they have been implemented in dbt Core instead. See [https://docs.getdbt.com/reference/dbt-jinja-functions/cross-database-macros](https://docs.getdbt.com/reference/dbt-jinja-functions/cross-database-macros). --- @@ -1428,8 +1313,8 @@ This macro logs a formatted message (with a timestamp) to the command line. This macro is useful for transforming Jinja strings into "slugs", and can be useful when using a Jinja object as a column name, especially when that Jinja object is not hardcoded. -For this example, let's pretend that we have payment methods in our payments table like `['venmo App', 'ca$h-money']`, which we can't use as a column name due to the spaces and special characters. This macro does its best to strip those out in a sensible way: `['venmo_app', -'cah_money']`. +For this example, let's pretend that we have payment methods in our payments table like `['venmo App', 'ca$h-money', '1337pay']`, which we can't use as a column name due to the spaces and special characters. This macro does its best to strip those out in a sensible way: `['venmo_app', +'cah_money', '_1337pay']`. ```sql {%- set payment_methods = dbt_utils.get_column_values( @@ -1456,76 +1341,22 @@ sum(case when payment_method = 'Venmo App' then amount end) sum(case when payment_method = 'ca$h money' then amount end) as cah_money_amount, + +sum(case when payment_method = '1337pay' then amount end) + as _1337pay_amount, ... ``` - +--- ### Materializations -#### insert_by_period ([source](macros/materializations/insert_by_period_materialization.sql)) - -`insert_by_period` allows dbt to insert records into a table one period (i.e. day, week) at a time. - -This materialization is appropriate for event data that can be processed in discrete periods. It is similar in concept to the built-in incremental materialization, but has the added benefit of building the model in chunks even during a full-refresh so is particularly useful for models where the initial run can be problematic. - -Should a run of a model using this materialization be interrupted, a subsequent run will continue building the target table from where it was interrupted (granted the `--full-refresh` flag is omitted). - -Progress is logged in the command line for easy monitoring. - -**Usage:** - -```sql -{{ - config( - materialized = "insert_by_period", - period = "day", - timestamp_field = "created_at", - start_date = "2018-01-01", - stop_date = "2018-06-01") -}} - -with events as ( - - select * - from {{ ref('events') }} - where __PERIOD_FILTER__ -- This will be replaced with a filter in the materialization code - -) - -....complex aggregates here.... - -``` - -**Configuration values:** - -- `period`: period to break the model into, must be a valid [datepart](https://docs.aws.amazon.com/redshift/latest/dg/r_Dateparts_for_datetime_functions.html) (default='Week') -- `timestamp_field`: the column name of the timestamp field that will be used to break the model into smaller queries -- `start_date`: literal date or timestamp - generally choose a date that is earlier than the start of your data -- `stop_date`: literal date or timestamp (default=current_timestamp) - -**Caveats:** - -- This materialization is compatible with dbt 0.10.1. -- This materialization has been written for Redshift. -- This materialization can only be used for a model where records are not expected to change after they are created. 
-- Any model post-hooks that use `{{ this }}` will fail using this materialization. For example: - -```yaml -models: - project-name: - post-hook: "grant select on {{ this }} to db_reader" -``` - -A useful workaround is to change the above post-hook to: - -```yaml - post-hook: "grant select on {{ this.schema }}.{{ this.name }} to db_reader" -``` +#### insert_by_period +In dbt_utils v1.0, this materialization moved to the [experimental features repository](https://github.com/dbt-labs/dbt-labs-experimental-features/tree/main/insert_by_period). ---- ### Reporting bugs and contributing code -- Want to report a bug or request a feature? Let us know in the `#package-ecosystem` channel on [Slack](http://community.getdbt.com/), or open [an issue](https://github.com/dbt-labs/dbt-utils/issues/new) +- Want to report a bug or request a feature? Let us know in the `#package-ecosystem` channel on [Slack](https://getdbt.com/community), or open [an issue](https://github.com/dbt-labs/dbt-utils/issues/new) - Want to help us build dbt-utils? Check out the [Contributing Guide](https://github.com/dbt-labs/dbt-utils/blob/main/CONTRIBUTING.md) - **TL;DR** Open a Pull Request with 1) your changes, 2) updated documentation for the `README.md` file, and 3) a working integration test. @@ -1538,11 +1369,11 @@ A useful workaround is to change the above post-hook to: - Users and maintainers of community-supported [adapter plugins](https://docs.getdbt.com/docs/available-adapters) - Users who wish to override a low-lying `dbt_utils` macro with a custom implementation, and have that implementation used by other `dbt_utils` macros -If you use Postgres, Redshift, Snowflake, or Bigquery, this likely does not apply to you. +If you use Postgres, Redshift, Snowflake, or BigQuery, this likely does not apply to you. -dbt v0.18.0 introduced [`adapter.dispatch()`](https://docs.getdbt.com/reference/dbt-jinja-functions/adapter#dispatch), a reliable way to define different implementations of the same macro across different databases. +[`adapter.dispatch()`](https://docs.getdbt.com/reference/dbt-jinja-functions/adapter#dispatch) provides a reliable way to define different implementations of the same macro across different databases. -dbt v0.20.0 introduced a new project-level `dispatch` config that enables an "override" setting for all dispatched macros. If you set this config in your project, when dbt searches for implementations of a macro in the `dbt_utils` namespace, it will search through your list of packages instead of just looking in the `dbt_utils` package. +In `dbt_project.yml`, you can define a project-level `dispatch` config that enables an "override" setting for all dispatched macros. When dbt searches for implementations of a macro in the `dbt_utils` namespace, it will search through your list of packages instead of just looking in the `dbt_utils` package. Set the config in `dbt_project.yml`: @@ -1555,17 +1386,17 @@ dispatch: - dbt_utils # always include dbt_utils as the last place to search ``` -If overriding a dispatched macro with a custom implementation in your own project's `macros/` directory, you must name your custom macro with a prefix: either `default__` (note the two underscores), or the name of your adapter followed by two underscores. For example, if you're running on Postgres and wish to override the behavior of `dbt_utils.datediff` (such that `dbt_utils.date_spine` will use your version instead), you can do this by defining a macro called either `default__datediff` or `postgres__datediff`. 
+If overriding a dispatched macro with a custom implementation in your own project's `macros/` directory, you must name your custom macro with a prefix: either `default__` (note the two underscores), or the name of your adapter followed by two underscores. For example, if you're running on Postgres and wish to override the behavior of `dbt_utils.safe_add` (such that other macros will use your version instead), you can do this by defining a macro called either `default__safe_add` or `postgres__safe_add`. -Let's say we have the config defined above, and we're running on Spark. When dbt goes to dispatch `dbt_utils.datediff`, it will search for macros the following in order: +Let's say we have the config defined above, and we're running on Spark. When dbt goes to dispatch `dbt_utils.safe_add`, it will search for macros the following in order: ``` -first_package_to_search.spark__datediff -first_package_to_search.default__datediff -second_package_to_search.spark__datediff -second_package_to_search.default__datediff -dbt_utils.spark__datediff -dbt_utils.default__datediff +first_package_to_search.spark__safe_add +first_package_to_search.default__safe_add +second_package_to_search.spark__safe_add +second_package_to_search.default__safe_add +dbt_utils.spark__safe_add +dbt_utils.default__safe_add ``` ---- @@ -1574,15 +1405,9 @@ dbt_utils.default__datediff - [What is dbt](https://docs.getdbt.com/docs/introduction)? - Read the [dbt viewpoint](https://docs.getdbt.com/docs/about/viewpoint) -- [Installation](https://docs.getdbt.com/dbt-cli/installation) +- [Installation](https://docs.getdbt.com/docs/get-started/getting-started/overview) - Join the [chat](https://www.getdbt.com/community/) on Slack for live questions and support. ## Code of Conduct -Everyone interacting in the dbt project's codebases, issue trackers, chat rooms, and mailing lists is expected to follow the [PyPA Code of Conduct]. - -[PyPA Code of Conduct]: https://www.pypa.io/en/latest/code-of-conduct/ -[slack-url]: http://ac-slackin.herokuapp.com/ -[Installation]: https://dbt.readme.io/docs/installation -[What is dbt]: https://dbt.readme.io/docs/overview -[dbt viewpoint]: https://dbt.readme.io/docs/viewpoint +Everyone interacting in the dbt project's codebases, issue trackers, chat rooms, and mailing lists is expected to follow the [PyPA Code of Conduct](https://www.pypa.io/en/latest/code-of-conduct/). 
\ No newline at end of file diff --git a/dbt_project.yml b/dbt_project.yml index 75610f60..b3dd3964 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -1,7 +1,7 @@ name: 'dbt_utils' version: '0.1.0' -require-dbt-version: [">=1.3.0-rc1", "<2.0.0"] +require-dbt-version: [">=1.3.0", "<2.0.0"] config-version: 2 diff --git a/integration_tests/data/sql/data_get_single_value.csv b/integration_tests/data/sql/data_get_single_value.csv new file mode 100644 index 00000000..a3a18139 --- /dev/null +++ b/integration_tests/data/sql/data_get_single_value.csv @@ -0,0 +1,2 @@ +date_value,float_value,int_value,string_value +2017-01-01 00:00:00,3.3,19,string_a \ No newline at end of file diff --git a/integration_tests/data/sql/data_safe_divide.csv b/integration_tests/data/sql/data_safe_divide.csv new file mode 100644 index 00000000..aec2ee1d --- /dev/null +++ b/integration_tests/data/sql/data_safe_divide.csv @@ -0,0 +1,9 @@ +numerator,denominator,output +6,0, +10,5,2 +,, +,0, +17,, +0,, +,9, +0,5,0 \ No newline at end of file diff --git a/integration_tests/data/sql/data_safe_divide_denominator_expressions.csv b/integration_tests/data/sql/data_safe_divide_denominator_expressions.csv new file mode 100644 index 00000000..3e647de1 --- /dev/null +++ b/integration_tests/data/sql/data_safe_divide_denominator_expressions.csv @@ -0,0 +1,7 @@ +numerator,denominator_1,denominator_2,output +,0,4, +6,3,2,1 +0,2,6,0 +0,,8, +5,,2, +4,0,4, \ No newline at end of file diff --git a/integration_tests/data/sql/data_safe_divide_numerator_expressions.csv b/integration_tests/data/sql/data_safe_divide_numerator_expressions.csv new file mode 100644 index 00000000..2673d9b2 --- /dev/null +++ b/integration_tests/data/sql/data_safe_divide_numerator_expressions.csv @@ -0,0 +1,7 @@ +numerator_1,numerator_2,denominator,output +0,5,9,0 +2,3,0, +0,0,0, +3,4,, +,6,14, +2,5,2,5 \ No newline at end of file diff --git a/integration_tests/data/sql/data_star_quote_identifiers.csv b/integration_tests/data/sql/data_star_quote_identifiers.csv new file mode 100644 index 00000000..0ecf1073 --- /dev/null +++ b/integration_tests/data/sql/data_star_quote_identifiers.csv @@ -0,0 +1,2 @@ +column_one +a diff --git a/integration_tests/dbt_project.yml b/integration_tests/dbt_project.yml index 61b0c5db..252b9cd7 100644 --- a/integration_tests/dbt_project.yml +++ b/integration_tests/dbt_project.yml @@ -55,6 +55,12 @@ seeds: # this.incorporate() to hardcode the node's type as otherwise dbt doesn't know it yet +post-hook: "{% do adapter.drop_relation(this.incorporate(type='table')) %}" + data_get_single_value: + +column_types: + date_value: timestamp + float_value: float + int_value: integer + data_width_bucket: +column_types: num_buckets: integer diff --git a/integration_tests/models/generic_tests/recency_time_excluded.sql b/integration_tests/models/generic_tests/recency_time_excluded.sql new file mode 100644 index 00000000..1c18e800 --- /dev/null +++ b/integration_tests/models/generic_tests/recency_time_excluded.sql @@ -0,0 +1,12 @@ +with yesterday_time as ( +select + 1 as col1, + 2 as col2, + {{ dbt.dateadd('day', -1, dbt.current_timestamp()) }} as created_at +) + +select + col1, + col2, + {{ dbt.date_trunc('day', 'created_at') }} as created_at +from yesterday_time \ No newline at end of file diff --git a/integration_tests/models/generic_tests/recency_time_included.sql b/integration_tests/models/generic_tests/recency_time_included.sql new file mode 100644 index 00000000..93fe1db1 --- /dev/null +++ 
b/integration_tests/models/generic_tests/recency_time_included.sql @@ -0,0 +1,4 @@ +select + 1 as col1, + 2 as col2, + cast({{ dbt.dateadd('hour', -23, dbt.current_timestamp()) }} as {{ dbt.type_timestamp() }}) as created_at diff --git a/integration_tests/models/generic_tests/schema.yml b/integration_tests/models/generic_tests/schema.yml index 1b0f7726..96a46a3b 100644 --- a/integration_tests/models/generic_tests/schema.yml +++ b/integration_tests/models/generic_tests/schema.yml @@ -23,7 +23,8 @@ seeds: expression: col_a + col_b = 1 - dbt_utils.expression_is_true: expression: col_a = 0.5 - condition: col_b = 0.5 + config: + where: col_b = 0.5 columns: - name: col_a tests: @@ -33,7 +34,8 @@ seeds: tests: - dbt_utils.expression_is_true: expression: = 0.5 - condition: col_a = 0.5 + config: + where: col_a = 0.5 - name: data_people columns: @@ -141,23 +143,38 @@ seeds: at_least: 0.9 models: - - name: test_recency + - name: recency_time_included tests: - dbt_utils.recency: datepart: day - field: today + field: created_at interval: 1 - dbt_utils.recency: datepart: day - field: today + field: created_at interval: 1 group_by_columns: ['col1'] - dbt_utils.recency: datepart: day - field: today + field: created_at interval: 1 group_by_columns: ['col1', 'col2'] + - name: recency_time_excluded + tests: + - dbt_utils.recency: + datepart: day + field: created_at + interval: 1 + ignore_time_component: true + - dbt_utils.recency: + datepart: day + field: created_at + interval: 1 + ignore_time_component: false + error_if: "<1" #sneaky way to ensure that the test is returning failing rows + warn_if: "<0" + - name: test_equal_rowcount tests: - dbt_utils.equal_rowcount: @@ -181,4 +198,4 @@ models: compare_model: ref('data_test_fewer_rows_than_table_2') - dbt_utils.fewer_rows_than: compare_model: ref('data_test_fewer_rows_than_table_2') - group_by_columns: ['col_a'] \ No newline at end of file + group_by_columns: ['col_a'] diff --git a/integration_tests/models/generic_tests/test_recency.sql b/integration_tests/models/generic_tests/test_recency.sql deleted file mode 100644 index abede162..00000000 --- a/integration_tests/models/generic_tests/test_recency.sql +++ /dev/null @@ -1,16 +0,0 @@ - -{% if target.type == 'postgres' %} - -select - 1 as col1, - 2 as col2, - {{ date_trunc('day', current_timestamp()) }} as today - -{% else %} - -select - 1 as col1, - 2 as col2, - cast({{ date_trunc('day', current_timestamp()) }} as datetime) as today - -{% endif %} \ No newline at end of file diff --git a/integration_tests/models/sql/schema.yml b/integration_tests/models/sql/schema.yml index d46d9283..fab8a20b 100644 --- a/integration_tests/models/sql/schema.yml +++ b/integration_tests/models/sql/schema.yml @@ -1,6 +1,36 @@ version: 2 models: + - name: test_get_single_value + tests: + - assert_equal: + actual: date_actual + expected: date_expected + - assert_equal: + actual: float_actual + expected: float_expected + - assert_equal: + actual: int_actual + expected: int_expected + - assert_equal: + actual: string_actual + expected: string_expected + + - name: test_get_single_value_default + tests: + - assert_equal: + actual: date_actual + expected: date_expected + - assert_equal: + actual: float_actual + expected: float_expected + - assert_equal: + actual: int_actual + expected: int_expected + - assert_equal: + actual: string_actual + expected: string_expected + - name: test_generate_series tests: - dbt_utils.equality: @@ -91,6 +121,12 @@ models: actual: actual expected: expected + - name: test_safe_divide + tests: + - 
assert_equal: + actual: actual + expected: expected + - name: test_pivot tests: - dbt_utils.equality: @@ -121,6 +157,12 @@ models: - dbt_utils.equality: compare_model: ref('data_star_expected') + - name: test_star_quote_identifiers + tests: + - assert_equal: + actual: actual + expected: expected + - name: test_star_prefix_suffix tests: - dbt_utils.equality: @@ -136,6 +178,12 @@ models: - dbt_utils.equality: compare_model: ref('data_star_expected') + - name: test_star_no_columns + columns: + - name: canary_column #If the no-columns state isn't hit, this table won't be queryable because there will be a missing comma + tests: + - not_null + - name: test_generate_surrogate_key tests: - assert_equal: diff --git a/integration_tests/models/sql/test_get_single_value.sql b/integration_tests/models/sql/test_get_single_value.sql new file mode 100644 index 00000000..90d3aff1 --- /dev/null +++ b/integration_tests/models/sql/test_get_single_value.sql @@ -0,0 +1,42 @@ +{# + Dear future reader, + Before you go restructuring the delicate web of casts and quotes below, a warning: + I once thought as you are thinking. Proceed with caution. +#} + +{% set date_statement %} + select date_value from {{ ref('data_get_single_value') }} +{% endset %} + +{% set float_statement %} + select float_value from {{ ref('data_get_single_value') }} +{% endset %} + +{% set int_statement %} + select int_value from {{ ref('data_get_single_value') }} +{% endset %} + +{% set string_statement %} + select string_value from {{ ref('data_get_single_value') }} +{% endset %} + +with default_data as ( + + select + cast(date_value as {{ dbt.type_timestamp() }}) as date_expected, + cast({{ dbt.string_literal(dbt_utils.get_single_value(date_statement)) }} as {{ dbt.type_timestamp() }}) as date_actual, + + float_value as float_expected, + {{ dbt_utils.get_single_value(float_statement) }} as float_actual, + + int_value as int_expected, + {{ dbt_utils.get_single_value(int_statement) }} as int_actual, + + string_value as string_expected, + cast({{ dbt.string_literal(dbt_utils.get_single_value(string_statement)) }} as {{ dbt.type_string() }}) as string_actual + + from {{ ref('data_get_single_value') }} +) + +select * +from default_data \ No newline at end of file diff --git a/integration_tests/models/sql/test_get_single_value_default.sql b/integration_tests/models/sql/test_get_single_value_default.sql new file mode 100644 index 00000000..7b049025 --- /dev/null +++ b/integration_tests/models/sql/test_get_single_value_default.sql @@ -0,0 +1,28 @@ +{# + Dear future reader, + Before you go restructuring the delicate web of casts and quotes below, a warning: + I once thought as you are thinking. Proceed with caution. 
+#} + +{% set false_statement = 'select 1 as id ' ~ limit_zero() %} + +with default_data as ( + + select + cast({{ dbt.string_literal('2022-01-01') }} as {{ dbt.type_timestamp() }}) as date_expected, + cast({{ dbt.string_literal(dbt_utils.get_single_value(false_statement, '2022-01-01')) }} as {{ dbt.type_timestamp() }}) as date_actual, + + 1.23456 as float_expected, + {{ dbt_utils.get_single_value(false_statement, 1.23456) }} as float_actual, + + 123456 as int_expected, + {{ dbt_utils.get_single_value(false_statement, 123456) }} as int_actual, + + cast({{ dbt.string_literal('fallback') }} as {{ dbt.type_string() }}) as string_expected, + cast({{ dbt.string_literal(dbt_utils.get_single_value(false_statement, 'fallback')) }} as {{ dbt.type_string() }}) as string_actual + + from {{ ref('data_get_single_value') }} +) + +select * +from default_data \ No newline at end of file diff --git a/integration_tests/models/sql/test_safe_divide.sql b/integration_tests/models/sql/test_safe_divide.sql new file mode 100644 index 00000000..34624787 --- /dev/null +++ b/integration_tests/models/sql/test_safe_divide.sql @@ -0,0 +1,38 @@ + +with data_safe_divide as ( + + select * from {{ ref('data_safe_divide') }} + +), + +data_safe_divide_numerator_expressions as ( + + select * from {{ ref('data_safe_divide_numerator_expressions') }} +), + +data_safe_divide_denominator_expressions as ( + + select * from {{ ref('data_safe_divide_denominator_expressions') }} +) + +select + {{ dbt_utils.safe_divide('numerator', 'denominator') }} as actual, + output as expected + +from data_safe_divide + +union all + +select + {{ dbt_utils.safe_divide('numerator_1 * numerator_2', 'denominator') }} as actual, + output as expected + +from data_safe_divide_numerator_expressions + +union all + +select + {{ dbt_utils.safe_divide('numerator', 'denominator_1 * denominator_2') }} as actual, + output as expected + +from data_safe_divide_denominator_expressions \ No newline at end of file diff --git a/integration_tests/models/sql/test_star_no_columns.sql b/integration_tests/models/sql/test_star_no_columns.sql new file mode 100644 index 00000000..ff5a5de9 --- /dev/null +++ b/integration_tests/models/sql/test_star_no_columns.sql @@ -0,0 +1,11 @@ +with data as ( + + select + {{ dbt_utils.star(from=ref('data_star'), except=['field_1', 'field_2', 'field_3']) }} + -- if star() returns `*` or a list of columns, this query will fail because there's no comma between the columns + 1 as canary_column + from {{ ref('data_star') }} + +) + +select * from data diff --git a/integration_tests/models/sql/test_star_quote_identifiers.sql b/integration_tests/models/sql/test_star_quote_identifiers.sql new file mode 100644 index 00000000..180d5bca --- /dev/null +++ b/integration_tests/models/sql/test_star_quote_identifiers.sql @@ -0,0 +1,9 @@ +select + {{ dbt.string_literal(adapter.quote("column_one")) | lower }} as expected, + {{ dbt.string_literal(dbt_utils.star(from=ref('data_star_quote_identifiers'), quote_identifiers=True)) | trim | lower }} as actual + +union all + +select + {{ dbt.string_literal("column_one") | lower }} as expected, + {{ dbt.string_literal(dbt_utils.star(from=ref('data_star_quote_identifiers'), quote_identifiers=False)) | trim | lower }} as actual \ No newline at end of file diff --git a/integration_tests/tests/jinja_helpers/test_slugify.sql b/integration_tests/tests/jinja_helpers/test_slugify.sql index c0839f59..7d07ec44 100644 --- a/integration_tests/tests/jinja_helpers/test_slugify.sql +++ 
b/integration_tests/tests/jinja_helpers/test_slugify.sql @@ -1,7 +1,9 @@ -{% if dbt_utils.slugify('!Hell0 world-hi') == 'hell0_world_hi' %} - {# Return 0 rows for the test to pass #} - select 1 as col_name {{ limit_zero() }} -{% else %} - {# Return >0 rows for the test to fail #} - select 1 as col_name -{% endif %} +with comparisons as ( + select '{{ dbt_utils.slugify("!Hell0 world-hi") }}' as output, 'hell0_world_hi' as expected + union all + select '{{ dbt_utils.slugify("0Hell0 world-hi") }}' as output, '_0hell0_world_hi' as expected +) + +select * +from comparisons +where output != expected \ No newline at end of file diff --git a/integration_tests/tests/sql/test_get_single_value_multiple_rows.sql b/integration_tests/tests/sql/test_get_single_value_multiple_rows.sql new file mode 100644 index 00000000..fc4c9270 --- /dev/null +++ b/integration_tests/tests/sql/test_get_single_value_multiple_rows.sql @@ -0,0 +1,18 @@ +{% set query %} +with input as ( + select 1 as id, 4 as di + union all + select 2 as id, 5 as di + union all + select 3 as id, 6 as di +) +{% endset %} + +with comparisons as ( + select {{ dbt_utils.get_single_value(query ~ " select min(id) from input") }} as output, 1 as expected + union all + select {{ dbt_utils.get_single_value(query ~ " select max(di) from input") }} as output, 6 as expected +) +select * +from comparisons +where output != expected \ No newline at end of file diff --git a/macros/cross_db_utils/deprecated/xdb_deprecation_warning.sql b/macros/cross_db_utils/deprecated/xdb_deprecation_warning.sql deleted file mode 100644 index a66f1015..00000000 --- a/macros/cross_db_utils/deprecated/xdb_deprecation_warning.sql +++ /dev/null @@ -1,4 +0,0 @@ -{% macro xdb_deprecation_warning_without_replacement(macro, package, model) %} - {%- set error_message = "Warning: the `" ~ macro ~"` macro is deprecated and will be removed in a future version of the package, once equivalent functionality is implemented in dbt Core. The " ~ package ~ "." ~ model ~ " model triggered this warning." 
-%} - {%- do exceptions.warn(error_message) -%} -{% endmacro %} \ No newline at end of file diff --git a/macros/generic_tests/cardinality_equality.sql b/macros/generic_tests/cardinality_equality.sql index 5aa95efd..824f0e51 100644 --- a/macros/generic_tests/cardinality_equality.sql +++ b/macros/generic_tests/cardinality_equality.sql @@ -26,7 +26,7 @@ group by {{ field }} except_a as ( select * from table_a - {{ except() }} + {{ dbt.except() }} select * from table_b ), @@ -34,7 +34,7 @@ except_a as ( except_b as ( select * from table_b - {{ except() }} + {{ dbt.except() }} select * from table_a ), diff --git a/macros/generic_tests/equality.sql b/macros/generic_tests/equality.sql index f2128af5..ffc6a2b8 100644 --- a/macros/generic_tests/equality.sql +++ b/macros/generic_tests/equality.sql @@ -49,7 +49,7 @@ b as ( a_minus_b as ( select {{compare_cols_csv}} from a - {{ except() }} + {{ dbt.except() }} select {{compare_cols_csv}} from b ), @@ -57,7 +57,7 @@ a_minus_b as ( b_minus_a as ( select {{compare_cols_csv}} from b - {{ except() }} + {{ dbt.except() }} select {{compare_cols_csv}} from a ), diff --git a/macros/generic_tests/expression_is_true.sql b/macros/generic_tests/expression_is_true.sql index b8aa7ad4..2c72b8a5 100644 --- a/macros/generic_tests/expression_is_true.sql +++ b/macros/generic_tests/expression_is_true.sql @@ -1,20 +1,14 @@ -{% test expression_is_true(model, expression, column_name=None, condition='1=1') %} -{# T-SQL has no boolean data type so we use 1=1 which returns TRUE #} -{# ref https://stackoverflow.com/a/7170753/3842610 #} - {{ return(adapter.dispatch('test_expression_is_true', 'dbt_utils')(model, expression, column_name, condition)) }} +{% test expression_is_true(model, expression, column_name=None) %} + {{ return(adapter.dispatch('test_expression_is_true', 'dbt_utils')(model, expression, column_name)) }} {% endtest %} -{% macro default__test_expression_is_true(model, expression, column_name, condition) %} +{% macro default__test_expression_is_true(model, expression, column_name) %} {% set column_list = '*' if should_store_failures() else "1" %} -with meet_condition as ( - select * from {{ model }} where {{ condition }} -) - select {{ column_list }} -from meet_condition +from {{ model }} {% if column_name is none %} where not({{ expression }}) {%- else %} diff --git a/macros/generic_tests/recency.sql b/macros/generic_tests/recency.sql index 98ed2ad5..7fe2cafd 100644 --- a/macros/generic_tests/recency.sql +++ b/macros/generic_tests/recency.sql @@ -1,10 +1,11 @@ -{% test recency(model, field, datepart, interval, group_by_columns = []) %} - {{ return(adapter.dispatch('test_recency', 'dbt_utils')(model, field, datepart, interval, group_by_columns)) }} +{% test recency(model, field, datepart, interval, ignore_time_component=False, group_by_columns = []) %} + {{ return(adapter.dispatch('test_recency', 'dbt_utils')(model, field, datepart, interval, ignore_time_component, group_by_columns)) }} {% endtest %} -{% macro default__test_recency(model, field, datepart, interval, group_by_columns) %} +{% macro default__test_recency(model, field, datepart, interval, ignore_time_component, group_by_columns) %} + +{% set threshold = 'cast(' ~ dbt.dateadd(datepart, interval * -1, dbt.current_timestamp()) ~ ' as ' ~ ('date' if ignore_time_component else dbt.type_timestamp()) ~ ')' %} -{% set threshold = dateadd(datepart, interval * -1, current_timestamp()) %} {% if group_by_columns|length() > 0 %} {% set select_gb_cols = group_by_columns|join(' ,') + ', ' %} {% set groupby_gb_cols = 
'group by ' + group_by_columns|join(',') %} @@ -16,7 +17,11 @@ with recency as ( select {{ select_gb_cols }} - max({{field}}) as most_recent + {% if ignore_time_component %} + cast(max({{ field }}) as date) as most_recent + {%- else %} + max({{ field }}) as most_recent + {%- endif %} from {{ model }} diff --git a/macros/generic_tests/sequential_values.sql b/macros/generic_tests/sequential_values.sql index 619ee10e..736ccbd8 100644 --- a/macros/generic_tests/sequential_values.sql +++ b/macros/generic_tests/sequential_values.sql @@ -30,7 +30,7 @@ validation_errors as ( * from windowed {% if datepart %} - where not(cast({{ column_name }} as {{ type_timestamp() }})= cast({{ dateadd(datepart, interval, previous_column_name) }} as {{ type_timestamp() }})) + where not(cast({{ column_name }} as {{ dbt.type_timestamp() }})= cast({{ dbt.dateadd(datepart, interval, previous_column_name) }} as {{ dbt.type_timestamp() }})) {% else %} where not({{ column_name }} = {{ previous_column_name }} + {{ interval }}) {% endif %} diff --git a/macros/jinja_helpers/slugify.sql b/macros/jinja_helpers/slugify.sql index 1b3c7272..14c0c74c 100644 --- a/macros/jinja_helpers/slugify.sql +++ b/macros/jinja_helpers/slugify.sql @@ -6,7 +6,9 @@ {% set string = modules.re.sub('[ -]+', '_', string) %} {#- Only take letters, numbers, and underscores -#} {% set string = modules.re.sub('[^a-z0-9_]+', '', string) %} +{#- Prepends "_" if string begins with a number -#} +{% set string = modules.re.sub('^[0-9]', '_' + string[0], string) %} {{ return(string) }} -{% endmacro %} +{% endmacro %} \ No newline at end of file diff --git a/macros/sql/date_spine.sql b/macros/sql/date_spine.sql index 5b4e8be0..43dfafa9 100644 --- a/macros/sql/date_spine.sql +++ b/macros/sql/date_spine.sql @@ -5,7 +5,7 @@ {% macro default__get_intervals_between(start_date, end_date, datepart) -%} {%- call statement('get_intervals_between', fetch_result=True) %} - select {{ datediff(start_date, end_date, datepart) }} + select {{ dbt.datediff(start_date, end_date, datepart) }} {%- endcall -%} @@ -35,7 +35,7 @@ date_spine( "day", "to_date('01/01/2016', 'mm/dd/yyyy')", - "dateadd(week, 1, current_date)" + "dbt.dateadd(week, 1, current_date)" ) #} @@ -51,7 +51,7 @@ all_periods as ( select ( {{ - dateadd( + dbt.dateadd( datepart, "row_number() over (order by 1) - 1", start_date diff --git a/macros/sql/generate_surrogate_key.sql b/macros/sql/generate_surrogate_key.sql index 52165f6f..4de857e2 100644 --- a/macros/sql/generate_surrogate_key.sql +++ b/macros/sql/generate_surrogate_key.sql @@ -15,7 +15,7 @@ {%- for field in field_list -%} {%- do fields.append( - "coalesce(cast(" ~ field ~ " as " ~ type_string() ~ "), '" ~ default_null_value ~"')" + "coalesce(cast(" ~ field ~ " as " ~ dbt.type_string() ~ "), '" ~ default_null_value ~"')" ) -%} {%- if not loop.last %} @@ -24,6 +24,6 @@ {%- endfor -%} -{{ hash(concat(fields)) }} +{{ dbt.hash(dbt.concat(fields)) }} {%- endmacro -%} diff --git a/macros/sql/get_single_value.sql b/macros/sql/get_single_value.sql new file mode 100644 index 00000000..479a45f1 --- /dev/null +++ b/macros/sql/get_single_value.sql @@ -0,0 +1,33 @@ +{% macro get_single_value(query, default=none) %} + {{ return(adapter.dispatch('get_single_value', 'dbt_utils')(query, default)) }} +{% endmacro %} + +{% macro default__get_single_value(query, default) %} + +{# This macro returns the (0, 0) record in a query, i.e. 
the first row of the first column #} + + {%- call statement('get_query_result', fetch_result=True, auto_begin=false) -%} + + {{ query }} + + {%- endcall -%} + + {%- if execute -%} + + {% set r = load_result('get_query_result').table.columns[0].values() %} + {% if r | length == 0 %} + {% do print('Query `' ~ query ~ '` returned no rows. Using the default value: ' ~ default) %} + {% set sql_result = default %} + {% else %} + {% set sql_result = r[0] %} + {% endif %} + + {%- else -%} + + {% set sql_result = default %} + + {%- endif -%} + + {% do return(sql_result) %} + +{% endmacro %} \ No newline at end of file diff --git a/macros/sql/pivot.sql b/macros/sql/pivot.sql index 4233ba9d..3eabc727 100644 --- a/macros/sql/pivot.sql +++ b/macros/sql/pivot.sql @@ -69,7 +69,7 @@ Arguments: {{ agg }}( {% if distinct %} distinct {% endif %} case - when {{ column }} {{ cmp }} '{{ escape_single_quotes(value) }}' + when {{ column }} {{ cmp }} '{{ dbt.escape_single_quotes(value) }}' then {{ then_value }} else {{ else_value }} end diff --git a/macros/sql/safe_divide.sql b/macros/sql/safe_divide.sql new file mode 100644 index 00000000..791f72c3 --- /dev/null +++ b/macros/sql/safe_divide.sql @@ -0,0 +1,7 @@ +{% macro safe_divide(numerator, denominator) -%} + {{ return(adapter.dispatch('safe_divide', 'dbt_utils')(numerator, denominator)) }} +{%- endmacro %} + +{% macro default__safe_divide(numerator, denominator) %} + ( {{ numerator }} ) / nullif( ( {{ denominator }} ), 0) +{% endmacro %} \ No newline at end of file diff --git a/macros/sql/star.sql b/macros/sql/star.sql index 99787e22..515b77b6 100644 --- a/macros/sql/star.sql +++ b/macros/sql/star.sql @@ -1,24 +1,40 @@ -{% macro star(from, relation_alias=False, except=[], prefix='', suffix='') -%} - {{ return(adapter.dispatch('star', 'dbt_utils')(from, relation_alias, except, prefix, suffix)) }} -{% endmacro %} - -{% macro default__star(from, relation_alias=False, except=[], prefix='', suffix='') -%} - {%- do dbt_utils._is_relation(from, 'star') -%} - {%- do dbt_utils._is_ephemeral(from, 'star') -%} - - {#-- Prevent querying of db in parsing mode. This works because this macro does not create any new refs. #} - {%- if not execute -%} - {{ return('*') }} - {%- endif -%} - - {% set cols = dbt_utils.get_filtered_columns_in_relation(from, except) %} - - {%- if cols|length <= 0 -%} - {{- return('*') -}} - {%- else -%} - {%- for col in cols %} - {%- if relation_alias %}{{ relation_alias }}.{% else %}{%- endif -%}{{ adapter.quote(col)|trim }} {%- if prefix!='' or suffix!='' %} as {{ adapter.quote(prefix ~ col ~ suffix)|trim }} {%- endif -%} - {%- if not loop.last %},{{ '\n ' }}{% endif %} - {%- endfor -%} - {% endif %} -{%- endmacro %} +{% macro star(from, relation_alias=False, except=[], prefix='', suffix='', quote_identifiers=True) -%} + {{ return(adapter.dispatch('star', 'dbt_utils')(from, relation_alias, except, prefix, suffix, quote_identifiers)) }} +{% endmacro %} + +{% macro default__star(from, relation_alias=False, except=[], prefix='', suffix='', quote_identifiers=True) -%} + {%- do dbt_utils._is_relation(from, 'star') -%} + {%- do dbt_utils._is_ephemeral(from, 'star') -%} + + {#-- Prevent querying of db in parsing mode. This works because this macro does not create any new refs. #} + {%- if not execute -%} + {% do return('*') %} + {%- endif -%} + + {% set cols = dbt_utils.get_filtered_columns_in_relation(from, except) %} + + {%- if cols|length <= 0 -%} + {% if flags.WHICH == 'compile' %} + {% set response %} +* +/* No columns were returned. 
Maybe the relation doesn't exist yet +or all columns were excluded. This star is only output during +dbt compile, and exists to keep SQLFluff happy. */ + {% endset %} + {% do return(response) %} + {% else %} + {% do return("/* no columns returned from star() macro */") %} + {% endif %} + {%- else -%} + {%- for col in cols %} + {%- if relation_alias %}{{ relation_alias }}.{% else %}{%- endif -%} + {%- if quote_identifiers -%} + {{ adapter.quote(col)|trim }} {%- if prefix!='' or suffix!='' %} as {{ adapter.quote(prefix ~ col ~ suffix)|trim }} {%- endif -%} + {%- else -%} + {{ col|trim }} {%- if prefix!='' or suffix!='' %} as {{ (prefix ~ col ~ suffix)|trim }} {%- endif -%} + {% endif %} + {%- if not loop.last %},{{ '\n ' }}{%- endif -%} + {%- endfor -%} + {% endif %} +{%- endmacro %} + diff --git a/macros/sql/surrogate_key.sql b/macros/sql/surrogate_key.sql index 65c67e66..98e968ae 100644 --- a/macros/sql/surrogate_key.sql +++ b/macros/sql/surrogate_key.sql @@ -9,7 +9,7 @@ Warning: `dbt_utils.surrogate_key` has been replaced by \ `dbt_utils.generate_surrogate_key`. The new macro treats null values \ differently to empty strings. To restore the behaviour of the original \ -macro, add a variable scoped to the dbt_utils package called \ +macro, add a global variable in dbt_project.yml called \ `surrogate_key_treat_nulls_as_empty_strings` to your \ dbt_project.yml file with a value of True. \ The {}.{} model triggered this warning. \ diff --git a/macros/sql/union.sql b/macros/sql/union.sql index 3a906e1c..ac289e2f 100644 --- a/macros/sql/union.sql +++ b/macros/sql/union.sql @@ -100,7 +100,7 @@ select {%- if source_column_name is not none %} - cast({{ string_literal(relation) }} as {{ type_string() }}) as {{ source_column_name }}, + cast({{ dbt.string_literal(relation) }} as {{ dbt.type_string() }}) as {{ source_column_name }}, {%- endif %} {% for col_name in ordered_column_names -%} diff --git a/macros/sql/unpivot.sql b/macros/sql/unpivot.sql index b88a6191..371b314b 100644 --- a/macros/sql/unpivot.sql +++ b/macros/sql/unpivot.sql @@ -48,9 +48,9 @@ Arguments: {{ exclude_col }}, {%- endfor %} - cast('{{ col.column }}' as {{ type_string() }}) as {{ field_name }}, + cast('{{ col.column }}' as {{ dbt.type_string() }}) as {{ field_name }}, cast( {% if col.data_type == 'boolean' %} - {{ cast_bool_to_text(col.column) }} + {{ dbt.cast_bool_to_text(col.column) }} {% else %} {{ col.column }} {% endif %} diff --git a/macros/sql/width_bucket.sql b/macros/sql/width_bucket.sql index 9a3b3d11..324e8594 100644 --- a/macros/sql/width_bucket.sql +++ b/macros/sql/width_bucket.sql @@ -13,8 +13,8 @@ case when mod( - {{ dbt.safe_cast(expr, type_numeric() ) }}, - {{ dbt.safe_cast(bin_size, type_numeric() ) }} + {{ dbt.safe_cast(expr, dbt.type_numeric() ) }}, + {{ dbt.safe_cast(bin_size, dbt.type_numeric() ) }} ) = 0 then 1 else 0 @@ -38,8 +38,8 @@ -- to break ties when the amount is exactly at the bucket edge case when - {{ dbt.safe_cast(expr, type_numeric() ) }} % - {{ dbt.safe_cast(bin_size, type_numeric() ) }} + {{ dbt.safe_cast(expr, dbt.type_numeric() ) }} % + {{ dbt.safe_cast(bin_size, dbt.type_numeric() ) }} = 0 then 1 else 0 diff --git a/macros/web/get_url_host.sql b/macros/web/get_url_host.sql index d78dff1a..ddd01974 100644 --- a/macros/web/get_url_host.sql +++ b/macros/web/get_url_host.sql @@ -5,11 +5,11 @@ {% macro default__get_url_host(field) -%} {%- set parsed = - split_part( - split_part( - replace( - replace( - replace(field, "'android-app://'", "''" + dbt.split_part( + dbt.split_part( + dbt.replace( + 
dbt.replace( + dbt.replace(field, "'android-app://'", "''" ), "'http://'", "''" ), "'https://'", "''" ), "'/'", 1 @@ -21,7 +21,7 @@ {{ dbt.safe_cast( parsed, - type_string() + dbt.type_string() )}} {%- endmacro %} diff --git a/macros/web/get_url_parameter.sql b/macros/web/get_url_parameter.sql index fb92ad97..8147b41f 100644 --- a/macros/web/get_url_parameter.sql +++ b/macros/web/get_url_parameter.sql @@ -6,7 +6,7 @@ {%- set formatted_url_parameter = "'" + url_parameter + "='" -%} -{%- set split = split_part(split_part(field, formatted_url_parameter, 2), "'&'", 1) -%} +{%- set split = dbt.split_part(dbt.split_part(field, formatted_url_parameter, 2), "'&'", 1) -%} nullif({{ split }},'') diff --git a/macros/web/get_url_path.sql b/macros/web/get_url_path.sql index cf18f386..b59401df 100644 --- a/macros/web/get_url_path.sql +++ b/macros/web/get_url_path.sql @@ -5,30 +5,30 @@ {% macro default__get_url_path(field) -%} {%- set stripped_url = - replace( - replace(field, "'http://'", "''"), "'https://'", "''") + dbt.replace( + dbt.replace(field, "'http://'", "''"), "'https://'", "''") -%} {%- set first_slash_pos -%} coalesce( - nullif({{ position("'/'", stripped_url) }}, 0), - {{ position("'?'", stripped_url) }} - 1 + nullif({{ dbt.position("'/'", stripped_url) }}, 0), + {{ dbt.position("'?'", stripped_url) }} - 1 ) {%- endset -%} {%- set parsed_path = - split_part( - right( + dbt.split_part( + dbt.right( stripped_url, - length(stripped_url) ~ "-" ~ first_slash_pos + dbt.length(stripped_url) ~ "-" ~ first_slash_pos ), "'?'", 1 ) -%} - {{ safe_cast( + {{ dbt.safe_cast( parsed_path, - type_string() + dbt.type_string() )}} {%- endmacro %}
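
A minimal usage sketch of the new `dbt_utils.get_single_value` macro added above, assuming a hypothetical `stg_orders` model with an `order_id` column (neither is defined in this patch):

    {% set max_order_id = dbt_utils.get_single_value(
        "select max(order_id) from " ~ ref('stg_orders'),
        default=0
    ) %}

    select *
    from {{ ref('stg_orders') }}
    where order_id = {{ max_order_id }}

At parse time the macro skips the statement call and returns the `default` value, so the model still compiles before the warehouse is queried; at run time it returns the first column of the first row, or the default (with a printed notice) when the query returns no rows.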
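
A sketch combining the new `safe_divide` macro with the new `quote_identifiers` argument to `star`; `stg_order_items` and its columns are placeholder names, not part of this patch:

    select
        {{ dbt_utils.star(from=ref('stg_order_items'), except=['_loaded_at'], quote_identifiers=false) }},
        {{ dbt_utils.safe_divide('amount', 'quantity') }} as unit_price
    from {{ ref('stg_order_items') }}

Here `safe_divide` renders to `( amount ) / nullif( ( quantity ), 0)`, returning null rather than raising a divide-by-zero error, and `quote_identifiers=false` emits the selected column names without adapter quoting.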
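
A sketch of the updated `slugify` helper, which now prefixes an underscore when the slug would start with a digit so that generated aliases stay valid; `stg_payments`, `payment_method`, and the example values are assumptions:

    {%- set payment_methods = ['Bank transfer', '0% APR financing'] -%}

    select
        order_id
        {%- for method in payment_methods %},
        sum(case when payment_method = '{{ method }}' then amount else 0 end)
            as {{ dbt_utils.slugify(method) }}_amount
        {%- endfor %}
    from {{ ref('stg_payments') }}
    group by 1

The second alias resolves to `_0_apr_financing_amount` instead of `0_apr_financing_amount`, which most warehouses would reject as an unquoted identifier.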