Commit 56c3211
Merge pull request #60 from dbt-msft/synapse_rdbms_type
enable Azure SQL's "elastic query"-style external tables (RDBMS / cross-database)
jtcohen6 authored Dec 3, 2020
2 parents b7e8c28 + dc442ee commit 56c3211
Showing 10 changed files with 139 additions and 49 deletions.
23 changes: 19 additions & 4 deletions .circleci/config.yml
@@ -58,14 +58,28 @@ jobs:
       - store_artifacts:
           path: ./logs
 
-  integration-sqlserver:
+  integration-synapse:
     docker:
       - image: dataders/pyodbc:1.2
     steps:
       - checkout
       - run:
-          name: "Run Tests - sqlserver"
-          command: ./run_test.sh sqlserver
+          name: "Run Tests - synapse"
+          command: ./run_test.sh synapse
       - store_artifacts:
           path: ./logs
 
+  integration-azuresql:
+    docker:
+      - image: dataders/pyodbc:1.2
+    steps:
+      - checkout
+      - run:
+          name: "wait for Synapse tests to finish"
+          command: sleep 60
+      - run:
+          name: "Run Tests - azuresql"
+          command: ./run_test.sh azuresql
+      - store_artifacts:
+          path: ./logs
@@ -78,4 +92,5 @@ workflows:
       - integration-snowflake
       - integration-bigquery
       - integration-databricks
-      - integration-sqlserver
+      - integration-synapse
+      - integration-azuresql
2 changes: 2 additions & 0 deletions README.md
@@ -13,6 +13,7 @@ This package provides:
 * BigQuery
 * Spark
 * Synapse
+* Azure SQL
 
 ![sample docs](etc/sample_docs.png)
@@ -46,6 +47,7 @@ The macros assume that you:
     - an external stage (Snowflake)
     - an external schema + S3 bucket (Redshift Spectrum)
     - an external data source and file format (Synapse)
+    - an external data source and database-scoped credential (Azure SQL; see the sketch below)
     - a Google Cloud Storage bucket (BigQuery)
     - an accessible set of files (Spark)
 2. Have the appropriate permissions to create tables using that scaffolding
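For the new Azure SQL path, that scaffolding is a database-scoped credential plus an external data source of `TYPE = RDBMS`. A minimal sketch of the setup, with illustrative server, database, and login names, assuming the database already has a master key:

```sql
-- Illustrative names throughout; requires an existing database master key.
CREATE DATABASE SCOPED CREDENTIAL elastic_query_cred
WITH IDENTITY = 'remote_sql_login', SECRET = 'remote_sql_password';

-- TYPE = RDBMS is what enables "elastic query" (cross-database) external tables
CREATE EXTERNAL DATA SOURCE remote_db_source
WITH (
    TYPE = RDBMS,
    LOCATION = 'myserver.database.windows.net',
    DATABASE_NAME = 'remote_db',
    CREDENTIAL = elastic_query_cred
);
```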
15 changes: 14 additions & 1 deletion integration_tests/ci/sample.profiles.yml
@@ -49,7 +49,7 @@ integration_tests:
       token: "{{ env_var('DBT_DATABRICKS_TOKEN') }}"
       schema: dbt_external_tables_integration_tests_databricks
 
-    sqlserver:
+    synapse:
       type: sqlserver
       driver: "ODBC Driver 17 for SQL Server"
       port: 1433
@@ -61,3 +61,16 @@
       encrypt: 'yes'
       trust_cert: 'yes'
       threads: 1
+
+    azuresql:
+      type: sqlserver
+      driver: "ODBC Driver 17 for SQL Server"
+      port: 1433
+      host: "{{ env_var('DBT_AZURESQL_SERVER') }}"
+      database: "{{ env_var('DBT_AZURESQL_DB') }}"
+      username: "{{ env_var('DBT_AZURESQL_UID') }}"
+      password: "{{ env_var('DBT_AZURESQL_PWD') }}"
+      schema: dbt_external_tables_integration_tests_azuresql
+      encrypt: yes
+      trust_cert: yes
+      threads: 1
6 changes: 4 additions & 2 deletions integration_tests/dbt_project.yml
@@ -33,8 +33,10 @@ sources:
     +enabled: "{{ target.type == 'bigquery' }}"
   spark_external:
     +enabled: "{{ target.type == 'spark' }}"
-  sqlserver_external:
-    +enabled: "{{ target.type == 'sqlserver' }}"
+  synapse_external:
+    +enabled: "{{ target.name == 'synapse' }}"
+  azuresql_external:
+    +enabled: "{{ target.name == 'azuresql' }}"
 
 seeds:
   quote_columns: false
90 changes: 63 additions & 27 deletions integration_tests/macros/plugins/sqlserver/prep_external.sql
@@ -1,35 +1,71 @@
 {% macro sqlserver__prep_external() %}
 
     {% set external_data_source = target.schema ~ '.dbt_external_tables_testing' %}
 
-    {% set create_external_data_source %}
-        IF NOT EXISTS ( SELECT * FROM sys.external_data_sources WHERE name = '{{external_data_source}}' )
-
-            CREATE EXTERNAL DATA SOURCE [{{external_data_source}}] WITH (
-                TYPE = HADOOP,
-                LOCATION = 'wasbs://[email protected]'
-            )
-    {% endset %}
-
-    {% set external_file_format = target.schema ~ '.dbt_external_ff_testing' %}
-
-    {% set create_external_file_format %}
-        IF NOT EXISTS ( SELECT * FROM sys.external_file_formats WHERE name = '{{external_file_format}}' )
-
-            CREATE EXTERNAL FILE FORMAT [{{external_file_format}}]
-            WITH (
-                FORMAT_TYPE = DELIMITEDTEXT,
-                FORMAT_OPTIONS (
-                    FIELD_TERMINATOR = N',',
-                    FIRST_ROW = 2,
-                    USE_TYPE_DEFAULT = True
-                )
-            )
-    {% endset %}
+    {% if target.name == "synapse" %}
+
+        {% set create_external_data_source %}
+            IF NOT EXISTS ( SELECT * FROM sys.external_data_sources WHERE name = '{{external_data_source}}' )
+
+                CREATE EXTERNAL DATA SOURCE [{{external_data_source}}] WITH (
+                    TYPE = HADOOP,
+                    LOCATION = 'wasbs://[email protected]'
+                )
+        {% endset %}
+
+        {% set external_file_format = target.schema ~ '.dbt_external_ff_testing' %}
+
+        {% set create_external_file_format %}
+            IF NOT EXISTS ( SELECT * FROM sys.external_file_formats WHERE name = '{{external_file_format}}' )
+
+                CREATE EXTERNAL FILE FORMAT [{{external_file_format}}]
+                WITH (
+                    FORMAT_TYPE = DELIMITEDTEXT,
+                    FORMAT_OPTIONS (
+                        FIELD_TERMINATOR = N',',
+                        FIRST_ROW = 2,
+                        USE_TYPE_DEFAULT = True
+                    )
+                )
+        {% endset %}
+
+    {% elif target.name == "azuresql" %}
+
+        {% set cred_name = 'synapse_reader' %}
+
+        {% set create_database_scoped_credential %}
+            IF NOT EXISTS ( SELECT * FROM sys.database_scoped_credentials WHERE name = '{{ cred_name }}')
+                CREATE DATABASE SCOPED CREDENTIAL [{{ cred_name }}] WITH
+                    IDENTITY = '{{ env_var("DBT_SYNAPSE_UID") }}',
+                    SECRET = '{{ env_var("DBT_SYNAPSE_PWD") }}'
+        {% endset %}
+
+        {% set create_external_data_source %}
+            IF NOT EXISTS ( SELECT * FROM sys.external_data_sources WHERE name = '{{external_data_source}}' )
+
+                CREATE EXTERNAL DATA SOURCE [{{external_data_source}}] WITH (
+                    TYPE = RDBMS,
+                    LOCATION = '{{ env_var("DBT_SYNAPSE_SERVER") }}',
+                    DATABASE_NAME = '{{ env_var("DBT_SYNAPSE_DB") }}',
+                    CREDENTIAL = [{{ cred_name }}]
+                )
+        {% endset %}
+
+    {%- endif %}
+
+    {% if target.name == "azuresql" -%}
+        {% do log('Creating database scoped credential ' ~ cred_name, info = true) %}
+        {% do run_query(create_database_scoped_credential) %}
+    {%- endif %}
 
     {% do log('Creating external data source ' ~ external_data_source, info = true) %}
     {% do run_query(create_external_data_source) %}
-    {% do log('Creating external file format ' ~ external_file_format, info = true) %}
-    {% do run_query(create_external_file_format) %}
+
+    {% if target.name == "synapse" -%}
+        {% do log('Creating external file format ' ~ external_file_format, info = true) %}
+        {% do run_query(create_external_file_format) %}
+    {%- endif %}
 
 {% endmacro %}
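As a sanity check (not part of the package), the objects this macro creates can be confirmed by querying the same system catalogs its IF NOT EXISTS guards read; a hedged sketch:

```sql
-- Run against the target database after the prep macro has executed
SELECT name, type_desc FROM sys.external_data_sources;    -- HADOOP (synapse) or RDBMS (azuresql)
SELECT name FROM sys.database_scoped_credentials;         -- azuresql target only
SELECT name, format_type FROM sys.external_file_formats;  -- synapse target only
```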
14 changes: 12 additions & 2 deletions integration_tests/models/plugins/sqlserver.yml
@@ -1,7 +1,7 @@
 version: 2
 
 sources:
-  - name: sqlserver_external
+  - name: synapse_external
     schema: "{{ target.schema }}"
     loader: ADLSblob
 
@@ -42,7 +42,17 @@ sources:
 #         partitions: &parts-of-the-people
 #           - name: section
 #             data_type: varchar
 #             expression: "substr(split_part(metadata$filename, 'section=', 2), 1, 1)"
         columns: *cols-of-the-people
         tests: *equal-to-the-people
+
+  - name: azuresql_external
+    schema: "{{ target.schema }}"
+    loader: RDBMS cross database query
+    tables:
+      - name: people_csv_unpartitioned
+        external:
+          data_source: "{{ target.schema ~ '.dbt_external_tables_testing' }}"
+          schema_name: 'dbt_external_tables_integration_tests_synapse'
+          object_name: 'people_csv_unpartitioned'
+        columns: *cols-of-the-people
+        tests: *equal-to-the-people
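Given this spec, the plugin's `create_external_table` macro (changed below) should render RDBMS-style DDL roughly like the following. The schema and columns are illustrative; the real column list comes from the shared `*cols-of-the-people` YAML anchor:

```sql
CREATE EXTERNAL TABLE [my_target_schema].[people_csv_unpartitioned] (
    -- hypothetical columns; the real list is defined by the shared YAML anchor
    id int,
    first_name varchar(64)
)
WITH (
    data_source = [my_target_schema.dbt_external_tables_testing],
    schema_name = 'dbt_external_tables_integration_tests_synapse',
    object_name = 'people_csv_unpartitioned'
)
```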

4 changes: 1 addition & 3 deletions macros/common/stage_external_sources.sql
@@ -5,8 +5,7 @@
     {% set source_nodes = graph.sources.values() if graph.sources else [] %}
 
     {% for node in source_nodes %}
-
-        {% if node.external.location %}
+        {% if node.external %}
 
             {% if select %}
 
@@ -30,7 +29,6 @@
                 {% do sources_to_stage.append(node) %}
 
             {% endif %}
-
         {% endif %}
 
     {% endfor %}
17 changes: 10 additions & 7 deletions macros/plugins/sqlserver/create_external_table.sql
@@ -15,13 +15,16 @@
     {% endfor %}
     )
     WITH (
-        {% set dict = {'DATA_SOURCE': external.data_source,
-                       'LOCATION' : external.location,
-                       'FILE_FORMAT' : external.file_format,
-                       'REJECT_TYPE' : external.reject_type,
-                       'REJECT_VALUE' : external.reject_value} -%}
-        {%- for key, value in dict.items() %}
-            {{key}} = {% if key == "LOCATION" -%} '{{value}}' {%- elif key in ["DATA_SOURCE","FILE_FORMAT"] -%} [{{value}}] {%- else -%} {{value}} {%- endif -%}
+        {# remove keys that are None (i.e. not defined for a given source) #}
+        {%- for key, value in external.items() if value is not none and key not in ['ansi_nulls', 'quoted_identifier'] -%}
+            {{key}} =
+            {%- if key in ["location", "schema_name", "object_name"] -%}
+                '{{value}}'
+            {% elif key in ["data_source","file_format"] -%}
+                [{{value}}]
+            {% else -%}
+                {{value}}
+            {%- endif -%}
             {{- ',' if not loop.last -}}
         {%- endfor -%}
     )
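Iterating `external.items()` generically lets one macro cover both shapes of WITH clause: `location`, `schema_name`, and `object_name` values are quoted, `data_source` and `file_format` are bracketed, everything else is emitted bare, and `ansi_nulls`/`quoted_identifier` are filtered out. For the HADOOP-style sample source further down, the rendered DDL would look roughly like this (table and column are illustrative, and exact whitespace will differ):

```sql
CREATE EXTERNAL TABLE [my_schema].[LeadActivities] (
    id int  -- illustrative column
)
WITH (
    data_source = [SynapseContainer],
    location = '/marketing/Marketo/LeadActivities/',
    file_format = [CommaDelimited],
    reject_type = VALUE,
    reject_value = 0
)
```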
5 changes: 4 additions & 1 deletion run_test.sh
@@ -8,9 +8,12 @@ if [[ ! -f $VENV ]]; then
 if [ $1 == 'databricks' ]
 then
     pip install dbt-spark[ODBC] --upgrade
-elif [ $1 == 'sqlserver' ]
+elif [ $1 == 'synapse' ]
 then
     pip install dbt-synapse --upgrade
+elif [ $1 == 'azuresql' ]
+then
+    pip install dbt-sqlserver --upgrade
 else
     pip install dbt --upgrade
 fi
12 changes: 10 additions & 2 deletions sample_sources/synapse.yml
@@ -11,14 +11,22 @@ sources:
       description: |
         from raw DW.
       external:
-        data_source: SynapseContainer # External Data Source name (created prior)
+        # Delimited Files in Blob/Lake
+        # External Data Source name (created prior)
+        data_source: SynapseContainer # made with TYPE = 'HADOOP'
         location: /marketing/Marketo/LeadActivities/ # path on above data source
-        file_format: CommaDelimited # External File Format name (created prior)
+        # External File Format name (created prior)
+        file_format: CommaDelimited
         reject_type: VALUE
         reject_value: 0
         ansi_nulls: true
         quoted_identifier: true
+
+        # Cross database query (i.e. RDBMS), Azure SQL ONLY
+        data_source: AEDW # made with TYPE = 'RDBMS'
+        schema_name: Business
+        object_name: LeadActivities
 
       columns:
         - name: id
           description: unique Activity ID
