diff --git a/dwh/redshift/querytkdb.sql b/dwh/redshift/querytkdb.sql index 4c9fbbf..0f46f27 100644 --- a/dwh/redshift/querytkdb.sql +++ b/dwh/redshift/querytkdb.sql @@ -10,8 +10,7 @@ FROM ( SELECT buyerid, sum(qtysold) total_quantity FROM sales GROUP BY buyerid - ORDER BY total_quantity desc limit 10) Q, - users + ORDER BY total_quantity desc limit 10) Q, users WHERE Q.buyerid = userid ORDER BY Q.total_quantity desc; @@ -33,12 +32,12 @@ create table ext_file_upload.sales( commission decimal(8,2), saletime timestamp); ---load data from external file in s3 -COPY ext_file_upload.sales from 's3://abc123/sales_tab.txt' +--load data in redshift from external file in s3 +COPY ext_file_upload.sales FROM 's3://abc123/sales_tab.txt' -- n files 's3://abc123/sales_' credentials 'aws_iam_role=arn:aws:iam::1234567891012:role/RedshiftS3Access' -- map the role to the cluster permissions +region '' delimiter '\t' -timeformat 'MM/DD/YYYY HH:MI:SS' -region ''; +timeformat 'MM/DD/YYYY HH:MI:SS'; --validation query: get total count of sales records @@ -49,3 +48,10 @@ SELECT sum(qtysold) FROM ext_file_upload.sales s, date WHERE s.dateid = date.dateid AND caldate = '2008-01-05'; + + +-- unload data from redshift to s3 folder +UNLOAD ('select * from sales') TO 's3://abc123/unload/sales_' +iam_role 'arn:aws:iam::1234567891012:role/test-redshift-role-s3'; + + diff --git a/dwh/snowflake/snowpark_ml.py b/dwh/snowflake/snowpark_ml.py index b585a98..4f7ea42 100644 --- a/dwh/snowflake/snowpark_ml.py +++ b/dwh/snowflake/snowpark_ml.py @@ -5,7 +5,6 @@ from snowflake.snowpark import Session from snowflake.snowpark.functions import col, udf, sproc, call_udf -from snowflake.snowpark.types import StructType, StructField, DecimalType, StringType from snowflake.snowpark.types import PandasDataFrame, PandasSeries import pandas as pd