diff --git a/synapse_data_warehouse/synapse/dynamic_tables/V2.36.4__file_latest_dynamic_table.sql b/synapse_data_warehouse/synapse/dynamic_tables/V2.36.4__file_latest_dynamic_table.sql
new file mode 100644
index 00000000..344a9ad0
--- /dev/null
+++ b/synapse_data_warehouse/synapse/dynamic_tables/V2.36.4__file_latest_dynamic_table.sql
@@ -0,0 +1,47 @@
+-- Introduce the dynamic table
+-- FILE_LATEST holds one row per file handle ID: the newest snapshot found in
+-- the last 30 days of SYNAPSE_RAW.FILESNAPSHOTS.
+-- NOTE(review): rows whose newest CHANGE_TYPE is DELETE are kept by this query;
+-- confirm that is intended now that REMOVE_DELETE_FILES_TASK is dropped (V2.36.1).
+USE SCHEMA {{database_name}}.synapse; --noqa: JJ01,PRS,TMP
+CREATE OR REPLACE DYNAMIC TABLE FILE_LATEST
+    TARGET_LAG = '1 day'
+    WAREHOUSE = compute_xsmall
+    AS
+    WITH dedup_filesnapshots AS (
+        SELECT
+            *
+        FROM {{database_name}}.SYNAPSE_RAW.FILESNAPSHOTS --noqa: TMP
+        WHERE
+            -- Bound the scan to recent data via the date column documented as the partitioning field
+            SNAPSHOT_DATE >= CURRENT_TIMESTAMP - INTERVAL '30 days'
+        QUALIFY
+            -- Keep only the newest row per ID: latest change first, then latest snapshot
+            ROW_NUMBER() OVER (
+                PARTITION BY ID
+                ORDER BY CHANGE_TIMESTAMP DESC, SNAPSHOT_TIMESTAMP DESC
+            ) = 1
+    )
+    SELECT
+        CHANGE_TYPE,
+        CHANGE_TIMESTAMP,
+        CHANGE_USER_ID,
+        SNAPSHOT_TIMESTAMP,
+        ID,
+        CREATED_BY,
+        CREATED_ON,
+        MODIFIED_ON,
+        CONCRETE_TYPE,
+        CONTENT_MD5,
+        CONTENT_TYPE,
+        FILE_NAME,
+        STORAGE_LOCATION_ID,
+        CONTENT_SIZE,
+        BUCKET,
+        KEY,
+        PREVIEW_ID,
+        IS_PREVIEW,
+        STATUS,
+        SNAPSHOT_DATE
+    FROM
+        dedup_filesnapshots;
diff --git a/synapse_data_warehouse/synapse/dynamic_tables/V2.36.5__add_column_comments_to_file_latest.sql b/synapse_data_warehouse/synapse/dynamic_tables/V2.36.5__add_column_comments_to_file_latest.sql
new file mode 100644
index 00000000..878b9b58
--- /dev/null
+++ b/synapse_data_warehouse/synapse/dynamic_tables/V2.36.5__add_column_comments_to_file_latest.sql
@@ -0,0 +1,25 @@
+-- Add table and column comments to file_latest dynamic table
+USE SCHEMA {{database_name}}.synapse; --noqa: JJ01,PRS,TMP
+-- Table comments
+COMMENT ON DYNAMIC TABLE FILE_LATEST IS 'This dynamic table contains the latest snapshot of files during the past 30 days. Snapshots are taken when files are created or modified. Note: Snapshots are also taken periodically and independently of the changes. The snapshot_timestamp records when the snapshot was taken.';
+-- Column comments
+COMMENT ON COLUMN FILE_LATEST.CHANGE_TYPE IS 'The type of change that occurred on the file handle, e.g., CREATE, UPDATE, DELETE.';
+COMMENT ON COLUMN FILE_LATEST.CHANGE_TIMESTAMP IS 'The time when the change (created/updated/deleted) on the file is pushed to the queue for snapshotting.';
+COMMENT ON COLUMN FILE_LATEST.CHANGE_USER_ID IS 'The unique identifier of the user who made the change to the file.';
+COMMENT ON COLUMN FILE_LATEST.SNAPSHOT_TIMESTAMP IS 'The time when the snapshot was taken (It is usually after the change happened).';
+COMMENT ON COLUMN FILE_LATEST.ID IS 'The unique identifier of the file handle.';
+COMMENT ON COLUMN FILE_LATEST.CREATED_BY IS 'The unique identifier of the user who created the file handle.';
+COMMENT ON COLUMN FILE_LATEST.CREATED_ON IS 'The creation timestamp of the file handle.';
+COMMENT ON COLUMN FILE_LATEST.MODIFIED_ON IS 'The most recent change time of the file handle.';
+COMMENT ON COLUMN FILE_LATEST.CONCRETE_TYPE IS 'The type of the file handle. Allowed file handles are: S3FileHandle, ProxyFileHandle, ExternalFileHandle, ExternalObjectStoreFileHandle, GoogleCloudFileHandle.';
+COMMENT ON COLUMN FILE_LATEST.CONTENT_MD5 IS 'The md5 hash (using MD5 algorithm) of the file referenced by the file handle.';
+COMMENT ON COLUMN FILE_LATEST.CONTENT_TYPE IS 'Metadata about the content of the file, e.g., application/json, application/zip, application/octet-stream.';
+COMMENT ON COLUMN FILE_LATEST.FILE_NAME IS 'The name of the file referenced by the file handle.';
+COMMENT ON COLUMN FILE_LATEST.STORAGE_LOCATION_ID IS 'The identifier of the environment, where the physical files are stored.';
+COMMENT ON COLUMN FILE_LATEST.CONTENT_SIZE IS 'The size of the file referenced by the file handle.';
+COMMENT ON COLUMN FILE_LATEST.BUCKET IS 'The bucket where the file is physically stored. Applicable for s3 and GCP, otherwise empty.';
+COMMENT ON COLUMN FILE_LATEST.KEY IS 'The key name uniquely identifies the object (file) in the bucket.';
+COMMENT ON COLUMN FILE_LATEST.PREVIEW_ID IS 'The identifier of the file handle that contains a preview of the file referenced by this file handle.';
+COMMENT ON COLUMN FILE_LATEST.IS_PREVIEW IS 'If true, the file referenced by this file handle is a preview of another file.';
+COMMENT ON COLUMN FILE_LATEST.STATUS IS 'The availability status of the file referenced by the file handle. AVAILABLE: accessible via Synapse; UNLINKED: not referenced by Synapse and therefore available for garbage collection; ARCHIVED: the file has been garbage collected.';
+COMMENT ON COLUMN FILE_LATEST.SNAPSHOT_DATE IS 'The data is partitioned for fast and cost effective queries. The snapshot_timestamp field is converted into a date and stored in the snapshot_date field for partitioning. The date should be used as a condition (WHERE CLAUSE) in the queries.';
diff --git a/synapse_data_warehouse/synapse/tables/V2.36.0__add_file_latest_backup.sql b/synapse_data_warehouse/synapse/tables/V2.36.0__add_file_latest_backup.sql
new file mode 100644
index 00000000..8b087a29
--- /dev/null
+++ b/synapse_data_warehouse/synapse/tables/V2.36.0__add_file_latest_backup.sql
@@ -0,0 +1,4 @@
+-- Backup the original latest table
+USE SCHEMA {{database_name}}.synapse; --noqa: JJ01,PRS,TMP
+-- Clone the FILE_LATEST table to ``FILE_LATEST_BACKUP`` for validation purposes
+CREATE TABLE IF NOT EXISTS FILE_LATEST_BACKUP CLONE FILE_LATEST;
diff --git a/synapse_data_warehouse/synapse/tables/V2.36.3__drop_file_latest.sql b/synapse_data_warehouse/synapse/tables/V2.36.3__drop_file_latest.sql
new file mode 100644
index 00000000..b9027ab2
--- /dev/null
+++ b/synapse_data_warehouse/synapse/tables/V2.36.3__drop_file_latest.sql
@@ -0,0 +1,3 @@
+-- Drop FILE_LATEST table
+USE SCHEMA {{database_name}}.synapse; --noqa: JJ01,PRS,TMP
+DROP TABLE IF EXISTS FILE_LATEST;
diff --git a/synapse_data_warehouse/synapse_raw/streams/V2.36.2__drop_filesnapshots_stream.sql b/synapse_data_warehouse/synapse_raw/streams/V2.36.2__drop_filesnapshots_stream.sql
new file mode 100644
index 00000000..b5a1f177
--- /dev/null
+++ b/synapse_data_warehouse/synapse_raw/streams/V2.36.2__drop_filesnapshots_stream.sql
@@ -0,0 +1,3 @@
+-- Drop the snapshot stream
+USE SCHEMA {{database_name}}.synapse_raw; --noqa: JJ01,PRS,TMP
+DROP STREAM IF EXISTS FILESNAPSHOTS_STREAM;
diff --git a/synapse_data_warehouse/synapse_raw/tasks/V2.36.1__drop_file_latest_tasks.sql b/synapse_data_warehouse/synapse_raw/tasks/V2.36.1__drop_file_latest_tasks.sql
new file mode 100644
index 00000000..3b8e0484
--- /dev/null
+++ b/synapse_data_warehouse/synapse_raw/tasks/V2.36.1__drop_file_latest_tasks.sql
@@ -0,0 +1,11 @@
+-- Drop any scheduled tasks
+USE SCHEMA {{database_name}}.synapse_raw; --noqa: JJ01,PRS,TMP
+-- Suspend ROOT TASK (NOTE(review): presumably needed so its dependent tasks can be dropped; confirm)
+ALTER TASK REFRESH_SYNAPSE_WAREHOUSE_S3_STAGE_TASK SUSPEND;
+-- Drop UPSERT_TO_FILE_LATEST_TASK
+DROP TASK IF EXISTS UPSERT_TO_FILE_LATEST_TASK;
+-- Drop REMOVE_DELETE_FILES_TASK
+DROP TASK IF EXISTS REMOVE_DELETE_FILES_TASK;
+-- Resume the ROOT task and its child tasks
+-- NOTE(review): this runs unconditionally, so it also enables a root task that was suspended beforehand; confirm.
+SELECT SYSTEM$TASK_DEPENDENTS_ENABLE( 'REFRESH_SYNAPSE_WAREHOUSE_S3_STAGE_TASK' );