Skip to content

Commit

Permalink
[SNOW-172] Convert file_latest to dynamic table (#110)
Browse files Browse the repository at this point in the history
* convert file latest to dynamic table
  • Loading branch information
danlu1 authored and philerooski committed Feb 3, 2025
1 parent 8b045b3 commit 5c7740d
Show file tree
Hide file tree
Showing 6 changed files with 86 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
-- Introduce the dynamic table
USE SCHEMA {{database_name}}.synapse; --noqa: JJ01,PRS,TMP
CREATE OR REPLACE DYNAMIC TABLE FILE_LATEST
TARGET_LAG = '1 day'
WAREHOUSE = compute_xsmall
AS
WITH dedup_filesnapshots AS (
SELECT
*
FROM {{database_name}}.SYNAPSE_RAW.FILESNAPSHOTS --noqa: TMP
WHERE
SNAPSHOT_DATE >= CURRENT_TIMESTAMP - INTERVAL '30 days'
QUALIFY
ROW_NUMBER() OVER (
PARTITION BY ID
ORDER BY CHANGE_TIMESTAMP DESC, SNAPSHOT_TIMESTAMP DESC
) = 1
)
SELECT
CHANGE_TYPE,
CHANGE_TIMESTAMP,
CHANGE_USER_ID,
SNAPSHOT_TIMESTAMP,
ID,
CREATED_BY,
CREATED_ON,
MODIFIED_ON,
CONCRETE_TYPE,
CONTENT_MD5,
CONTENT_TYPE,
FILE_NAME,
STORAGE_LOCATION_ID,
CONTENT_SIZE,
BUCKET,
KEY,
PREVIEW_ID,
IS_PREVIEW,
STATUS,
SNAPSHOT_DATE
FROM
dedup_filesnapshots;
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
-- Add table and column comments to userprofile_latest dynamic table
USE SCHEMA {{database_name}}.synapse; --noqa: JJ01,PRS,TMP
-- Table comments
COMMENT ON DYNAMIC TABLE FILE_LATEST IS 'This dynamic table contains the latest snapshot of files during the past 30 days. Snapshots are taken when files are created or modified. Note: Snapshots are also taken periodically and independently of the changes. The snapshot_timestamp records when the snapshot was taken.';
-- Column comments
COMMENT ON COLUMN FILE_LATEST.CHANGE_TYPE IS 'The type of change that occurred on the file handle, e.g., CREATE, UPDATE, DELETE.';
COMMENT ON COLUMN FILE_LATEST.CHANGE_TIMESTAMP IS 'The time when the change (created/updated/deleted) on the file is pushed to the queue for snapshotting.';
COMMENT ON COLUMN FILE_LATEST.CHANGE_USER_ID IS 'The unique identifier of the user who made the change to the file.';
COMMENT ON COLUMN FILE_LATEST.SNAPSHOT_TIMESTAMP IS 'The time when the snapshot was taken (It is usually after the change happened).';
COMMENT ON COLUMN FILE_LATEST.ID IS 'The unique identifier of the file handle.';
COMMENT ON COLUMN FILE_LATEST.CREATED_BY IS 'The unique identifier of the user who created the file handle.';
COMMENT ON COLUMN FILE_LATEST.CREATED_ON IS 'The creation timestamp of the file handle.';
COMMENT ON COLUMN FILE_LATEST.MODIFIED_ON IS 'The most recent change time of the file handle.';
COMMENT ON COLUMN FILE_LATEST.CONCRETE_TYPE IS 'The type of the file handle. Allowed file handles are: S3FileHandle, ProxyFileHandle, ExternalFileHandle, ExternalObjectStoreFileHandle, GoogleCloudFileHandle.';
COMMENT ON COLUMN FILE_LATEST.CONTENT_MD5 IS 'The md5 hash (using MD5 algorithm) of the file referenced by the file handle.';
COMMENT ON COLUMN FILE_LATEST.CONTENT_TYPE IS 'Metadata about the content of the file, e.g., application/json, application/zip, application/octet-stream.';
COMMENT ON COLUMN FILE_LATEST.FILE_NAME IS 'The name of the file referenced by the file handle.';
COMMENT ON COLUMN FILE_LATEST.STORAGE_LOCATION_ID IS 'The identifier of the environment, where the physical files are stored.';
COMMENT ON COLUMN FILE_LATEST.CONTENT_SIZE IS 'The size of the file referenced by the file handle.';
COMMENT ON COLUMN FILE_LATEST.BUCKET IS 'The bucket where the file is physically stored. Applicable for s3 and GCP, otherwise empty.';
COMMENT ON COLUMN FILE_LATEST.KEY IS 'The key name uniquely identifies the object (file) in the bucket.';
COMMENT ON COLUMN FILE_LATEST.PREVIEW_ID IS 'The identifier of the file handle that contains a preview of the file referenced by this file handle.';
COMMENT ON COLUMN FILE_LATEST.IS_PREVIEW IS 'If true, the file referenced by this file handle is a preview of another file.';
COMMENT ON COLUMN FILE_LATEST.STATUS IS 'The availability status of the file referenced by the file handle. AVAILABLE: accessible via Synapse; UNLINKED: not referenced by Synapse and therefore available for garbage collection; ARCHIVED: the file has been garbage collected.';
COMMENT ON COLUMN FILE_LATEST.SNAPSHOT_DATE IS 'The data is partitioned for fast and cost effective queries. The snapshot_timestamp field is converted into a date and stored in the snapshot_date field for partitioning. The date should be used as a condition (WHERE CLAUSE) in the queries.';
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
-- Backup the original latest table
USE SCHEMA {{database_name}}.synapse; --noqa: JJ01,PRS,TMP
-- Clone the FILE_LATEST table to ``FILE_LATEST_BACKUP`` for validation purposes
CREATE TABLE IF NOT EXISTS FILE_LATEST_BACKUP CLONE FILE_LATEST;
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Drop FILE_LATEST table
USE SCHEMA {{database_name}}.synapse;
DROP TABLE IF EXISTS FILE_LATEST;
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Drop the snapshot stream
USE SCHEMA {{database_name}}.synapse_raw;
DROP STREAM IF EXISTS FILESNAPSHOTS_STREAM;
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
-- Drop any scheduled tasks
USE SCHEMA {{database_name}}.synapse_raw;
-- Suspend ROOT TASK
ALTER TASK REFRESH_SYNAPSE_WAREHOUSE_S3_STAGE_TASK SUSPEND;
-- Drop UPSERT_TO_FILE_LATEST_TASK
DROP TASK IF EXISTS UPSERT_TO_FILE_LATEST_TASK;
-- Drop REMOVE_DELETE_FILES_TASK
DROP TASK IF EXISTS REMOVE_DELETE_FILES_TASK;
-- Resume the ROOT task and its child tasks
SELECT SYSTEM$TASK_DEPENDENTS_ENABLE( 'REFRESH_SYNAPSE_WAREHOUSE_S3_STAGE_TASK' );

0 comments on commit 5c7740d

Please sign in to comment.