Release/v0.17.0 (#169)

* initial * feature/unstructured-data * add coalesce_cast * update filters * update and consolidate models * model revisions * restructure * documentation * remove extra comma * regen docs * formatting * update max token docs * Update CHANGELOG.md * bug/missing-sla-policies * update changelog and add integrity test * update test * update changelog, readme and tests * update test * bug/intercepted-period-joins * adjustmnt * update weeks * update weeks * add integrity test * update weeks * update changelog * bugfix/too-many-partitions (#165) * bugfix/too-many-partitions * docs regen * Update CHANGELOG.md Co-authored-by: fivetran-catfritz <[email protected]> --------- Co-authored-by: fivetran-catfritz <[email protected]> * update changelog * revert docs to main * Documentation Standard Updates (#166) * MagicBot/documentation-updates * Apply suggestions from code review * Update README.md Co-authored-by: fivetran-catfritz <[email protected]> --------- Co-authored-by: fivetran-catfritz <[email protected]> * update default max_tokens * update changelog * Apply suggestions from code review Co-authored-by: Joe Markiewicz <[email protected]> * update readme * regen docs * update yml * Apply suggestions from code review Co-authored-by: Renee Li <[email protected]> * add comments and update changelog * update changelog * Update packages.yml --------- Co-authored-by: Renee Li <[email protected]> Co-authored-by: Joe Markiewicz <[email protected]> Co-authored-by: Renee Li <[email protected]>
fivetran · Sep 4, 2024 · 0b73c19 · 0b73c19
1 parent 66d7b7c
commit 0b73c19
Showing 40 changed files with 540 additions and 157 deletions.
diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml
@@ -58,7 +58,7 @@ steps:
     commands: |
       bash .buildkite/scripts/run_models.sh redshift
 
-  - label: ":bricks: Run Tests - Databricks"
+  - label: ":databricks: Run Tests - Databricks"
     key: "run_dbt_databricks"
     plugins:
       - docker#v3.13.0:

diff --git a/.buildkite/scripts/run_models.sh b/.buildkite/scripts/run_models.sh
@@ -19,7 +19,7 @@ dbt deps
 dbt seed --target "$db" --full-refresh
 dbt run --target "$db" --full-refresh
 dbt test --target "$db"
-dbt run --vars '{using_schedules: false, using_domain_names: false, using_user_tags: false, using_ticket_form_history: false, using_organization_tags: false}' --target "$db" --full-refresh
+dbt run --vars '{zendesk__unstructured_enabled: true, using_schedules: false, using_domain_names: false, using_user_tags: false, using_ticket_form_history: false, using_organization_tags: false}' --target "$db" --full-refresh
 dbt test --target "$db"
 
 dbt run-operation fivetran_utils.drop_schemas_automation --target "$db"
diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml
@@ -1,7 +1,7 @@
 name: 🐞 Bug
 description: Report a bug or an issue you've found within the dbt package
 title: "[Bug] <title>"
-labels: ["bug", "triage"]
+labels: ["type:bug"]
 body:
   - type: markdown
     attributes:
@@ -35,6 +35,12 @@ body:
       description: A concise description of what you expected to happen.
     validations:
       required: true
+  - type: textarea
+    attributes:
+      label: Possible solution
+      description: Were you able to investigate and/or discover a potential fix to this bug in your investigation? If so, it would be much appreciated if you could submit code samples to show us how your fix resolved this issue. 
+    validations:
+      required: false
   - type: textarea
     attributes:
       label: dbt Project configurations
@@ -61,6 +67,19 @@ body:
         - other (mention it in "Additional Context")
     validations:
       required: true
+  - type: dropdown
+    id: orchestration_type
+    attributes:
+      label: How are you running this dbt package?
+      multiple: true
+      options:
+        - Fivetran Quickstart Data Model
+        - Fivetran Transformations
+        - dbt Core™
+        - dbt Cloud™
+        - other (mention it in "Additional Context")
+    validations:
+      required: true
   - type: textarea
     attributes:
       label: dbt Version
@@ -83,6 +102,6 @@ body:
       description: Our team will assess this issue and let you know if we will add it to a future sprint. However, if you would like to expedite the solution, we encourage you to contribute to the package via a PR. Our team will then work with you to approve and merge your contributions as soon as possible.
       options:
         - label: Yes.
-        - label: Yes, but I will need assistance and will schedule time during our [office hours](https://calendly.com/fivetran-solutions-team/fivetran-solutions-team-office-hours) for guidance
+        - label: Yes, but I will need assistance.
         - label: No.
           required: false
diff --git a/.github/ISSUE_TEMPLATE/feature-request.yml b/.github/ISSUE_TEMPLATE/feature-request.yml
@@ -1,7 +1,7 @@
 name: 🎉 Feature
 description: Suggest a new feature for the Fivetran dbt package
 title: "[Feature] <title>"
-labels: ["enhancement"]
+labels: ["type:enhancement"]
 body:
   - type: markdown
     attributes:
@@ -20,6 +20,13 @@ body:
       description: A clear and concise description of what you want to happen and why you want the new feature.
     validations:
       required: true
+  - type: textarea
+    attributes:
+      label: How would you implement this feature?
+      description: |
+        How would you build out this feature with your existing data? Any code examples you can provide to help accelerate development on this issue?
+    validations:
+      required: true
   - type: textarea
     attributes:
       label: Describe alternatives you've considered
@@ -34,7 +41,7 @@ body:
       description: Our team will assess this feature and let you know if we will add it to a future sprint. However, if you would like to expedite the feature, we encourage you to contribute to the package via a PR. Our team will then work with you to approve and merge your contributions as soon as possible.
       options:
         - label: Yes.
-        - label: Yes, but I will need assistance and will schedule time during your [office hours](https://calendly.com/fivetran-solutions-team/fivetran-solutions-team-office-hours) for guidance.
+        - label: Yes, but I will need assistance.
         - label: No.
           required: false
   - type: textarea

diff --git a/.github/PULL_REQUEST_TEMPLATE/maintainer_pull_request_template.md b/.github/PULL_REQUEST_TEMPLATE/maintainer_pull_request_template.md
@@ -16,7 +16,6 @@ Please acknowledge that you have successfully performed the following commands l
 Before marking this PR as "ready for review" the following have been applied:
 - [ ] The appropriate issue has been linked, tagged, and properly assigned
 - [ ] All necessary documentation and version upgrades have been applied
-    <!--- Be sure to update the package version in the dbt_project.yml, integration_tests/dbt_project.yml, and README if necessary. -->
 - [ ] docs were regenerated (unless this PR does not include any code or yml updates)
 - [ ] BuildKite integration tests are passing
 - [ ] Detailed validation steps have been provided below

diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
@@ -19,7 +19,13 @@
 <!--- To select a checkbox you simply need to add an "x" with no spaces between the brackets (eg. [x] Yes). -->
 - [ ] Yes
 
-**Provide an emoji that best describes your current mood**
+**Typically there are additional maintenance changes required before this will be ready for an upcoming release. Are you comfortable with the Fivetran team making a few commits directly to your branch?**
+<!--- If you select Yes this will help expedite your PR in case there are small changes required before approval. We encourage you not to use this branch in a production environment as we may make additional updates.  -->
+<!--- If you select No, we will not make any changes directly to your branch and will either communicate any planned changes via the PR thread or will merge your PR into a separate branch so we may make changes without modifying your branch. -->
+- [ ] Yes
+- [ ] No
+
+**If you had to summarize this PR in an emoji, which would it be?**
 <!--- For a complete list of markdown compatible emojis check out this git repo (https://gist.github.com/rxaviers/7360908)  --> 
 :dancer:
 
@@ -30,4 +36,4 @@ We are so excited you decided to contribute to the Fivetran community dbt packag
 **PR Template** 
 - [Community Pull Request Template](?expand=1&template=pull_request_template.md) (default)
 
-- [Maintainer Pull Request Template](?expand=1&template=maintainer_pull_request_template.md) (to be used by maintainers)
+- [Maintainer Pull Request Template](?expand=1&template=maintainer_pull_request_template.md) (to be used by maintainers)
diff --git a/.quickstart/quickstart.yml b/.quickstart/quickstart.yml
@@ -23,3 +23,12 @@ destination_configurations:
     dispatch:
       - macro_namespace: dbt_utils
         search_order: [ 'spark_utils', 'dbt_utils' ]
+
+public_models: [
+  "zendesk__ticket_enriched",
+  "zendesk__ticket_metrics",
+  "zendesk__ticket_summary",
+  "zendesk__ticket_field_history",
+  "zendesk__sla_policies",
+  "zendesk__ticket_backlog"
+]
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,42 @@
+# dbt_zendesk v0.17.0
+
+## New model ([#161](https://github.com/fivetran/dbt_zendesk/pull/161))
+- Addition of the `zendesk__document` model, designed to structure Zendesk textual data for vectorization and integration into NLP workflows. The model outputs a table with:
+  - `document_id`: Corresponding to the `ticket_id`
+  - `chunk_index`: For text segmentation
+  - `chunk`: The text chunk itself
+  - `chunk_tokens_approximate`: Approximate token count for each segment
+- This model is currently disabled by default. You may enable it by setting the `zendesk__unstructured_enabled` variable as `true` in your `dbt_project.yml`.
+  - This model was developed to limit chunk sizes to approximately 5000 tokens for use with OpenAI; however, you can change this limit by setting the `zendesk_max_tokens` variable in your `dbt_project.yml`.
+  - See the README section [Enabling the unstructured document model for NLP](https://github.com/fivetran/dbt_zendesk/blob/main/README.md#enabling-the-unstructured-document-model-for-nlp) for more information.
+
+## Breaking Changes (Full refresh required after upgrading)
+- Incremental models running on BigQuery have had the `partition_by` logic adjusted to include a granularity of a month. This change only impacts BigQuery warehouses and was applied to avoid the common `too many partitions` error some users have experienced when partitioning by day. Therefore, adjusting the partition to a month granularity will decrease the number of partitions created and allow for more performant querying and incremental loads. This change was applied to the following models ([#165](https://github.com/fivetran/dbt_zendesk/pull/165)):
+  - `int_zendesk__field_calendar_spine`
+  - `int_zendesk__field_history_pivot`
+  - `zendesk__ticket_field_history`
+
+- In the [dbt_zendesk_source v0.12.0 release](https://github.com/fivetran/dbt_zendesk_source/releases/tag/v0.12.0), the field `_fivetran_deleted` was added to the following models for use in `zendesk__document` model ([#161](https://github.com/fivetran/dbt_zendesk/pull/161)):
+  - `stg_zendesk__ticket`
+  - `stg_zendesk__ticket_comment`
+  - `stg_zendesk__user`
+  - If you have already added `_fivetran_deleted` as a passthrough column via the `zendesk__ticket_passthrough_columns` or `zendesk__user_passthrough_columns` variable, you will need to remove or alias this field from the variable to avoid duplicate column errors.
+
+## Bug Fixes
+- Fixed an issue in the `zendesk__sla_policies` model where tickets that were opened and solved outside of scheduled hours were not being reported, specifically for the metrics `requester_wait_time` and `agent_work_time`. 
+  - Resolved by adjusting the join logic in models `int_zendesk__agent_work_time_business_hours` and `int_zendesk__requester_wait_time_business_hours`. ([#164](https://github.com/fivetran/dbt_zendesk/pull/164), [#156](https://github.com/fivetran/dbt_zendesk/pull/156))
+- Fixed an issue in the `zendesk__ticket_metrics` model where certain tickets had miscalculated metrics. 
+  - Resolved by adjusting the join logic in models `int_zendesk__ticket_work_time_business`, `int_zendesk__ticket_first_resolution_time_business`, and `int_zendesk__ticket_full_resolution_time_business`. ([#167](https://github.com/fivetran/dbt_zendesk/pull/167))
+
+## Under the hood
+- Added integrity validations:
+  - Test to ensure `zendesk__sla_policies` and `zendesk__ticket_metrics` models produce consistent time results. ([#164](https://github.com/fivetran/dbt_zendesk/pull/164))
+  - Test to ensure `zendesk__ticket_metrics` contains all the tickets found in `stg_zendesk__ticket`. ([#167](https://github.com/fivetran/dbt_zendesk/pull/167))
+- Modified the `consistency_sla_policy_count` validation test to group by `ticket_id` for more accurate testing. ([#165](https://github.com/fivetran/dbt_zendesk/pull/165))
+- Updated casting in joins from timestamps to dates so that the whole day is considered. This produces more accurate results. ([#164](https://github.com/fivetran/dbt_zendesk/pull/164), [#156](https://github.com/fivetran/dbt_zendesk/pull/156), [#167](https://github.com/fivetran/dbt_zendesk/pull/167))
+- Reduced the weeks looking ahead from 208 to 52 to improve performance, as tracking ticket SLAs beyond one year was unnecessary. ([#156](https://github.com/fivetran/dbt_zendesk/pull/156), [#167](https://github.com/fivetran/dbt_zendesk/pull/167))
+- Updated seed files to reflect a real world ticket field history update scenario. ([#165](https://github.com/fivetran/dbt_zendesk/pull/165))
+
 # dbt_zendesk v0.16.0
 ## 🚨 Minor Upgrade 🚨
 Although this update is not a breaking change, it will likely impact the output of the `zendesk__sla_policies` and `zendesk__sla_metrics` models. [PR #154](https://github.com/fivetran/dbt_zendesk/pull/154) includes the following changes:

diff --git a/README.md b/README.md
diff --git a/dbt_project.yml b/dbt_project.yml
@@ -1,5 +1,5 @@
 name: 'zendesk'
-version: '0.16.0'
+version: '0.17.0'
 
 
 config-version: 2
@@ -24,6 +24,9 @@ models:
     ticket_history:
       +schema: zendesk_intermediate
       +materialized: ephemeral
+    unstructured:
+      +schema: zendesk_unstructured
+      +materialized: table
     utils:
       +materialized: ephemeral
 vars:

diff --git a/docs/catalog.json b/docs/catalog.json
diff --git a/docs/index.html b/docs/index.html
diff --git a/docs/manifest.json b/docs/manifest.json
diff --git a/docs/run_results.json b/docs/run_results.json
diff --git a/integration_tests/dbt_project.yml b/integration_tests/dbt_project.yml
@@ -1,7 +1,7 @@
 config-version: 2
 
 name: 'zendesk_integration_tests'
-version: '0.16.0'
+version: '0.17.0'
 
 profile: 'integration_tests'
 
@@ -26,6 +26,9 @@ vars:
     zendesk_user_identifier:                   "user_data"
     zendesk_user_tag_identifier:               "user_tag_data"
 
+  ## Uncomment for docs generation
+  # zendesk__unstructured_enabled: True
+
   ## For validation testing. To be commented out before release.
   # zendesk_schema: zendesk_test_env
   # using_domain_names: false
@@ -35,7 +38,6 @@ vars:
   # fivetran_consistency_ticket_metrics_exclusion_tickets: (11092,11093,11094)
   # fivetran_integrity_sla_count_match_tickets: (76)
 
-
 models:
   +schema: "zendesk_{{ var('directed_schema','dev') }}"
 

diff --git a/integration_tests/seeds/ticket_data.csv b/integration_tests/seeds/ticket_data.csv
@@ -2,7 +2,7 @@ id,_fivetran_synced,allow_channelback,assignee_id,brand_id,created_at,descriptio
 1595,2020-03-20 02:32:49.426,FALSE,,360003529474,2020-02-19 01:54:52,description1,,,,360006965034,FALSE,TRUE,370295712714,,,email@email.com,396331237134,deleted,subject1,396331237134,,360002048693,incident,2020-02-19 01:55:11,https://zendesk.com/api/v2/tickets/1595.json,web,,,,example@email.com,,[],,,
 16988,2021-01-13 20:09:16.325,FALSE,418284131934,360003529474,2020-12-22 00:19:23,description1,,,,360013366274,FALSE,TRUE,370469077513,,,email@email.com,1500656884401,solved,subject1,1500656884401,,360002048693,,2021-01-13 18:42:39,https://zendesk.com/api/v2/tickets/16988.json,email,,,,example@email.com,Support,[],,[],
 14173,2020-11-11 20:08:45.130,FALSE,396371699653,360003529474,2020-10-28 12:03:02,description1,,,,360006965034,FALSE,TRUE,370321120273,,,email@email.com,424883466453,closed,subject1,424883466453,,360002048693,,2020-11-11 17:01:32,https://zendesk.com/api/v2/tickets/14173.json,email,,,,example@email.com,Support,[],,,
-11071,2020-10-02 14:08:33.216,FALSE,,360003529474,2020-08-28 18:06:36,description1,,,,,FALSE,TRUE,,,,email@email.com,419755385214,deleted,subject1,419755385214,,360002048693,,2020-09-02 11:01:27,https://zendesk.com/api/v2/tickets/11071.json,email,,,,X,Support,[],,,
+11071,2020-10-02 14:08:33.216,FALSE,1111,360003529474,2020-08-28 18:06:36,Ticket to test field history changes,,,,360006965034,FALSE,TRUE,,urgent,,email@email.com,2222,closed,subject1,2222,,360002048693,,2020-11-15 11:01:27,https://zendesk.com/api/v2/tickets/11071.json,email,,,,X,Support,[],,,
 1966,2020-03-25 20:32:23.617,FALSE,396315360434,360003529474,2020-02-27 06:05:08,description1,,,,360006965034,FALSE,TRUE,370295721514,,,email@email.com,402813302773,closed,subject1,402813302773,,360002048693,,2020-03-25 16:03:26,https://zendesk.com/api/v2/tickets/1966.json,email,,,,example@email.com,Support,[1967],,,
 11013,2020-10-02 20:08:20.449,FALSE,402851697393,360003529474,2020-08-27 23:09:52,description1,,,,360008376313,FALSE,TRUE,370297881854,,,email@email.com,419688934974,deleted,subject1,419688934974,,360002048693,,2020-09-02 15:53:16,https://zendesk.com/api/v2/tickets/11013.json,email,,,,X,Support,[],,,
 1404,2020-03-05 04:53:46.466,FALSE,396371699653,360003529474,2020-02-13 21:43:58,description1,,,,360006965034,FALSE,TRUE,370295709874,,,email@email.com,403125197514,closed,subject1,403125197514,,360002048693,,2020-02-28 01:01:57,https://zendesk.com/api/v2/tickets/1404.json,email,,,,example@email.com,Support,,,,

diff --git a/integration_tests/seeds/ticket_field_history_data.csv b/integration_tests/seeds/ticket_field_history_data.csv
@@ -1,8 +1,25 @@
 field_name,ticket_id,updated,_fivetran_synced,user_id,value
+status,11071,2020-08-28 11:01:27,2020-10-02 14:08:33.216,,open
+assignee_id,11071,2020-08-28 11:01:27,2020-03-11 14:32:23.872,,1111
+priority,11071,2020-08-28 11:01:27,2020-03-11 14:32:23.872,,normal
+status,11071,2020-08-29 11:01:27,2020-10-02 14:08:33.216,,pending
+status,11071,2020-09-02 11:01:27,2020-10-02 14:08:33.216,,on-hold
+status,11071,2020-09-05 11:01:27,2020-10-02 14:08:33.216,,pending
+status,11071,2020-09-15 11:01:27,2020-10-02 14:08:33.216,,open
+status,11071,2020-09-30 11:01:27,2020-10-02 14:08:33.216,,pending
+priority,11071,2020-09-30 11:01:27,2020-03-11 14:32:23.872,,medium
+status,11071,2020-10-01 11:01:27,2020-10-02 14:08:33.216,,on-hold
+status,11071,2020-10-15 11:01:27,2020-10-02 14:08:33.216,,open
+priority,11071,2020-10-15 11:01:27,2020-03-11 14:32:23.872,,high
+status,11071,2020-10-17 11:01:27,2020-10-02 14:08:33.216,,on-hold
+status,11071,2020-10-20 11:01:27,2020-10-02 14:08:33.216,,pending
+status,11071,2020-11-02 11:01:27,2020-10-02 14:08:33.216,,open
+priority,11071,2020-11-02 11:01:27,2020-03-11 14:32:23.872,,urgent
+status,11071,2020-11-14 11:01:27,2020-10-02 14:08:33.216,,solved
+status,11071,2020-11-15 11:01:27,2020-10-02 14:08:33.216,,closed
 status,6964,2020-06-01 21:11:59,2020-07-02 02:09:05.984,,solved
 status,974,2020-02-10 21:47:41,2020-03-12 02:32:23.808,,solved
 priority,980,2020-02-10 22:06:57,2020-03-12 02:32:23.808,,solved
-status,11071,2020-09-02 11:01:27,2020-10-02 14:08:33.216,,solved
 status,8205,2020-07-07 23:01:47,2020-08-07 02:09:08.192,,solved
 status,103,2020-02-10 08:36:38,2020-03-11 14:32:23.872,,solved
 assignee_id,108,2020-02-10 08:36:38,2020-03-11 14:32:23.872,,1111

diff --git a/integration_tests/seeds/user_data.csv b/integration_tests/seeds/user_data.csv
@@ -1,6 +1,6 @@
 id,_fivetran_synced,active,alias,authenticity_token,chat_only,created_at,details,email,external_id,last_login_at,locale,locale_id,moderator,name,notes,only_private_comments,organization_id,phone,remote_photo_url,restricted_agent,role,shared,shared_agent,signature,suspended,ticket_restriction,time_zone,two_factor_auth_enabled,updated_at,url,verified
-403958466973,2020-03-05 05:03:34.208,TRUE,,,FALSE,2020-02-26 22:55:12,,example@email.com,,2023-02-26 22:55:12,en-US,1,FALSE,John Doe,,FALSE,370297696174,,,TRUE,end-user,FALSE,FALSE,,FALSE,requested,Pacific Time (US & Canada),FALSE,2020-02-26 22:55:12,https://fivetran1813.zendesk.com/api/v2/users/403958466973.json,TRUE
-403969371634,2020-03-05 05:03:34.208,TRUE,,,FALSE,2020-02-26 22:41:37,,example@email.com,,2023-02-26 22:55:12,en-US,1,FALSE,John Doe,,FALSE,,,,TRUE,end-user,FALSE,FALSE,,FALSE,requested,Pacific Time (US & Canada),FALSE,2020-02-26 22:41:37,https://fivetran1813.zendesk.com/api/v2/users/403969371634.json,TRUE
+1111,2020-03-05 05:03:34.208,TRUE,,,FALSE,2020-02-26 22:55:12,,example@email.com,,2023-02-26 22:55:12,en-US,1,FALSE,Arthur Agent,,FALSE,370326203233,,,TRUE,agent,FALSE,FALSE,,FALSE,requested,Pacific Time (US & Canada),FALSE,2020-02-26 22:55:12,https://fivetran1813.zendesk.com/api/v2/users/403958466973.json,TRUE
+2222,2020-03-05 05:03:34.208,TRUE,,,FALSE,2020-02-26 22:41:37,,example@email.com,,2023-02-26 22:55:12,en-US,1,FALSE,Earnest End User,,FALSE,370297696174,,,TRUE,end-user,FALSE,FALSE,,FALSE,requested,Pacific Time (US & Canada),FALSE,2020-02-26 22:41:37,https://fivetran1813.zendesk.com/api/v2/users/403969371634.json,TRUE
 403957746773,2020-03-05 05:03:34.208,TRUE,,,FALSE,2020-02-26 22:35:14,,example@email.com,,2023-02-26 22:55:12,en-US,1,FALSE,John Doe,,FALSE,370319191913,,,TRUE,end-user,FALSE,FALSE,,FALSE,requested,Pacific Time (US & Canada),FALSE,2020-02-26 22:35:15,https://fivetran1813.zendesk.com/api/v2/users/403957746773.json,TRUE
 403970285734,2020-03-05 05:03:34.208,TRUE,,,FALSE,2020-02-26 23:07:41,,example@email.com,,2023-02-26 22:55:12,en-US,1,FALSE,John Doe,,FALSE,370326203233,,,TRUE,end-user,FALSE,FALSE,,FALSE,requested,Pacific Time (US & Canada),FALSE,2020-02-26 23:07:41,https://fivetran1813.zendesk.com/api/v2/users/403970285734.json,TRUE
 403969943274,2020-03-05 05:03:34.208,TRUE,,,FALSE,2020-02-26 22:57:51,,example@email.com,,2023-02-26 22:55:12,en-US,1,FALSE,John Doe,,FALSE,370326203233,,,TRUE,end-user,FALSE,FALSE,,FALSE,requested,Pacific Time (US & Canada),FALSE,2020-02-26 22:57:52,https://fivetran1813.zendesk.com/api/v2/users/403969943274.json,TRUE

diff --git a/integration_tests/tests/consistency/consistency_sla_policies.sql b/integration_tests/tests/consistency/consistency_sla_policies.sql
@@ -5,43 +5,63 @@
 ) }}
 
 with prod as (
-    select
+    select 
         ticket_id,
-        metric, 
+        sla_policy_name,
+        metric,
         sla_applied_at,
-        sla_elapsed_time,
+        target,
+        in_business_hours,
+        sla_breach_at,
+        round(sla_elapsed_time, -1) as sla_elapsed_time, --round to the nearest tens
+        is_active_sla,
         is_sla_breach
     from {{ target.schema }}_zendesk_prod.zendesk__sla_policies
 ),
 
 dev as (
     select
         ticket_id,
-        metric, 
+        sla_policy_name,
+        metric,
         sla_applied_at,
-        sla_elapsed_time,
+        target,
+        in_business_hours,
+        sla_breach_at,
+        round(sla_elapsed_time, -1) as sla_elapsed_time, --round to the nearest tens
+        is_active_sla,
         is_sla_breach
     from {{ target.schema }}_zendesk_dev.zendesk__sla_policies
 ),
 
+prod_not_in_dev as (
+    -- rows from prod not found in dev
+    select * from prod
+    except distinct
+    select * from dev
+),
+
+dev_not_in_prod as (
+    -- rows from dev not found in prod
+    select * from dev
+    except distinct
+    select * from prod
+),
+
 final as (
-    select 
-        prod.ticket_id,
-        prod.metric,
-        prod.sla_applied_at,
-        prod.sla_elapsed_time as prod_sla_elapsed_time,
-        dev.sla_elapsed_time as dev_sla_elapsed_time,
-        prod.is_sla_breach as prod_is_sla_breach,
-        dev.is_sla_breach as dev_is_sla_breach
-    from prod
-    full outer join dev 
-        on dev.ticket_id = prod.ticket_id
-            and dev.metric = prod.metric
-            and dev.sla_applied_at = prod.sla_applied_at
+    select
+        *,
+        'from prod' as source
+    from prod_not_in_dev
+
+    union all -- union since we only care if rows are produced
+
+    select
+        *,
+        'from dev' as source
+    from dev_not_in_prod
 )
 
 select *
 from final
-where (abs(prod_sla_elapsed_time - dev_sla_elapsed_time) >= 5
-    or prod_is_sla_breach != dev_is_sla_breach)
-    {{ "and prod.ticket_id not in " ~ var('fivetran_consistency_sla_policies_exclusion_tickets',[]) ~ "" if var('fivetran_consistency_sla_policies_exclusion_tickets',[]) }}
+{{ "where ticket_id not in " ~ var('fivetran_consistency_sla_policies_exclusion_tickets',[]) ~ "" if var('fivetran_consistency_sla_policies_exclusion_tickets',[]) }}
diff --git a/integration_tests/tests/consistency/consistency_sla_policy_count.sql b/integration_tests/tests/consistency/consistency_sla_policy_count.sql
@@ -6,30 +6,33 @@
 
 with prod as (
     select
-        1 as join_key,
+        ticket_id,
         count(*) as total_slas
     from {{ target.schema }}_zendesk_prod.zendesk__sla_policies
+    {{ "where ticket_id not in " ~ var('fivetran_consistency_sla_policy_count_exclusion_tickets',[]) ~ "" if var('fivetran_consistency_sla_policy_count_exclusion_tickets',[]) }}
     group by 1
 ),
 
 dev as (
     select
-        1 as join_key,
+        ticket_id,
         count(*) as total_slas
     from {{ target.schema }}_zendesk_dev.zendesk__sla_policies
+    {{ "where ticket_id not in " ~ var('fivetran_consistency_sla_policy_count_exclusion_tickets',[]) ~ "" if var('fivetran_consistency_sla_policy_count_exclusion_tickets',[]) }}
     group by 1
 ),
 
 final as (
     select 
-        prod.join_key,
+        prod.ticket_id as prod_ticket_id,
+        dev.ticket_id as dev_ticket_id,
         prod.total_slas as prod_sla_total,
         dev.total_slas as dev_sla_total
     from prod
     full outer join dev 
-        on dev.join_key = prod.join_key
+        on dev.ticket_id = prod.ticket_id
 )
 
 select *
 from final
-where prod_sla_total != dev_sla_total
+where coalesce(prod_sla_total, 0) != coalesce(dev_sla_total, 0) -- null-safe: the full outer join leaves one side null for a ticket_id present in only prod or only dev
diff --git a/integration_tests/tests/integrity/metrics_count_match.sql b/integration_tests/tests/integrity/metrics_count_match.sql
@@ -0,0 +1,24 @@
+
+{{ config(
+    tags="fivetran_validations",
+    enabled=var('fivetran_validation_tests_enabled', false)
+) }}
+
+-- check that all the tickets are accounted for in the metrics
+with stg_count as (
+    select
+        count(*) as stg_ticket_count
+    from {{ ref('stg_zendesk__ticket') }}
+),
+
+metric_count as (
+    select
+        count(*) as metric_ticket_count
+    from {{ ref('zendesk__ticket_metrics') }}
+)
+
+-- each CTE yields exactly one row, so this inequality join returns a row only when the two counts differ
+select *
+from stg_count
+join metric_count
+    on stg_ticket_count != metric_ticket_count
diff --git a/integration_tests/tests/integrity/sla_metrics_parity.sql b/integration_tests/tests/integrity/sla_metrics_parity.sql
@@ -0,0 +1,36 @@
+{{ config(
+    tags="fivetran_validations",
+    enabled=var('fivetran_validation_tests_enabled', false)
+) }}
+
+/*
+Integrity check: for SLA rows flagged in_business_hours, sla_elapsed_time from zendesk__sla_policies
+is compared with the matching business-minutes column in zendesk__ticket_metrics; rows differing by 5 or more are returned.
+*/
+
+with sla_rows as (
+    select *
+    from {{ target.schema }}_zendesk_dev.zendesk__sla_policies
+    where in_business_hours
+), metric_rows as (
+    select *
+    from {{ target.schema }}_zendesk_dev.zendesk__ticket_metrics
+), compared as (
+    select
+        sla_rows.ticket_id,
+        sla_rows.metric,
+        cast(sla_rows.sla_elapsed_time as {{ dbt.type_int() }}) as time_from_slas,
+        case metric
+            when 'agent_work_time' then metric_rows.agent_work_time_in_business_minutes
+            when 'requester_wait_time' then metric_rows.requester_wait_time_in_business_minutes
+            when 'first_reply_time' then metric_rows.first_reply_time_business_minutes
+        end as time_from_metrics
+    from sla_rows
+    left join metric_rows
+        on metric_rows.ticket_id = sla_rows.ticket_id
+)
+
+select *
+from compared
+where abs(time_from_slas - time_from_metrics) >= 5
+{{ "and ticket_id not in " ~ var('fivetran_integrity_sla_metric_parity_exclusion_tickets',[]) ~ "" if var('fivetran_integrity_sla_metric_parity_exclusion_tickets',[]) }}
diff --git a/macros/coalesce_cast.sql b/macros/coalesce_cast.sql
@@ -0,0 +1,12 @@
+{% macro coalesce_cast(column_list, datatype) -%}
+  {{ return(adapter.dispatch('coalesce_cast', 'zendesk')(column_list, datatype)) }}
+{%- endmacro %}
+
+{# Default implementation: emits coalesce() over every entry of column_list, each cast to datatype. #}
+{% macro default__coalesce_cast(column_list, datatype) %}
+  coalesce(
+    {%- for col in column_list %}
+      cast({{ col }} as {{ datatype }}){{ ',' if not loop.last }}
+    {%- endfor %}
+  )
+{% endmacro %}
diff --git a/macros/count_tokens.sql b/macros/count_tokens.sql
@@ -0,0 +1,7 @@
+{% macro count_tokens(column_name) -%}
+  {{ return(adapter.dispatch('count_tokens', 'zendesk')(column_name)) }}
+{%- endmacro %}
+
+{% macro default__count_tokens(column_name) %}
+  {{ dbt.length(column_name) }} / 4 {# 1 token is approximately 4 characters, and we only need an approximation here. Jinja comment so no SQL line comment is emitted into the caller's expression. #}
+{% endmacro %}
diff --git a/models/agent_work_time/int_zendesk__ticket_work_time_business.sql b/models/agent_work_time/int_zendesk__ticket_work_time_business.sql
@@ -63,7 +63,7 @@ with ticket_historical_status as (
 
 ), weeks as (
 
-    {{ dbt_utils.generate_series(208) }}
+    {{ dbt_utils.generate_series(52) }}
 
 ), weeks_cross_ticket_full_solved_time as (
     -- because time is reported in minutes since the beginning of the week, we have to split up time spent on the ticket into calendar weeks
@@ -99,14 +99,14 @@ with ticket_historical_status as (
       schedule.end_time_utc as schedule_end_time,
       least(ticket_week_end_time, schedule.end_time_utc) - greatest(weekly_periods.ticket_week_start_time, schedule.start_time_utc) as scheduled_minutes
     from weekly_periods
-    join schedule on 
-      ticket_week_start_time <= schedule.end_time_utc 
+    join schedule
+      on ticket_week_start_time <= schedule.end_time_utc 
       and ticket_week_end_time >= schedule.start_time_utc
       and weekly_periods.schedule_id = schedule.schedule_id
       -- this chooses the Daylight Savings Time or Standard Time version of the schedule
       -- We have everything calculated within a week, so take us to the appropriate week first by adding the week_number * minutes-in-a-week to the minute-mark where we start and stop counting for the week
-      and cast( {{ dbt.dateadd(datepart='minute', interval='week_number * (7*24*60) + ticket_week_end_time', from_date_or_timestamp='start_week_date') }} as {{ dbt.type_timestamp() }}) > cast(schedule.valid_from as {{ dbt.type_timestamp() }})
-      and cast( {{ dbt.dateadd(datepart='minute', interval='week_number * (7*24*60) + ticket_week_start_time', from_date_or_timestamp='start_week_date') }} as {{ dbt.type_timestamp() }}) < cast(schedule.valid_until as {{ dbt.type_timestamp() }})
+      and cast( {{ dbt.dateadd(datepart='minute', interval='week_number * (7*24*60) + ticket_week_end_time', from_date_or_timestamp='start_week_date') }} as date) > cast(schedule.valid_from as date)
+      and cast( {{ dbt.dateadd(datepart='minute', interval='week_number * (7*24*60) + ticket_week_start_time', from_date_or_timestamp='start_week_date') }} as date) < cast(schedule.valid_until as date)
 
 ), business_minutes as (
 

diff --git a/models/reply_times/int_zendesk__ticket_first_reply_time_business.sql b/models/reply_times/int_zendesk__ticket_first_reply_time_business.sql
@@ -57,7 +57,7 @@ with ticket_reply_times as (
 
 ), weeks as (
 
-    {{ dbt_utils.generate_series(208) }}
+    {{ dbt_utils.generate_series(52) }}
 
 ), weeks_cross_ticket_first_reply as (
     -- because time is reported in minutes since the beginning of the week, we have to split up time spent on the ticket into calendar weeks
@@ -102,6 +102,6 @@ with ticket_reply_times as (
 )
 
   select ticket_id,
-         sum(scheduled_minutes) as first_reply_time_business_minutes
+        sum(scheduled_minutes) as first_reply_time_business_minutes
   from intercepted_periods
   group by 1
diff --git a/models/resolution_times/int_zendesk__ticket_first_resolution_time_business.sql b/models/resolution_times/int_zendesk__ticket_first_resolution_time_business.sql
@@ -46,7 +46,7 @@ with ticket_resolution_times_calendar as (
 
 ), weeks as (
 
-    {{ dbt_utils.generate_series(208) }}
+    {{ dbt_utils.generate_series(52) }}
 
 ), weeks_cross_ticket_first_resolution_time as (
     -- because time is reported in minutes since the beginning of the week, we have to split up time spent on the ticket into calendar weeks
@@ -72,22 +72,24 @@ with ticket_resolution_times_calendar as (
 
 ), intercepted_periods as (
 
-  select ticket_id,
-         week_number,
-         weekly_periods.schedule_id,
-         ticket_week_start_time,
-         ticket_week_end_time,
-         schedule.start_time_utc as schedule_start_time,
-         schedule.end_time_utc as schedule_end_time,
-         least(ticket_week_end_time, schedule.end_time_utc) - greatest(ticket_week_start_time, schedule.start_time_utc) as scheduled_minutes
+  select 
+    ticket_id,
+    week_number,
+    weekly_periods.schedule_id,
+    ticket_week_start_time,
+    ticket_week_end_time,
+    schedule.start_time_utc as schedule_start_time,
+    schedule.end_time_utc as schedule_end_time,
+    least(ticket_week_end_time, schedule.end_time_utc) - greatest(ticket_week_start_time, schedule.start_time_utc) as scheduled_minutes
   from weekly_periods
-  join schedule on ticket_week_start_time <= schedule.end_time_utc 
+  join schedule
+    on ticket_week_start_time <= schedule.end_time_utc 
     and ticket_week_end_time >= schedule.start_time_utc
     and weekly_periods.schedule_id = schedule.schedule_id
     -- this chooses the Daylight Savings Time or Standard Time version of the schedule
     -- We have everything calculated within a week, so take us to the appropriate week first by adding the week_number * minutes-in-a-week to the minute-mark where we start and stop counting for the week
-    and cast( {{ dbt.dateadd(datepart='minute', interval='week_number * (7*24*60) + ticket_week_end_time', from_date_or_timestamp='start_week_date') }} as {{ dbt.type_timestamp() }}) > cast(schedule.valid_from as {{ dbt.type_timestamp() }})
-    and cast( {{ dbt.dateadd(datepart='minute', interval='week_number * (7*24*60) + ticket_week_start_time', from_date_or_timestamp='start_week_date') }} as {{ dbt.type_timestamp() }}) < cast(schedule.valid_until as {{ dbt.type_timestamp() }})
+    and cast( {{ dbt.dateadd(datepart='minute', interval='week_number * (7*24*60) + ticket_week_end_time', from_date_or_timestamp='start_week_date') }} as date) > cast(schedule.valid_from as date)
+    and cast( {{ dbt.dateadd(datepart='minute', interval='week_number * (7*24*60) + ticket_week_start_time', from_date_or_timestamp='start_week_date') }} as date) < cast(schedule.valid_until as date)
 
 )
 

diff --git a/models/resolution_times/int_zendesk__ticket_full_resolution_time_business.sql b/models/resolution_times/int_zendesk__ticket_full_resolution_time_business.sql
@@ -45,7 +45,7 @@ with ticket_resolution_times_calendar as (
 
 ), weeks as (
 
-    {{ dbt_utils.generate_series(208) }}
+    {{ dbt_utils.generate_series(52) }}
 
 ), weeks_cross_ticket_full_resolution_time as (
     -- because time is reported in minutes since the beginning of the week, we have to split up time spent on the ticket into calendar weeks
@@ -85,8 +85,8 @@ with ticket_resolution_times_calendar as (
     and weekly_periods.schedule_id = schedule.schedule_id
     -- this chooses the Daylight Savings Time or Standard Time version of the schedule
     -- We have everything calculated within a week, so take us to the appropriate week first by adding the week_number * minutes-in-a-week to the minute-mark where we start and stop counting for the week
-    and cast( {{ dbt.dateadd(datepart='minute', interval='week_number * (7*24*60) + ticket_week_end_time', from_date_or_timestamp='start_week_date') }} as {{ dbt.type_timestamp() }}) > cast(schedule.valid_from as {{ dbt.type_timestamp() }})
-    and cast( {{ dbt.dateadd(datepart='minute', interval='week_number * (7*24*60) + ticket_week_start_time', from_date_or_timestamp='start_week_date') }} as {{ dbt.type_timestamp() }}) < cast(schedule.valid_until as {{ dbt.type_timestamp() }})
+    and cast( {{ dbt.dateadd(datepart='minute', interval='week_number * (7*24*60) + ticket_week_end_time', from_date_or_timestamp='start_week_date') }} as date) > cast(schedule.valid_from as date)
+    and cast( {{ dbt.dateadd(datepart='minute', interval='week_number * (7*24*60) + ticket_week_start_time', from_date_or_timestamp='start_week_date') }} as date) < cast(schedule.valid_until as date)
 
 )
 

diff --git a/models/sla_policy/agent_work_time/int_zendesk__agent_work_time_business_hours.sql b/models/sla_policy/agent_work_time/int_zendesk__agent_work_time_business_hours.sql
@@ -75,7 +75,7 @@ with agent_work_time_filtered_statuses as (
 
 ), weeks as (
 
-    {{ dbt_utils.generate_series(208) }}
+    {{ dbt_utils.generate_series(52) }}
 
 ), weeks_cross_ticket_full_solved_time as (
     -- because time is reported in minutes since the beginning of the week, we have to split up time spent on the ticket into calendar weeks
@@ -120,17 +120,21 @@ with agent_work_time_filtered_statuses as (
       weekly_period_agent_work_time.week_number,
       weekly_period_agent_work_time.ticket_week_start_time_minute,
       weekly_period_agent_work_time.ticket_week_end_time_minute,
-      schedule.start_time_utc as schedule_start_time,
+      coalesce(schedule.start_time_utc, 0) as schedule_start_time, -- fill 0 for schedules completely outside schedule window. Only necessary for this field for use downstream.
       schedule.end_time_utc as schedule_end_time,
-      least(ticket_week_end_time_minute, schedule.end_time_utc) - greatest(weekly_period_agent_work_time.ticket_week_start_time_minute, schedule.start_time_utc) as scheduled_minutes
+      coalesce(
+        least(ticket_week_end_time_minute, schedule.end_time_utc)
+        - greatest(weekly_period_agent_work_time.ticket_week_start_time_minute, schedule.start_time_utc),
+        0) as scheduled_minutes -- fill 0 for schedules completely outside schedule window. Only necessary for this field for use downstream.
     from weekly_period_agent_work_time
-    join schedule on ticket_week_start_time_minute <= schedule.end_time_utc 
+    left join schedule -- using a left join to account for tickets started and completed entirely outside of a schedule, otherwise they are filtered out
+      on ticket_week_start_time_minute <= schedule.end_time_utc 
       and ticket_week_end_time_minute >= schedule.start_time_utc
       and weekly_period_agent_work_time.schedule_id = schedule.schedule_id
       -- this chooses the Daylight Savings Time or Standard Time version of the schedule
       -- We have everything calculated within a week, so take us to the appropriate week first by adding the week_number * minutes-in-a-week to the minute-mark where we start and stop counting for the week
-      and cast( {{ dbt.dateadd(datepart='minute', interval='week_number * (7*24*60) + ticket_week_end_time_minute', from_date_or_timestamp='start_week_date') }} as {{ dbt.type_timestamp() }}) > cast(schedule.valid_from as {{ dbt.type_timestamp() }})
-      and cast( {{ dbt.dateadd(datepart='minute', interval='week_number * (7*24*60) + ticket_week_start_time_minute', from_date_or_timestamp='start_week_date') }} as {{ dbt.type_timestamp() }}) < cast(schedule.valid_until as {{ dbt.type_timestamp() }})
+      and cast( {{ dbt.dateadd(datepart='minute', interval='week_number * (7*24*60) + ticket_week_end_time_minute', from_date_or_timestamp='start_week_date') }} as date) > cast(schedule.valid_from as date)
+      and cast( {{ dbt.dateadd(datepart='minute', interval='week_number * (7*24*60) + ticket_week_start_time_minute', from_date_or_timestamp='start_week_date') }} as date) < cast(schedule.valid_until as date)
 
 ), intercepted_periods_with_running_total as (
 
@@ -152,7 +156,7 @@ with agent_work_time_filtered_statuses as (
     lag(target - running_total_scheduled_minutes) over
           (partition by ticket_id, sla_applied_at order by valid_starting_at, week_number, schedule_end_time) as lag_check,
     case when (target - running_total_scheduled_minutes) = 0 then true
-       when (target - running_total_scheduled_minutes) < 0 
+      when (target - running_total_scheduled_minutes) < 0 
         and 
           (lag(target - running_total_scheduled_minutes) over
           (partition by ticket_id, sla_applied_at order by valid_starting_at, week_number, schedule_end_time) > 0 

diff --git a/models/sla_policy/requester_wait_time/int_zendesk__requester_wait_time_business_hours.sql b/models/sla_policy/requester_wait_time/int_zendesk__requester_wait_time_business_hours.sql
@@ -75,7 +75,7 @@ with requester_wait_time_filtered_statuses as (
 
 ), weeks as (
 
-    {{ dbt_utils.generate_series(208) }}
+    {{ dbt_utils.generate_series(52) }}
 
 ), weeks_cross_ticket_full_solved_time as (
     -- because time is reported in minutes since the beginning of the week, we have to split up time spent on the ticket into calendar weeks
@@ -120,18 +120,22 @@ with requester_wait_time_filtered_statuses as (
       weekly_period_requester_wait_time.week_number,
       weekly_period_requester_wait_time.ticket_week_start_time_minute,
       weekly_period_requester_wait_time.ticket_week_end_time_minute,
-      schedule.start_time_utc as schedule_start_time,
+      coalesce(schedule.start_time_utc, 0) as schedule_start_time,  -- fill 0 for schedules completely outside schedule window. Only necessary for this field for use downstream.
       schedule.end_time_utc as schedule_end_time,
-      least(ticket_week_end_time_minute, schedule.end_time_utc) - greatest(weekly_period_requester_wait_time.ticket_week_start_time_minute, schedule.start_time_utc) as scheduled_minutes
+      coalesce(
+        least(ticket_week_end_time_minute, schedule.end_time_utc)
+        - greatest(weekly_period_requester_wait_time.ticket_week_start_time_minute, schedule.start_time_utc),
+        0) as scheduled_minutes --- fill 0 for schedules completely outside schedule window. Only necessary for this field for use downstream.
     from weekly_period_requester_wait_time
-    join schedule on ticket_week_start_time_minute <= schedule.end_time_utc 
+    left join schedule -- using a left join to account for tickets started and completed entirely outside of a schedule, otherwise they are filtered out
+      on ticket_week_start_time_minute <= schedule.end_time_utc 
       and ticket_week_end_time_minute >= schedule.start_time_utc
       and weekly_period_requester_wait_time.schedule_id = schedule.schedule_id
       -- this chooses the Daylight Savings Time or Standard Time version of the schedule
       -- We have everything calculated within a week, so take us to the appropriate week first by adding the week_number * minutes-in-a-week to the minute-mark where we start and stop counting for the week
-      and cast( {{ dbt.dateadd(datepart='minute', interval='week_number * (7*24*60) + ticket_week_end_time_minute', from_date_or_timestamp='start_week_date') }} as {{ dbt.type_timestamp() }}) > cast(schedule.valid_from as {{ dbt.type_timestamp() }})
-      and cast( {{ dbt.dateadd(datepart='minute', interval='week_number * (7*24*60) + ticket_week_start_time_minute', from_date_or_timestamp='start_week_date') }} as {{ dbt.type_timestamp() }}) < cast(schedule.valid_until as {{ dbt.type_timestamp() }})
-  
+      and cast( {{ dbt.dateadd(datepart='minute', interval='week_number * (7*24*60) + ticket_week_end_time_minute', from_date_or_timestamp='start_week_date') }} as date) > cast(schedule.valid_from as date)
+      and cast( {{ dbt.dateadd(datepart='minute', interval='week_number * (7*24*60) + ticket_week_start_time_minute', from_date_or_timestamp='start_week_date') }} as date) < cast(schedule.valid_until as date)
+
 ), intercepted_periods_with_running_total as (
 
     select 
@@ -152,7 +156,7 @@ with requester_wait_time_filtered_statuses as (
     lag(target - running_total_scheduled_minutes) over
           (partition by ticket_id, sla_applied_at order by valid_starting_at, week_number, schedule_end_time) as lag_check,
     case when (target - running_total_scheduled_minutes) = 0 then true
-       when (target - running_total_scheduled_minutes) < 0 
+      when (target - running_total_scheduled_minutes) < 0 
         and 
           (lag(target - running_total_scheduled_minutes) over
           (partition by ticket_id, sla_applied_at order by valid_starting_at, week_number, schedule_end_time) > 0 

diff --git a/models/ticket_history/int_zendesk__field_calendar_spine.sql b/models/ticket_history/int_zendesk__field_calendar_spine.sql
@@ -1,7 +1,7 @@
 {{
     config(
         materialized='incremental',
-        partition_by = {'field': 'date_day', 'data_type': 'date'} if target.type not in ['spark', 'databricks'] else ['date_day'],
+        partition_by = {'field': 'date_day', 'data_type': 'date', 'granularity': 'month'} if target.type not in ['spark', 'databricks'] else ['date_day'],
         unique_key='ticket_day_id',
         incremental_strategy = 'merge' if target.type not in ('snowflake', 'postgres', 'redshift') else 'delete+insert',
         file_format='delta'

diff --git a/models/ticket_history/int_zendesk__field_history_pivot.sql b/models/ticket_history/int_zendesk__field_history_pivot.sql
@@ -3,7 +3,7 @@
 {{ 
     config(
         materialized='incremental',
-        partition_by = {'field': 'date_day', 'data_type': 'date'} if target.type not in ['spark', 'databricks'] else ['date_day'],
+        partition_by = {'field': 'date_day', 'data_type': 'date', 'granularity': 'month'} if target.type not in ['spark', 'databricks'] else ['date_day'],
         unique_key='ticket_day_id',
         incremental_strategy = 'merge' if target.type not in ('snowflake', 'postgres', 'redshift') else 'delete+insert',
         file_format='delta'

diff --git a/models/unstructured/intermediate/int_zendesk__ticket_comment_document.sql b/models/unstructured/intermediate/int_zendesk__ticket_comment_document.sql
@@ -0,0 +1,60 @@
+{{ config(enabled=var('zendesk__unstructured_enabled', False)) }}
+
+with ticket_comments as ( -- builds one markdown document (with a token count) per ticket comment
+    select *
+    from {{ var('ticket_comment') }}
+
+), users as (
+    select *
+    from {{ var('user') }}
+
+), comment_details as ( -- one row per comment with the commenter's name/email; missing values become 'UNKNOWN'
+    select 
+        ticket_comments.ticket_comment_id,
+        ticket_comments.ticket_id,
+        {{ zendesk.coalesce_cast(["users.email", "'UNKNOWN'"], dbt.type_string()) }} as commenter_email,
+        {{ zendesk.coalesce_cast(["users.name", "'UNKNOWN'"], dbt.type_string()) }} as commenter_name,
+        ticket_comments.created_at as comment_time,
+        ticket_comments.body as comment_body
+    from ticket_comments
+    left join users
+        on ticket_comments.user_id = users.user_id
+    where not coalesce(ticket_comments._fivetran_deleted, False) -- a null deleted flag is treated as not deleted
+        and not coalesce(users._fivetran_deleted, False) -- the coalesce also keeps comments with no matching user (null from the left join)
+
+), comment_markdowns as ( -- renders each comment as markdown: commenter header, sent-time line, then the body
+    select
+        ticket_comment_id,
+        ticket_id,
+        comment_time,
+        cast(
+            {{ dbt.concat([
+                "'### message from '", "commenter_name", "' ('", "commenter_email", "')\\n'",
+                "'##### sent @ '", "comment_time", "'\\n'",
+                "comment_body"
+            ]) }} as {{ dbt.type_string() }})
+            as comment_markdown
+    from comment_details
+
+), comments_tokens as ( -- attaches the token count produced by zendesk.count_tokens for the rendered markdown
+    select
+        *,
+        {{ zendesk.count_tokens("comment_markdown") }} as comment_tokens
+    from comment_markdowns
+
+), truncated_comments as ( -- caps every comment at zendesk_max_tokens (default 5000)
+    select
+        ticket_comment_id,
+        ticket_id,
+        comment_time,
+        case when comment_tokens > {{ var('zendesk_max_tokens', 5000) }} then left(comment_markdown, {{ var('zendesk_max_tokens', 5000) }} * 4)  -- approximate 4 characters per token
+            else comment_markdown
+            end as comment_markdown,
+        case when comment_tokens > {{ var('zendesk_max_tokens', 5000) }} then {{ var('zendesk_max_tokens', 5000) }} -- truncated comments report the cap as their token count
+            else comment_tokens
+            end as comment_tokens
+    from comments_tokens
+)
+
+select *
+from truncated_comments
diff --git a/models/unstructured/intermediate/int_zendesk__ticket_comment_documents_grouped.sql b/models/unstructured/intermediate/int_zendesk__ticket_comment_documents_grouped.sql
@@ -0,0 +1,32 @@
+{{ config(enabled=var('zendesk__unstructured_enabled', False)) }}
+
+with filtered_comment_documents as ( -- per-comment markdown and token counts from the comment document model
+  select *
+  from {{ ref('int_zendesk__ticket_comment_document') }}
+),
+
+grouped_comment_documents as ( -- adds a running token total per ticket, ordered by comment time
+  select 
+    ticket_id,
+    comment_markdown,
+    comment_tokens,
+    comment_time,
+    sum(comment_tokens) over (
+      partition by ticket_id 
+      order by comment_time
+      rows between unbounded preceding and current row
+    ) as cumulative_length
+  from filtered_comment_documents
+)
+
+select 
+  ticket_id,
+  cast(floor({{ dbt_utils.safe_divide('floor(cumulative_length - 1)', var('zendesk_max_tokens', 5000)) }}) as {{ dbt.type_int() }}) as chunk_index, -- floor the quotient before casting: a bare cast of a fractional value to integer rounds on some warehouses, which would shift chunk boundaries
+  {{ dbt.listagg(
+    measure="comment_markdown",
+    delimiter_text="'\\n\\n---\\n\\n'",
+    order_by_clause="order by comment_time"
+    ) }} as comments_group_markdown, -- the chunk's comments in time order, joined by a '---' separator
+  sum(comment_tokens) as chunk_tokens
+from grouped_comment_documents
+group by 1,2 -- ticket_id, chunk_index
diff --git a/models/unstructured/intermediate/int_zendesk__ticket_document.sql b/models/unstructured/intermediate/int_zendesk__ticket_document.sql
@@ -0,0 +1,42 @@
+{{ config(enabled=var('zendesk__unstructured_enabled', False)) }}
+
+with tickets as ( -- builds one markdown header document (with a token count) per ticket
+    select *
+    from {{ var('ticket') }}
+
+), users as (
+    select *
+    from {{ var('user') }}
+
+), ticket_details as ( -- one row per non-deleted ticket with requester details; missing text values become 'UNKNOWN'
+    select
+        tickets.ticket_id,
+        {{ zendesk.coalesce_cast(["tickets.subject", "'UNKNOWN'"], dbt.type_string()) }} as ticket_name, -- coalesced like the other text fields so a null subject cannot null out the concatenated markdown
+        {{ zendesk.coalesce_cast(["users.name", "'UNKNOWN'"], dbt.type_string()) }} as user_name,
+        {{ zendesk.coalesce_cast(["users.email", "'UNKNOWN'"], dbt.type_string()) }} as created_by,
+        tickets.created_at AS created_on,
+        {{ zendesk.coalesce_cast(["tickets.status", "'UNKNOWN'"], dbt.type_string()) }} as status,
+        {{ zendesk.coalesce_cast(["tickets.priority", "'UNKNOWN'"], dbt.type_string()) }} as priority
+    from tickets
+    left join users
+        on tickets.requester_id = users.user_id
+    where not coalesce(tickets._fivetran_deleted, False) -- a null deleted flag is treated as not deleted
+        and not coalesce(users._fivetran_deleted, False) -- the coalesce also keeps tickets with no matching requester (null from the left join)
+
+), final as ( -- renders the ticket header markdown: title, creator, created time, status and priority
+    select
+        ticket_id,
+        {{ dbt.concat([
+            "'# Ticket : '", "ticket_name", "'\\n\\n'",
+            "'Created By : '", "user_name", "' ('", "created_by", "')\\n'",
+            "'Created On : '", "created_on", "'\\n'",
+            "'Status : '", "status", "'\\n'",
+            "'Priority : '", "priority"
+        ]) }} as ticket_markdown
+    from ticket_details
+)
+
+select 
+    *,
+    {{ zendesk.count_tokens("ticket_markdown") }} as ticket_tokens
+from final
diff --git a/models/unstructured/zendesk__document.sql b/models/unstructured/zendesk__document.sql
@@ -0,0 +1,27 @@
+{{ config(enabled=var('zendesk__unstructured_enabled', False)) }}
+
+with ticket_document as ( -- ticket header markdown, one row per ticket
+    select *
+    from {{ ref('int_zendesk__ticket_document') }}
+
+), grouped as ( -- comment chunks, one row per ticket_id and chunk_index
+    select *
+    from {{ ref('int_zendesk__ticket_comment_documents_grouped') }}
+
+), final as ( -- one row per ticket chunk: the ticket header followed by that chunk's comments
+    select
+        cast(ticket_document.ticket_id as {{ dbt.type_string() }}) as document_id,
+        grouped.chunk_index,
+        grouped.chunk_tokens as chunk_tokens_approximate, -- sums comment tokens only; the ticket header's tokens are not added here
+        {{ dbt.concat([
+            "ticket_document.ticket_markdown",
+            "'\\n\\n## COMMENTS\\n\\n'",
+            "grouped.comments_group_markdown"]) }}
+            as chunk
+    from ticket_document
+    join grouped -- inner join: a ticket with no comment chunks yields no document rows
+        on grouped.ticket_id = ticket_document.ticket_id
+)
+
+select *
+from final
diff --git a/models/unstructured/zendesk_unstructured.yml b/models/unstructured/zendesk_unstructured.yml
@@ -0,0 +1,14 @@
+version: 2
+
+models:
+  - name: zendesk__document
+    description: Each record represents a chunk of a Zendesk ticket document, made up of the ticket's details (subject, requester, created date, status and priority) followed by a group of its comments.
+    columns:
+      - name: document_id
+        description: Equivalent to `ticket_id`.
+      - name: chunk_index
+        description: The index of the chunk associated with the `document_id`.
+      - name: chunk_tokens_approximate
+        description: Approximate number of tokens for the chunk, assuming 4 characters per token.
+      - name: chunk
+        description: The text of the chunk.
diff --git a/models/zendesk.yml b/models/zendesk.yml
@@ -171,6 +171,8 @@ models:
         description: Boolean indicating if the ticket had a satisfaction score that went from good to bad.
       - name: is_bad_to_good_satisfaction_score
         description: Boolean indicating if the ticket had a satisfaction score that went from bad to good.
+      - name: _fivetran_deleted
+        description: Boolean created by Fivetran to indicate whether the ticket has been deleted.
 
   - name: zendesk__sla_policies
     description: Each record represents an SLA policy event and additional sla breach and achievement metrics. Calendar and business hour SLA breaches for `first_reply_time`, `next_reply_time`, `requester_wait_time`, and `agent_work_time` are supported. If there is a SLA you would like supported that is not included, please create a feature request.
@@ -492,6 +494,8 @@ models:
         description: The time in minutes the ticket was in an unassigned state
       - name: last_status_assignment_date
         description: The time the status was last changed on the ticket
+      - name: _fivetran_deleted
+        description: Boolean created by Fivetran to indicate whether the ticket has been deleted.
 
   - name: zendesk__ticket_summary
     description: A single record table containing Zendesk ticket and user summary metrics. These metrics are updated for the current day the model is run.

diff --git a/models/zendesk__ticket_field_history.sql b/models/zendesk__ticket_field_history.sql
@@ -1,7 +1,7 @@
 {{ 
     config(
         materialized='incremental',
-        partition_by = {'field': 'date_day', 'data_type': 'date'} if target.type not in ['spark', 'databricks'] else ['date_day'],
+        partition_by = {'field': 'date_day', 'data_type': 'date', 'granularity': 'month' } if target.type not in ['spark', 'databricks'] else ['date_day'],
         unique_key='ticket_day_id',
         incremental_strategy = 'merge' if target.type not in ('snowflake', 'postgres', 'redshift') else 'delete+insert',
         file_format='delta'

diff --git a/packages.yml b/packages.yml
@@ -1,6 +1,5 @@
 packages:
   - package: fivetran/zendesk_source
-    version: [">=0.11.0", "<0.12.0"]
-
+    version: [">=0.12.0", "<0.13.0"]
   - package: calogica/dbt_date
     version: [">=0.9.0", "<1.0.0"]