From da759d38043be03923b00610a1d3d92634541095 Mon Sep 17 00:00:00 2001 From: harshpatel4_crest Date: Thu, 2 Sep 2021 18:50:31 +0530 Subject: [PATCH 01/36] make keys automatic --- tap_exacttarget/dao.py | 17 +- tap_exacttarget/endpoints/campaigns.py | 1 + tap_exacttarget/endpoints/content_areas.py | 2 + tap_exacttarget/endpoints/data_extensions.py | 44 +++- tap_exacttarget/endpoints/emails.py | 2 + tap_exacttarget/endpoints/events.py | 2 + tap_exacttarget/endpoints/folders.py | 2 + tap_exacttarget/endpoints/list_sends.py | 23 +-- tap_exacttarget/endpoints/list_subscribers.py | 2 + tap_exacttarget/endpoints/lists.py | 2 + tap_exacttarget/endpoints/sends.py | 2 + tap_exacttarget/endpoints/subscribers.py | 2 + tests/base.py | 191 ++++++++++++++++++ tests/test_exacttarget_base.py | 95 --------- tests/test_exacttarget_discover.py | 160 +++++++++------ 15 files changed, 353 insertions(+), 194 deletions(-) create mode 100644 tests/base.py delete mode 100644 tests/test_exacttarget_base.py diff --git a/tap_exacttarget/dao.py b/tap_exacttarget/dao.py index ec7224d..be2d557 100644 --- a/tap_exacttarget/dao.py +++ b/tap_exacttarget/dao.py @@ -28,16 +28,23 @@ def generate_catalog(self): cls = self.__class__ mdata = metadata.new() - metadata.write(mdata, (), 'inclusion', 'available') - for prop in cls.SCHEMA['properties']: # pylint:disable=unsubscriptable-object - metadata.write(mdata, ('properties', prop), 'inclusion', 'available') + mdata = metadata.get_standard_metadata( + schema=self.SCHEMA, + key_properties=self.KEY_PROPERTIES, + valid_replication_keys=self.REPLICATION_KEYS if self.REPLICATION_KEYS else None, + replication_method=self.REPLICATION_METHOD + ) + + mdata_map = metadata.to_map(mdata) + for replication_key in self.REPLICATION_KEYS: + mdata_map[('properties', replication_key)]['inclusion'] = 'automatic' return [{ 'tap_stream_id': cls.TABLE, 'stream': cls.TABLE, 'key_properties': cls.KEY_PROPERTIES, 'schema': cls.SCHEMA, - 'metadata': metadata.to_list(mdata) + 
'metadata': metadata.to_list(mdata_map) }] def filter_keys_and_parse(self, obj): @@ -78,6 +85,8 @@ def sync(self): SCHEMA = None TABLE = None KEY_PROPERTIES = None + REPLICATION_KEYS = [] + REPLICATION_METHOD = None def sync_data(self): # pylint: disable=no-self-use raise RuntimeError('sync_data is not implemented!') diff --git a/tap_exacttarget/endpoints/campaigns.py b/tap_exacttarget/endpoints/campaigns.py index 6c3d264..d657dfa 100644 --- a/tap_exacttarget/endpoints/campaigns.py +++ b/tap_exacttarget/endpoints/campaigns.py @@ -36,6 +36,7 @@ class CampaignDataAccessObject(DataAccessObject): TABLE = 'campaign' KEY_PROPERTIES = ['id'] + REPLICATION_METHOD = 'FULL_TABLE' def sync_data(self): cursor = request( diff --git a/tap_exacttarget/endpoints/content_areas.py b/tap_exacttarget/endpoints/content_areas.py index e44f6a9..d7d9645 100644 --- a/tap_exacttarget/endpoints/content_areas.py +++ b/tap_exacttarget/endpoints/content_areas.py @@ -104,6 +104,8 @@ class ContentAreaDataAccessObject(DataAccessObject): TABLE = 'content_area' KEY_PROPERTIES = ['ID'] + REPLICATION_METHOD = 'INCREMENTAL' + REPLICATION_KEYS = ['ModifiedDate'] def sync_data(self): table = self.__class__.TABLE diff --git a/tap_exacttarget/endpoints/data_extensions.py b/tap_exacttarget/endpoints/data_extensions.py index 8479585..9537c25 100644 --- a/tap_exacttarget/endpoints/data_extensions.py +++ b/tap_exacttarget/endpoints/data_extensions.py @@ -79,11 +79,24 @@ def _get_extensions(self): } } }, - 'metadata': [{'breadcrumb': (), 'metadata': {'inclusion':'available'}}, - {'breadcrumb': ('properties', '_CustomObjectKey'), - 'metadata': {'inclusion':'available'}}, - {'breadcrumb': ('properties', 'CategoryID'), - 'metadata': {'inclusion':'available'}}] + 'metadata': [ + { + 'breadcrumb': (), + 'metadata': { + 'inclusion':'available', + 'forced-replication-method': 'FULL_TABLE', + "table-key-properties": [ + "_CustomObjectKey" + ] + } + }, + { + 'breadcrumb': ('properties', '_CustomObjectKey'), + 
'metadata': {'inclusion':'automatic'}}, + { + 'breadcrumb': ('properties', 'CategoryID'), + 'metadata': {'inclusion':'available'}} + ] } return to_return @@ -97,11 +110,13 @@ def _get_fields(self, extensions): self.auth_stub) for field in result: + is_primary_key = False extension_id = field.DataExtension.CustomerKey field = sudsobj_to_dict(field) field_name = field['Name'] if field.get('IsPrimaryKey'): + is_primary_key = True to_return = _merge_in( to_return, [extension_id, 'key_properties'], @@ -120,12 +135,23 @@ def _get_fields(self, extensions): [extension_id, 'schema', 'properties', field_name], field_schema) + if is_primary_key: + for mdata in to_return[extension_id]['metadata']: + if not mdata.get('breadcrumb'): + mdata.get('metadata').get('table-key-properties').append(field_name) + # These fields are defaulted into the schema, do not add to metadata again. if field_name not in {'_CustomObjectKey', 'CategoryID'}: - to_return[extension_id]['metadata'].append({ - 'breadcrumb': ('properties', field_name), - 'metadata': {'inclusion': 'available'} - }) + if is_primary_key: + to_return[extension_id]['metadata'].append({ + 'breadcrumb': ('properties', field_name), + 'metadata': {'inclusion': 'automatic'} + }) + else: + to_return[extension_id]['metadata'].append({ + 'breadcrumb': ('properties', field_name), + 'metadata': {'inclusion': 'available'} + }) return to_return diff --git a/tap_exacttarget/endpoints/emails.py b/tap_exacttarget/endpoints/emails.py index 4c0c089..0131f86 100644 --- a/tap_exacttarget/endpoints/emails.py +++ b/tap_exacttarget/endpoints/emails.py @@ -107,6 +107,8 @@ class EmailDataAccessObject(DataAccessObject): TABLE = 'email' KEY_PROPERTIES = ['ID'] + REPLICATION_METHOD = 'INCREMENTAL' + REPLICATION_KEYS = ['ModifiedDate'] def parse_object(self, obj): to_return = obj.copy() diff --git a/tap_exacttarget/endpoints/events.py b/tap_exacttarget/endpoints/events.py index 8ed64d5..4d48983 100644 --- a/tap_exacttarget/endpoints/events.py +++ 
b/tap_exacttarget/endpoints/events.py @@ -45,6 +45,8 @@ class EventDataAccessObject(DataAccessObject): TABLE = 'event' KEY_PROPERTIES = ['SendID', 'EventType', 'SubscriberKey', 'EventDate'] + REPLICATION_METHOD = 'INCREMENTAL' + REPLICATION_KEYS = ['EventDate'] def sync_data(self): table = self.__class__.TABLE diff --git a/tap_exacttarget/endpoints/folders.py b/tap_exacttarget/endpoints/folders.py index e52247a..5682f76 100644 --- a/tap_exacttarget/endpoints/folders.py +++ b/tap_exacttarget/endpoints/folders.py @@ -52,6 +52,8 @@ class FolderDataAccessObject(DataAccessObject): TABLE = 'folder' KEY_PROPERTIES = ['ID'] + REPLICATION_METHOD = 'INCREMENTAL' + REPLICATION_KEYS = ['ModifiedDate'] def parse_object(self, obj): to_return = obj.copy() diff --git a/tap_exacttarget/endpoints/list_sends.py b/tap_exacttarget/endpoints/list_sends.py index 136697a..5578857 100644 --- a/tap_exacttarget/endpoints/list_sends.py +++ b/tap_exacttarget/endpoints/list_sends.py @@ -94,6 +94,7 @@ class ListSendDataAccessObject(DataAccessObject): TABLE = 'list_send' KEY_PROPERTIES = ['ListID', 'SendID'] + REPLICATION_METHOD = 'FULL_TABLE' def parse_object(self, obj): to_return = obj.copy() @@ -106,29 +107,13 @@ def sync_data(self): table = self.__class__.TABLE selector = FuelSDK.ET_ListSend - search_filter = None - retrieve_all_since = get_last_record_value_for_table(self.state, table) - - if retrieve_all_since is not None: - search_filter = { - 'Property': 'ModifiedDate', - 'SimpleOperator': 'greaterThan', - 'Value': retrieve_all_since - } - + # making this endpoint as FULL_TABLE, as 'ModifiedDate' is not retrievable as discussed + # here: https://salesforce.stackexchange.com/questions/354332/not-getting-modifieddate-for-listsend-endpoint stream = request('ListSend', selector, - self.auth_stub, - search_filter) + self.auth_stub) for list_send in stream: list_send = self.filter_keys_and_parse(list_send) - self.state = incorporate(self.state, - table, - 'ModifiedDate', - 
list_send.get('ModifiedDate')) - singer.write_records(table, [list_send]) - - save_state(self.state) diff --git a/tap_exacttarget/endpoints/list_subscribers.py b/tap_exacttarget/endpoints/list_subscribers.py index 5a7d23f..0e67b55 100644 --- a/tap_exacttarget/endpoints/list_subscribers.py +++ b/tap_exacttarget/endpoints/list_subscribers.py @@ -55,6 +55,8 @@ class ListSubscriberDataAccessObject(DataAccessObject): TABLE = 'list_subscriber' KEY_PROPERTIES = ['SubscriberKey', 'ListID'] + REPLICATION_METHOD = 'INCREMENTAL' + REPLICATION_KEYS = ['ModifiedDate'] def __init__(self, config, state, auth_stub, catalog): super(ListSubscriberDataAccessObject, self).__init__( diff --git a/tap_exacttarget/endpoints/lists.py b/tap_exacttarget/endpoints/lists.py index b9f2dbf..69836da 100644 --- a/tap_exacttarget/endpoints/lists.py +++ b/tap_exacttarget/endpoints/lists.py @@ -49,6 +49,8 @@ class ListDataAccessObject(DataAccessObject): TABLE = 'list' KEY_PROPERTIES = ['ID'] + REPLICATION_METHOD = 'INCREMENTAL' + REPLICATION_KEYS = ['ModifiedDate'] def sync_data(self): table = self.__class__.TABLE diff --git a/tap_exacttarget/endpoints/sends.py b/tap_exacttarget/endpoints/sends.py index 5129197..c521941 100644 --- a/tap_exacttarget/endpoints/sends.py +++ b/tap_exacttarget/endpoints/sends.py @@ -81,6 +81,8 @@ class SendDataAccessObject(DataAccessObject): TABLE = 'send' KEY_PROPERTIES = ['ID'] + REPLICATION_METHOD = 'INCREMENTAL' + REPLICATION_KEYS = ['ModifiedDate'] def parse_object(self, obj): to_return = obj.copy() diff --git a/tap_exacttarget/endpoints/subscribers.py b/tap_exacttarget/endpoints/subscribers.py index 9a846a2..e081998 100644 --- a/tap_exacttarget/endpoints/subscribers.py +++ b/tap_exacttarget/endpoints/subscribers.py @@ -102,6 +102,8 @@ class SubscriberDataAccessObject(DataAccessObject): SCHEMA = SCHEMA TABLE = 'subscriber' KEY_PROPERTIES = ['ID'] + REPLICATION_METHOD = 'INCREMENTAL' + REPLICATION_KEYS = ['ModifiedDate'] def parse_object(self, obj): to_return = 
obj.copy() diff --git a/tests/base.py b/tests/base.py new file mode 100644 index 0000000..3f21ab0 --- /dev/null +++ b/tests/base.py @@ -0,0 +1,191 @@ +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner +import os +import unittest +from datetime import datetime as dt +import time + +class ExactTargetBase(unittest.TestCase): + START_DATE = "" + DATETIME_FMT = { + "%Y-%m-%dT%H:%M:%SZ", + "%Y-%m-%d %H:%M:%S", + "%Y-%m-%dT%H:%M:%S.%fZ" + } + PRIMARY_KEYS = "table-key-properties" + REPLICATION_METHOD = "forced-replication-method" + REPLICATION_KEYS = "valid-replication-keys" + FULL_TABLE = "FULL_TABLE" + INCREMENTAL = "INCREMENTAL" + + def name(self): + return "tap_tester_exacttarget_base" + + def tap_name(self): + return "tap-exacttarget" + + def setUp(self): + required_env = { + "TAP_EXACTTARGET_CLIENT_ID", + "TAP_EXACTTARGET_CLIENT_SECRET", + "TAP_EXACTTARGET_TENANT_SUBDOMAIN", + "TAP_EXACTTARGET_V2_CLIENT_ID", + "TAP_EXACTTARGET_V2_CLIENT_SECRET", + "TAP_EXACTTARGET_V2_TENANT_SUBDOMAIN", + } + missing_envs = [v for v in required_env if not os.getenv(v)] + if missing_envs: + raise Exception("set " + ", ".join(missing_envs)) + + def get_type(self): + return "platform.exacttarget" + + def get_credentials(self): + return { + 'client_secret': os.getenv('TAP_EXACTTARGET_CLIENT_SECRET') + } + + def get_properties(self, original: bool = True): + return_value = { + 'start_date': '2014-01-01T00:00:00Z', + 'client_id': os.getenv('TAP_EXACTTARGET_CLIENT_ID'), + 'tenant_subdomain': os.getenv('TAP_EXACTTARGET_TENANT_SUBDOMAIN') + } + if original: + return return_value + + # Reassign start date + return_value["start_date"] = self.START_DATE + return return_value + + def expected_metadata(self): + return { + "campaign": { + self.PRIMARY_KEYS: {"id"}, + self.REPLICATION_METHOD: self.FULL_TABLE + }, + "content_area":{ + self.PRIMARY_KEYS: {"ID"}, + self.REPLICATION_METHOD: self.INCREMENTAL, + 
self.REPLICATION_KEYS: {"ModifiedDate"}, + }, + "data_extension.test emails":{ + self.PRIMARY_KEYS: {"_CustomObjectKey", "ID"}, + self.REPLICATION_METHOD: self.FULL_TABLE, + }, + "data_extension.This is a test":{ + self.PRIMARY_KEYS: {"_CustomObjectKey", "ID"}, + self.REPLICATION_METHOD: self.FULL_TABLE, + }, + "data_extension.my_test":{ + self.PRIMARY_KEYS: {"_CustomObjectKey", "ID"}, + self.REPLICATION_METHOD: self.FULL_TABLE, + }, + "email":{ + self.PRIMARY_KEYS: {"ID"}, + self.REPLICATION_METHOD: self.INCREMENTAL, + self.REPLICATION_KEYS: {"ModifiedDate"}, + }, + "event": { + self.PRIMARY_KEYS: {"SendID", "EventType", "SubscriberKey", "EventDate"}, + self.REPLICATION_METHOD: self.INCREMENTAL, + self.REPLICATION_KEYS: {"EventDate"}, + }, + "folder":{ + self.PRIMARY_KEYS: {"ID"}, + self.REPLICATION_METHOD: self.INCREMENTAL, + self.REPLICATION_KEYS: {"ModifiedDate"}, + }, + "list":{ + self.PRIMARY_KEYS: {"ID"}, + self.REPLICATION_METHOD: self.INCREMENTAL, + self.REPLICATION_KEYS: {"ModifiedDate"}, + }, + "list_send":{ + self.PRIMARY_KEYS: {"ListID", "SendID"}, + self.REPLICATION_METHOD: self.FULL_TABLE, + }, + "list_subscriber":{ + self.PRIMARY_KEYS: {"SubscriberKey", "ListID"}, + self.REPLICATION_METHOD: self.INCREMENTAL, + self.REPLICATION_KEYS: {"ModifiedDate"}, + }, + "send":{ + self.PRIMARY_KEYS: {"ID"}, + self.REPLICATION_METHOD: self.INCREMENTAL, + self.REPLICATION_KEYS: {"ModifiedDate"}, + }, + "subscriber":{ + self.PRIMARY_KEYS: {"ID"}, + self.REPLICATION_METHOD: self.INCREMENTAL, + self.REPLICATION_KEYS: {"ModifiedDate"}, + } + } + + def streams_to_select(self): + return set(self.expected_metadata().keys()) - {'event', 'list_subscriber', 'subscriber'} + + def expected_replication_keys(self): + return {table: properties.get(self.REPLICATION_KEYS, set()) + for table, properties + in self.expected_metadata().items()} + + def expected_primary_keys(self): + return {table: properties.get(self.PRIMARY_KEYS, set()) + for table, properties + in 
self.expected_metadata().items()} + + def expected_replication_method(self): + return {table: properties.get(self.REPLICATION_METHOD, set()) + for table, properties + in self.expected_metadata().items()} + + def select_found_catalogs(self, conn_id, catalogs, only_streams=None, deselect_all_fields: bool = False, non_selected_props=[]): + """Select all streams and all fields within streams""" + for catalog in catalogs: + if only_streams and catalog["stream_name"] not in only_streams: + continue + schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + + non_selected_properties = non_selected_props if not deselect_all_fields else [] + if deselect_all_fields: + # get a list of all properties so that none are selected + non_selected_properties = schema.get('annotated-schema', {}).get( + 'properties', {}) + non_selected_properties = non_selected_properties.keys() + additional_md = [] + + connections.select_catalog_and_fields_via_metadata( + conn_id, catalog, schema, additional_md=additional_md, + non_selected_fields=non_selected_properties + ) + + def run_and_verify_sync(self, conn_id): + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + sync_record_count = runner.examine_target_output_file( + self, conn_id, self.streams_to_select(), self.expected_primary_keys()) + + self.assertGreater( + sum(sync_record_count.values()), 0, + msg="failed to replicate any data: {}".format(sync_record_count) + ) + print("total replicated row count: {}".format(sum(sync_record_count.values()))) + + return sync_record_count + + def dt_to_ts(self, dtime): + for date_format in self.DATETIME_FMT: + try: + date_stripped = int(time.mktime(dt.strptime(dtime, date_format).timetuple())) + return date_stripped + except ValueError: + continue + + def is_incremental(self, stream): + return 
self.expected_metadata()[stream][self.REPLICATION_METHOD] == self.INCREMENTAL \ No newline at end of file diff --git a/tests/test_exacttarget_base.py b/tests/test_exacttarget_base.py deleted file mode 100644 index 3c70772..0000000 --- a/tests/test_exacttarget_base.py +++ /dev/null @@ -1,95 +0,0 @@ -from tap_tester.scenario import SCENARIOS - -import datetime -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner -import os -import unittest -import pdb -import json -import requests - - -class ExactTargetBase(unittest.TestCase): - - def name(self): - return "tap_tester_exacttarget_base" - - def tap_name(self): - return "tap-exacttarget" - - def setUp(self): - required_env = { - "client_id": "TAP_EXACTTARGET_CLIENT_ID", - "client_secret": "TAP_EXACTTARGET_CLIENT_SECRET", - } - missing_envs = [v for v in required_env.values() if not os.getenv(v)] - if missing_envs: - raise Exception("set " + ", ".join(missing_envs)) - - def get_type(self): - return "platform.exacttarget" - - def get_credentials(self): - return { - 'client_secret': os.getenv('TAP_EXACTTARGET_CLIENT_SECRET') - } - - def get_properties(self): - yesterday = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=1) - return { - 'start_date': yesterday.strftime("%Y-%m-%dT%H:%M:%SZ"), - 'client_id': os.getenv('TAP_EXACTTARGET_CLIENT_ID') - } - - def streams_to_select(self): - # Note: Custom streams failed on our account with an error on - # `_CustomObjectKey` not being valid - return ["campaign", - "content_area", - "email", - "event", - "folder", - "list", - "list_send", - "list_subscriber", - "send", - "subscriber"] - - def select_found_catalogs(self, conn_id, found_catalogs, only_streams=None): - selected = [] - for catalog in found_catalogs: - if only_streams and catalog["tap_stream_id"] not in only_streams: - continue - schema = menagerie.select_catalog(conn_id, catalog) - - selected.append({ - "key_properties": 
catalog.get("key_properties"), - "schema": schema, - "tap_stream_id": catalog.get("tap_stream_id"), - "replication_method": catalog.get("replication_method"), - "replication_key": catalog.get("replication_key"), - }) - - for catalog_entry in selected: - connections.select_catalog_and_fields_via_metadata( - conn_id, - catalog_entry, - {"annotated-schema": catalog_entry['schema']} - ) - - def test_run(self): - conn_id = connections.ensure_connection(self) - runner.run_check_mode(self, conn_id) - - found_catalogs = menagerie.get_catalogs(conn_id) - self.select_found_catalogs(conn_id, found_catalogs, only_streams=self.streams_to_select()) - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - -SCENARIOS.add(ExactTargetBase) diff --git a/tests/test_exacttarget_discover.py b/tests/test_exacttarget_discover.py index b183e17..9c247ff 100644 --- a/tests/test_exacttarget_discover.py +++ b/tests/test_exacttarget_discover.py @@ -1,87 +1,115 @@ -import datetime +from base import ExactTargetBase import tap_tester.connections as connections import tap_tester.menagerie as menagerie import tap_tester.runner as runner import os -import unittest -import pdb -import json -import requests - -class ExactTargetDiscover(unittest.TestCase): +class ExactTargetDiscover(ExactTargetBase): def name(self): return "tap_tester_exacttarget_discover_v1" - def tap_name(self): - return "tap-exacttarget" - - def setUp(self): - required_env = { - "TAP_EXACTTARGET_CLIENT_ID", - "TAP_EXACTTARGET_CLIENT_SECRET", - "TAP_EXACTTARGET_TENANT_SUBDOMAIN", - "TAP_EXACTTARGET_V2_CLIENT_ID", - "TAP_EXACTTARGET_V2_CLIENT_SECRET", - "TAP_EXACTTARGET_V2_TENANT_SUBDOMAIN", - } - missing_envs = [v for v in required_env if not os.getenv(v)] - if missing_envs: - raise Exception("set " + ", ".join(missing_envs)) - - def get_type(self): - return 
"platform.exacttarget" - def get_credentials(self): return { 'client_secret': os.getenv('TAP_EXACTTARGET_CLIENT_SECRET') } - def get_properties(self): - yesterday = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=1) - return { - 'start_date': yesterday.strftime("%Y-%m-%dT%H:%M:%SZ"), - 'client_id': os.getenv('TAP_EXACTTARGET_CLIENT_ID') - } + def get_properties(self, *args, **kwargs): + props = super().get_properties(*args, **kwargs) + props.pop('tenant_subdomain') + return props def test_run(self): + """ + Testing that discovery creates the appropriate catalog with valid metadata. + • Verify number of actual streams discovered match expected + • Verify the stream names discovered were what we expect + streams should only have lowercase alphas and underscores + • verify there is only 1 top level breadcrumb + • verify primary key(s) + • verify replication key(s) + • verify that primary keys and replication keys are given the inclusion of automatic. + • verify that all other fields have inclusion of available in metadata. 
+ """ conn_id = connections.ensure_connection(self) runner.run_check_mode(self, conn_id) - found_catalog = menagerie.get_catalog(conn_id) - for catalog_entry in found_catalog['streams']: - field_names_in_schema = set([ k for k in catalog_entry['schema']['properties'].keys()]) - field_names_in_breadcrumbs = set([x['breadcrumb'][1] for x in catalog_entry['metadata'] if len(x['breadcrumb']) == 2]) - self.assertEqual(field_names_in_schema, field_names_in_breadcrumbs) - - inclusions_set = set([(x['breadcrumb'][1], x['metadata']['inclusion']) - for x in catalog_entry['metadata'] - if len(x['breadcrumb']) == 2]) - # Validate that all fields are in metadata - self.assertEqual(len(inclusions_set), len(field_names_in_schema)) - self.assertEqual(set([i[0] for i in inclusions_set]), field_names_in_schema) - # Validate that all metadata['inclusion'] are 'available' - unique_inclusions = set([i[1] for i in inclusions_set]) - self.assertTrue(len(unique_inclusions) == 1 and 'available' in unique_inclusions) + streams_to_test = self.streams_to_select() + found_catalogs = menagerie.get_catalogs(conn_id) + + for stream in streams_to_test: + with self.subTest(stream=stream): + + # Verify ensure the catalog is found for a given stream + catalog = next(iter([catalog for catalog in found_catalogs + if catalog["stream_name"] == stream])) + self.assertIsNotNone(catalog) + + # collecting expected values + expected_primary_keys = self.expected_primary_keys()[stream] + expected_replication_keys = self.expected_replication_keys()[stream] + + # add primary keys and replication keys in automatically replicated keys to check + expected_automatic_fields = expected_primary_keys | expected_replication_keys + + # collecting actual values... 
+ schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + metadata = schema_and_metadata["metadata"] + stream_properties = [item for item in metadata if item.get("breadcrumb") == []] + actual_primary_keys = set( + stream_properties[0].get( + "metadata", {"table-key-properties": []}).get("table-key-properties", []) + ) + actual_replication_keys = set( + stream_properties[0].get( + "metadata", {"valid-replication-keys": []}).get("valid-replication-keys", []) + ) + actual_automatic_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in metadata + if item.get("metadata").get("inclusion") == "automatic" + ) + + ########################################################################## + ### metadata assertions + ########################################################################## + + # verify there is only 1 top level breadcrumb in metadata + self.assertTrue(len(stream_properties) == 1, + msg="There is NOT only one top level breadcrumb for {}".format(stream) + \ + "\nstream_properties | {}".format(stream_properties)) + + # verify primary key(s) match expectations + self.assertSetEqual( + expected_primary_keys, actual_primary_keys, + ) + + # verify replication key(s) match expectations + self.assertSetEqual( + expected_replication_keys, actual_replication_keys, + ) + + # verify that primary keys + # are given the inclusion of automatic in metadata. 
+ self.assertSetEqual(expected_automatic_fields, actual_automatic_fields) + + # verify that all other fields have inclusion of available + # This assumes there are no unsupported fields for SaaS sources + self.assertTrue( + all({item.get("metadata").get("inclusion") == "available" + for item in metadata + if item.get("breadcrumb", []) != [] + and item.get("breadcrumb", ["properties", None])[1] + not in actual_automatic_fields}), + msg="Not all non key properties are set to available in metadata") class ExactTargetDiscover2(ExactTargetDiscover): def name(self): return "tap_tester_exacttarget_discover_v1_with_subdomain" - def get_credentials(self): - return { - 'client_secret': os.getenv('TAP_EXACTTARGET_CLIENT_SECRET') - } - - def get_properties(self): - yesterday = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=1) - return { - 'start_date': yesterday.strftime("%Y-%m-%dT%H:%M:%SZ"), - 'client_id': os.getenv('TAP_EXACTTARGET_CLIENT_ID'), - 'tenant_subdomain': os.getenv('TAP_EXACTTARGET_TENANT_SUBDOMAIN') - } - + def get_properties(self, *args, **kwargs): + props = super().get_properties(*args, **kwargs) + props['tenant_subdomain'] = os.getenv('TAP_EXACTTARGET_TENANT_SUBDOMAIN') + return props class ExactTargetDiscover3(ExactTargetDiscover): def name(self): @@ -92,10 +120,8 @@ def get_credentials(self): 'client_secret': os.getenv('TAP_EXACTTARGET_V2_CLIENT_SECRET') } - def get_properties(self): - yesterday = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=1) - return { - 'start_date': yesterday.strftime("%Y-%m-%dT%H:%M:%SZ"), - 'client_id': os.getenv('TAP_EXACTTARGET_V2_CLIENT_ID'), - 'tenant_subdomain': os.getenv('TAP_EXACTTARGET_V2_TENANT_SUBDOMAIN') - } + def get_properties(self, *args, **kwargs): + props = super().get_properties(*args, **kwargs) + props['client_id'] = os.getenv('TAP_EXACTTARGET_V2_CLIENT_ID') + props['tenant_subdomain'] = os.getenv('TAP_EXACTTARGET_V2_TENANT_SUBDOMAIN') + return props \ No newline at 
end of file From 23a93b3791162126fa14a0eb5f22f9cd8c70d5c1 Mon Sep 17 00:00:00 2001 From: harshpatel4_crest Date: Thu, 2 Sep 2021 18:52:50 +0530 Subject: [PATCH 02/36] pylint resolve --- tap_exacttarget/endpoints/list_sends.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tap_exacttarget/endpoints/list_sends.py b/tap_exacttarget/endpoints/list_sends.py index 5578857..0fa16c1 100644 --- a/tap_exacttarget/endpoints/list_sends.py +++ b/tap_exacttarget/endpoints/list_sends.py @@ -6,8 +6,6 @@ from tap_exacttarget.schemas import ID_FIELD, CUSTOM_PROPERTY_LIST, \ CREATED_DATE_FIELD, CUSTOMER_KEY_FIELD, OBJECT_ID_FIELD, \ MODIFIED_DATE_FIELD, with_properties -from tap_exacttarget.state import incorporate, save_state, \ - get_last_record_value_for_table LOGGER = singer.get_logger() From f3de6a90b46d1cb1ca3999f5a65a7600ff9ee7fe Mon Sep 17 00:00:00 2001 From: harshpatel4_crest Date: Thu, 2 Sep 2021 19:00:26 +0530 Subject: [PATCH 03/36] add full replication test case --- tests/test_exacttarget_full_replication.py | 92 ++++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 tests/test_exacttarget_full_replication.py diff --git a/tests/test_exacttarget_full_replication.py b/tests/test_exacttarget_full_replication.py new file mode 100644 index 0000000..c175a7f --- /dev/null +++ b/tests/test_exacttarget_full_replication.py @@ -0,0 +1,92 @@ +from base import ExactTargetBase +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner +import json +import datetime + +class FullReplicationTest(ExactTargetBase): + """Test tap gets all records for streams with full replication""" + + def name(self): + return "tap_tester_exacttarget_full_replication" + + def test_run(self): + conn_id_1 = connections.ensure_connection(self) + runner.run_check_mode(self, conn_id_1) + + # Select streams + found_catalogs = menagerie.get_catalogs(conn_id_1) + full_streams = {key for key, value in 
self.expected_replication_method().items() + if value == "FULL_TABLE"} + our_catalogs = [catalog for catalog in found_catalogs if + catalog.get('stream_name') in full_streams] + self.select_found_catalogs(conn_id_1, our_catalogs, full_streams) + + # Run a sync job + first_sync_record_count = self.run_and_verify_sync(conn_id_1) + + # verify that the sync only sent records to the target for selected streams (catalogs) + self.assertEqual(set(first_sync_record_count.keys()), full_streams) + + first_sync_state = menagerie.get_state(conn_id_1) + + # Get the set of records from a first sync + first_sync_records = runner.get_records_from_target_output() + + # set future start date, which validates that stream is syncing 'FULL_TABLE' + self.START_DATE = datetime.datetime.strftime(datetime.datetime.today() + datetime.timedelta(days=1), "%Y-%m-%dT00:00:00Z") + + conn_id_2 = connections.ensure_connection(self, original_properties=False) + runner.run_check_mode(self, conn_id_2) + + found_catalogs = menagerie.get_catalogs(conn_id_2) + our_catalogs = [catalog for catalog in found_catalogs if + catalog.get('stream_name') in full_streams] + self.select_found_catalogs(conn_id_2, our_catalogs, full_streams) + + # Run a second sync job + second_sync_record_count = self.run_and_verify_sync(conn_id_2) + + # Get the set of records from a second sync + second_sync_records = runner.get_records_from_target_output() + + for stream in full_streams: + with self.subTest(stream=stream): + + # verify there is no bookmark values from state + state_value = first_sync_state.get("bookmarks", {}).get(stream) + self.assertIsNone(state_value) + + # verify that there is more than 1 record of data - setup necessary + self.assertGreater(first_sync_record_count.get(stream, 0), 1, + msg="Data is not set up to be able to test full sync") + + # verify that you get the same or more data the 2nd time around + self.assertGreaterEqual( + second_sync_record_count.get(stream, 0), + 
first_sync_record_count.get(stream, 0), + msg="second syc did not have more records, full sync not verified") + + # [set(message['data']) for message in messages['messages'] + # if message['action'] == 'upsert'][0] + # verify all data from 1st sync included in 2nd sync + first_data = [record["data"] for record + in first_sync_records.get(stream, {}).get("messages", {"data": {}})] + second_data = [record["data"] for record + in second_sync_records.get(stream, {}).get("messages", {"data": {}})] + + same_records = 0 + for first_record in first_data: + first_value = json.dumps(first_record, sort_keys=True) + + for compare_record in second_data: + compare_value = json.dumps(compare_record, sort_keys=True) + + if first_value == compare_value: + second_data.remove(compare_record) + same_records += 1 + break + + self.assertEqual(len(first_data), same_records, + msg="Not all data from the first sync was in the second sync") From d08a706139e9c974702c84536b774b6f1e25feba Mon Sep 17 00:00:00 2001 From: harshpatel4_crest Date: Thu, 2 Sep 2021 19:23:00 +0530 Subject: [PATCH 04/36] use transformation --- tap_exacttarget/endpoints/campaigns.py | 8 +- tap_exacttarget/endpoints/content_areas.py | 8 +- tap_exacttarget/endpoints/data_extensions.py | 8 +- tap_exacttarget/endpoints/emails.py | 8 +- tap_exacttarget/endpoints/events.py | 8 +- tap_exacttarget/endpoints/folders.py | 8 +- tap_exacttarget/endpoints/list_sends.py | 6 +- tap_exacttarget/endpoints/list_subscribers.py | 8 +- tap_exacttarget/endpoints/lists.py | 8 +- tap_exacttarget/endpoints/sends.py | 8 +- tap_exacttarget/endpoints/subscribers.py | 8 +- tests/test_exacttarget_field_selection.py | 133 ++++++++++++++++++ 12 files changed, 208 insertions(+), 11 deletions(-) create mode 100644 tests/test_exacttarget_field_selection.py diff --git a/tap_exacttarget/endpoints/campaigns.py b/tap_exacttarget/endpoints/campaigns.py index d657dfa..24eecdb 100644 --- a/tap_exacttarget/endpoints/campaigns.py +++ 
b/tap_exacttarget/endpoints/campaigns.py @@ -1,5 +1,7 @@ import FuelSDK +import copy import singer +from singer import Transformer, metadata from tap_exacttarget.client import request from tap_exacttarget.dao import DataAccessObject @@ -44,7 +46,11 @@ def sync_data(self): FuelSDK.ET_Campaign, self.auth_stub) + catalog_copy = copy.deepcopy(self.catalog) + for campaign in cursor: campaign = self.filter_keys_and_parse(campaign) - singer.write_records(self.__class__.TABLE, [campaign]) + with Transformer() as transformer: + rec = transformer.transform(campaign, catalog_copy.get('schema'), metadata.to_map(catalog_copy.get('metadata'))) + singer.write_record(self.__class__.TABLE, rec) diff --git a/tap_exacttarget/endpoints/content_areas.py b/tap_exacttarget/endpoints/content_areas.py index d7d9645..fb608f7 100644 --- a/tap_exacttarget/endpoints/content_areas.py +++ b/tap_exacttarget/endpoints/content_areas.py @@ -1,5 +1,7 @@ import FuelSDK +import copy import singer +from singer import Transformer, metadata from tap_exacttarget.client import request from tap_exacttarget.dao import DataAccessObject @@ -126,6 +128,8 @@ def sync_data(self): self.auth_stub, search_filter) + catalog_copy = copy.deepcopy(self.catalog) + for content_area in stream: content_area = self.filter_keys_and_parse(content_area) @@ -134,6 +138,8 @@ def sync_data(self): 'ModifiedDate', content_area.get('ModifiedDate')) - singer.write_records(table, [content_area]) + with Transformer() as transformer: + rec = transformer.transform(content_area, catalog_copy.get('schema'), metadata.to_map(catalog_copy.get('metadata'))) + singer.write_record(table, rec) save_state(self.state) diff --git a/tap_exacttarget/endpoints/data_extensions.py b/tap_exacttarget/endpoints/data_extensions.py index 9537c25..6871f22 100644 --- a/tap_exacttarget/endpoints/data_extensions.py +++ b/tap_exacttarget/endpoints/data_extensions.py @@ -1,5 +1,7 @@ import FuelSDK +import copy import singer +from singer import Transformer, metadata 
from funcy import set_in, update_in, merge @@ -232,6 +234,8 @@ def _replicate(self, customer_key, keys, result = request_from_cursor('DataExtensionObject', cursor, batch_size=batch_size) + catalog_copy = copy.deepcopy(self.catalog) + for row in result: row = self.filter_keys_and_parse(row) row['CategoryID'] = parent_category_id @@ -241,7 +245,9 @@ def _replicate(self, customer_key, keys, replication_key, row.get(replication_key)) - singer.write_records(table, [row]) + with Transformer() as transformer: + rec = transformer.transform(row, catalog_copy.get('schema'), metadata.to_map(catalog_copy.get('metadata'))) + singer.write_record(table, rec) if partial: self.state = incorporate(self.state, diff --git a/tap_exacttarget/endpoints/emails.py b/tap_exacttarget/endpoints/emails.py index 0131f86..218c7bb 100644 --- a/tap_exacttarget/endpoints/emails.py +++ b/tap_exacttarget/endpoints/emails.py @@ -1,5 +1,7 @@ import FuelSDK +import copy import singer +from singer import Transformer, metadata from tap_exacttarget.client import request from tap_exacttarget.dao import DataAccessObject @@ -141,6 +143,8 @@ def sync_data(self): self.auth_stub, search_filter) + catalog_copy = copy.deepcopy(self.catalog) + for email in stream: email = self.filter_keys_and_parse(email) @@ -149,6 +153,8 @@ def sync_data(self): 'ModifiedDate', email.get('ModifiedDate')) - singer.write_records(table, [email]) + with Transformer() as transformer: + rec = transformer.transform(email, catalog_copy.get('schema'), metadata.to_map(catalog_copy.get('metadata'))) + singer.write_record(table, rec) save_state(self.state) diff --git a/tap_exacttarget/endpoints/events.py b/tap_exacttarget/endpoints/events.py index 4d48983..36145cc 100644 --- a/tap_exacttarget/endpoints/events.py +++ b/tap_exacttarget/endpoints/events.py @@ -1,5 +1,7 @@ import FuelSDK +import copy import singer +from singer import Transformer, metadata from tap_exacttarget.client import request from tap_exacttarget.dao import DataAccessObject 
@@ -89,6 +91,8 @@ def sync_data(self): self.auth_stub, search_filter) + catalog_copy = copy.deepcopy(self.catalog) + for event in stream: event = self.filter_keys_and_parse(event) @@ -104,7 +108,9 @@ def sync_data(self): event.get('EventDate'))) continue - singer.write_records(table, [event]) + with Transformer() as transformer: + rec = transformer.transform(event, catalog_copy.get('schema'), metadata.to_map(catalog_copy.get('metadata'))) + singer.write_record(table, rec) self.state = incorporate(self.state, event_name, diff --git a/tap_exacttarget/endpoints/folders.py b/tap_exacttarget/endpoints/folders.py index 5682f76..dcf1c68 100644 --- a/tap_exacttarget/endpoints/folders.py +++ b/tap_exacttarget/endpoints/folders.py @@ -1,5 +1,7 @@ import FuelSDK +import copy import singer +from singer import Transformer, metadata from tap_exacttarget.client import request from tap_exacttarget.dao import DataAccessObject @@ -82,6 +84,8 @@ def sync_data(self): self.auth_stub, search_filter) + catalog_copy = copy.deepcopy(self.catalog) + for folder in stream: folder = self.filter_keys_and_parse(folder) @@ -90,6 +94,8 @@ def sync_data(self): 'ModifiedDate', folder.get('ModifiedDate')) - singer.write_records(table, [folder]) + with Transformer() as transformer: + rec = transformer.transform(folder, catalog_copy.get('schema'), metadata.to_map(catalog_copy.get('metadata'))) + singer.write_record(table, rec) save_state(self.state) diff --git a/tap_exacttarget/endpoints/list_sends.py b/tap_exacttarget/endpoints/list_sends.py index 0fa16c1..c1969db 100644 --- a/tap_exacttarget/endpoints/list_sends.py +++ b/tap_exacttarget/endpoints/list_sends.py @@ -1,5 +1,7 @@ import FuelSDK +import copy import singer +from singer import Transformer, metadata from tap_exacttarget.client import request from tap_exacttarget.dao import DataAccessObject @@ -114,4 +116,6 @@ def sync_data(self): for list_send in stream: list_send = self.filter_keys_and_parse(list_send) - singer.write_records(table, 
[list_send]) + with Transformer() as transformer: + rec = transformer.transform(list_send, catalog_copy.get('schema'), metadata.to_map(catalog_copy.get('metadata'))) + singer.write_record(table, rec) diff --git a/tap_exacttarget/endpoints/list_subscribers.py b/tap_exacttarget/endpoints/list_subscribers.py index 0e67b55..b2a8e3e 100644 --- a/tap_exacttarget/endpoints/list_subscribers.py +++ b/tap_exacttarget/endpoints/list_subscribers.py @@ -1,5 +1,7 @@ import FuelSDK +import copy import singer +from singer import Transformer, metadata from tap_exacttarget.client import request from tap_exacttarget.dao import DataAccessObject @@ -121,6 +123,8 @@ def sync_data(self): if self.replicate_subscriber: subscriber_dao.write_schema() + catalog_copy = copy.deepcopy(self.catalog) + for list_subscribers_batch in partition_all(stream, batch_size): for list_subscriber in list_subscribers_batch: list_subscriber = self.filter_keys_and_parse( @@ -133,7 +137,9 @@ def sync_data(self): 'ModifiedDate', list_subscriber.get('ModifiedDate')) - singer.write_records(table, [list_subscriber]) + with Transformer() as transformer: + rec = transformer.transform(list_subscriber, catalog_copy.get('schema'), metadata.to_map(catalog_copy.get('metadata'))) + singer.write_record(table, rec) if self.replicate_subscriber: subscriber_keys = list(map( diff --git a/tap_exacttarget/endpoints/lists.py b/tap_exacttarget/endpoints/lists.py index 69836da..a05c518 100644 --- a/tap_exacttarget/endpoints/lists.py +++ b/tap_exacttarget/endpoints/lists.py @@ -1,5 +1,7 @@ import FuelSDK +import copy import singer +from singer import Transformer, metadata from tap_exacttarget.client import request from tap_exacttarget.dao import DataAccessObject @@ -71,6 +73,8 @@ def sync_data(self): self.auth_stub, search_filter) + catalog_copy = copy.deepcopy(self.catalog) + for _list in stream: _list = self.filter_keys_and_parse(_list) @@ -79,6 +83,8 @@ def sync_data(self): 'ModifiedDate', _list.get('ModifiedDate')) - 
singer.write_records(table, [_list]) + with Transformer() as transformer: + rec = transformer.transform(_list, catalog_copy.get('schema'), metadata.to_map(catalog_copy.get('metadata'))) + singer.write_record(table, rec) save_state(self.state) diff --git a/tap_exacttarget/endpoints/sends.py b/tap_exacttarget/endpoints/sends.py index c521941..dd8c48f 100644 --- a/tap_exacttarget/endpoints/sends.py +++ b/tap_exacttarget/endpoints/sends.py @@ -1,5 +1,7 @@ import FuelSDK +import copy import singer +from singer import Transformer, metadata from tap_exacttarget.client import request from tap_exacttarget.dao import DataAccessObject @@ -110,6 +112,8 @@ def sync_data(self): self.auth_stub, search_filter) + catalog_copy = copy.deepcopy(self.catalog) + for send in stream: send = self.filter_keys_and_parse(send) @@ -118,6 +122,8 @@ def sync_data(self): 'ModifiedDate', send.get('ModifiedDate')) - singer.write_records(table, [send]) + with Transformer() as transformer: + rec = transformer.transform(send, catalog_copy.get('schema'), metadata.to_map(catalog_copy.get('metadata'))) + singer.write_record(table, rec) save_state(self.state) diff --git a/tap_exacttarget/endpoints/subscribers.py b/tap_exacttarget/endpoints/subscribers.py index e081998..a97f13e 100644 --- a/tap_exacttarget/endpoints/subscribers.py +++ b/tap_exacttarget/endpoints/subscribers.py @@ -1,5 +1,7 @@ import FuelSDK +import copy import singer +from singer import Transformer, metadata from tap_exacttarget.client import request from tap_exacttarget.dao import DataAccessObject @@ -153,7 +155,11 @@ def pull_subscribers_batch(self, subscriber_keys): stream = request( 'Subscriber', FuelSDK.ET_Subscriber, self.auth_stub, _filter) + catalog_copy = copy.deepcopy(self.catalog) + for subscriber in stream: subscriber = self.filter_keys_and_parse(subscriber) - singer.write_records(table, [subscriber]) + with Transformer() as transformer: + rec = transformer.transform(subscriber, catalog_copy.get('schema'), 
metadata.to_map(catalog_copy.get('metadata'))) + singer.write_record(table, rec) diff --git a/tests/test_exacttarget_field_selection.py b/tests/test_exacttarget_field_selection.py new file mode 100644 index 0000000..e46df1a --- /dev/null +++ b/tests/test_exacttarget_field_selection.py @@ -0,0 +1,133 @@ +from base import ExactTargetBase +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +class ExactTargetFieldSelection(ExactTargetBase): + + # fields not to select + non_selected_fields = { + "campaign": ["createdDate", "campaignCode", "description"], + "content_area": ["Name"], + "data_extension.test emails": ["name", "email"], + "data_extension.This is a test": ["Birthday"], + "data_extension.my_test": ["Address"], + "email": ["Name", "CharacterSet", "HasDynamicSubjectLine", "EmailType"], + "event": ["EventType"], + "folder": ["Name", "ContentType", "Description", "ObjectID"], + "list": ["ListName", "Category", "Type"], + "list_send": ["MissingAddresses", "ExistingUndeliverables", "HardBounces", "NumberDelivered"], + "list_subscriber": ["Status", "ObjectID", "ListID"], + "send": ["Status", "EmailName", "FromAddress", "IsMultipart"], + "subscriber": ["Status", "EmailAddress", "SubscriberKey", "PartnerKey"] + } + + def name(self): + return "tap_tester_exacttarget_field_selection" + + def test_run(self): + # run test with all fields of stream 'selected = False' + self.run_test(only_automatic_fields=True) + # run test with fields in 'non_selected_fields', 'selected = False' + self.run_test(only_automatic_fields=False) + + def run_test(self, only_automatic_fields=False): + expected_streams = self.streams_to_select() + conn_id = connections.ensure_connection(self) + runner.run_check_mode(self, conn_id) + + expected_stream_fields = dict() + + found_catalogs = menagerie.get_catalogs(conn_id) + for catalog in found_catalogs: + stream_name = catalog['stream_name'] + catalog_entry = 
menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + if not stream_name in expected_streams: + continue + # select catalog fields + self.select_found_catalogs( + conn_id, + [catalog], + only_streams=[stream_name], + deselect_all_fields=True if only_automatic_fields else False, + non_selected_props=[] if only_automatic_fields else self.non_selected_fields[stream_name]) + # add expected fields for assertion + fields_from_field_level_md = [md_entry['breadcrumb'][1] + for md_entry in catalog_entry['metadata'] + if md_entry['breadcrumb'] != []] + if only_automatic_fields: + expected_stream_fields[stream_name] = self.expected_primary_keys()[stream_name] | self.expected_replication_keys()[stream_name] + else: + expected_stream_fields[stream_name] = set(fields_from_field_level_md) - set(self.non_selected_fields[stream_name]) + + self.run_and_verify_sync(conn_id) + synced_records = runner.get_records_from_target_output() + + for stream in expected_streams: + with self.subTest(stream=stream): + + # get expected keys + expected_keys = expected_stream_fields[stream] + + # collect all actual values + messages = synced_records.get(stream) + + # collect actual synced fields + actual_keys = [set(message['data'].keys()) for message in messages['messages'] + if message['action'] == 'upsert'][0] + + if stream == 'list': + expected_keys = expected_keys - { + 'SendClassification', + 'PartnerProperties'} + elif stream == 'subscriber': + expected_keys = expected_keys - { + 'CustomerKey', + 'PartnerType', + 'UnsubscribedDate', + 'PrimarySMSAddress', + 'PrimaryEmailAddress', + 'PartnerProperties', + 'SubscriberTypeDefinition', + 'Addresses', + 'ListIDs', + 'Locale', + 'PrimarySMSPublicationStatus', + 'ModifiedDate'} + elif stream == 'list_send': + expected_keys = expected_keys - { + 'CreatedDate', + 'CustomerKey', + 'ID', + 'PartnerProperties', + 'ModifiedDate'} + elif stream == 'folder': + expected_keys = expected_keys - { + 'Type', + 'PartnerProperties'} + elif stream == 
'email': + expected_keys = expected_keys - { + '__AdditionalEmailAttribute1', + '__AdditionalEmailAttribute3', + 'SyncTextWithHTML', + 'PartnerProperties', + '__AdditionalEmailAttribute5', + 'ClonedFromID', + '__AdditionalEmailAttribute4', + '__AdditionalEmailAttribute2'} + elif stream == 'content_area': + # most of them are included in the 'Content' data + expected_keys = expected_keys - { + 'BackgroundColor', + 'Cellpadding', + 'HasFontSize', + 'BorderColor', + 'BorderWidth', + 'Width', + 'IsLocked', + 'Cellspacing', + 'FontFamily'} + + # verify expected and actual fields + self.assertEqual(expected_keys, actual_keys, + msg='Selected keys in catalog is not as expected') From e71aaf3d2922918aad2d09ca7c3b9f1219f70754 Mon Sep 17 00:00:00 2001 From: harshpatel4_crest Date: Thu, 2 Sep 2021 19:46:05 +0530 Subject: [PATCH 05/36] resolve pylint --- tap_exacttarget/endpoints/list_sends.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tap_exacttarget/endpoints/list_sends.py b/tap_exacttarget/endpoints/list_sends.py index c1969db..abc1f98 100644 --- a/tap_exacttarget/endpoints/list_sends.py +++ b/tap_exacttarget/endpoints/list_sends.py @@ -113,6 +113,8 @@ def sync_data(self): selector, self.auth_stub) + catalog_copy = copy.deepcopy(self.catalog) + for list_send in stream: list_send = self.filter_keys_and_parse(list_send) From bf152427d686b0bb6adf20dea85aad9435ee5c6f Mon Sep 17 00:00:00 2001 From: harshpatel4_crest Date: Fri, 3 Sep 2021 17:53:31 +0530 Subject: [PATCH 06/36] added code change for data extension stream --- tap_exacttarget/endpoints/data_extensions.py | 20 ++++++++++++++++++-- tests/base.py | 7 ++++++- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/tap_exacttarget/endpoints/data_extensions.py b/tap_exacttarget/endpoints/data_extensions.py index 9537c25..c3193b4 100644 --- a/tap_exacttarget/endpoints/data_extensions.py +++ b/tap_exacttarget/endpoints/data_extensions.py @@ -87,7 +87,8 @@ def _get_extensions(self): 
'forced-replication-method': 'FULL_TABLE', "table-key-properties": [ "_CustomObjectKey" - ] + ], + "valid-replication-keys": [] } }, { @@ -110,6 +111,7 @@ def _get_fields(self, extensions): self.auth_stub) for field in result: + is_replication_key = False is_primary_key = False extension_id = field.DataExtension.CustomerKey field = sudsobj_to_dict(field) @@ -122,6 +124,9 @@ def _get_fields(self, extensions): [extension_id, 'key_properties'], field_name) + if field_name in ['ModifiedDate', 'JoinDate']: + is_replication_key = True + field_schema = { 'type': [ 'null', @@ -140,9 +145,15 @@ def _get_fields(self, extensions): if not mdata.get('breadcrumb'): mdata.get('metadata').get('table-key-properties').append(field_name) + if is_replication_key: + for mdata in to_return[extension_id]['metadata']: + if not mdata.get('breadcrumb'): + mdata.get('metadata')['forced-replication-method'] = "INCREMENTAL" + mdata.get('metadata').get('valid-replication-keys').append(field_name) + # These fields are defaulted into the schema, do not add to metadata again. 
if field_name not in {'_CustomObjectKey', 'CategoryID'}: - if is_primary_key: + if is_primary_key or is_replication_key: to_return[extension_id]['metadata'].append({ 'breadcrumb': ('properties', field_name), 'metadata': {'inclusion': 'automatic'} @@ -153,6 +164,11 @@ def _get_fields(self, extensions): 'metadata': {'inclusion': 'available'} }) + for catalog in to_return.values(): + for mdata in catalog.get('metadata'): + if not mdata.get('breadcrumb'): + if not mdata.get('metadata').get('valid-replication-keys'): + del mdata.get('metadata')['valid-replication-keys'] return to_return def generate_catalog(self): diff --git a/tests/base.py b/tests/base.py index 3f21ab0..19edcc0 100644 --- a/tests/base.py +++ b/tests/base.py @@ -82,6 +82,11 @@ def expected_metadata(self): self.PRIMARY_KEYS: {"_CustomObjectKey", "ID"}, self.REPLICATION_METHOD: self.FULL_TABLE, }, + "data_extension.test 1":{ + self.PRIMARY_KEYS: {"_CustomObjectKey", "ID"}, + self.REPLICATION_METHOD: self.INCREMENTAL, + self.REPLICATION_KEYS: {"JoinDate"}, + }, "email":{ self.PRIMARY_KEYS: {"ID"}, self.REPLICATION_METHOD: self.INCREMENTAL, @@ -188,4 +193,4 @@ def dt_to_ts(self, dtime): continue def is_incremental(self, stream): - return self.expected_metadata()[stream][self.REPLICATION_METHOD] == self.INCREMENTAL \ No newline at end of file + return self.expected_metadata()[stream][self.REPLICATION_METHOD] == self.INCREMENTAL From f25c8152b12d76ba7f43f99d81a59520b98da411 Mon Sep 17 00:00:00 2001 From: harshpatel4_crest Date: Fri, 3 Sep 2021 18:03:22 +0530 Subject: [PATCH 07/36] pylint resolve --- tap_exacttarget/endpoints/data_extensions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tap_exacttarget/endpoints/data_extensions.py b/tap_exacttarget/endpoints/data_extensions.py index c3193b4..f635455 100644 --- a/tap_exacttarget/endpoints/data_extensions.py +++ b/tap_exacttarget/endpoints/data_extensions.py @@ -102,7 +102,7 @@ def _get_extensions(self): return to_return - def 
_get_fields(self, extensions): + def _get_fields(self, extensions): # pylint: disable=too-many-branches to_return = extensions.copy() result = request( From bbf14a31faad1fd2d64f395c83ed06e09acb1056 Mon Sep 17 00:00:00 2001 From: harshpatel4_crest Date: Fri, 3 Sep 2021 18:10:15 +0530 Subject: [PATCH 08/36] updated test case for data extension --- tests/test_exacttarget_field_selection.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_exacttarget_field_selection.py b/tests/test_exacttarget_field_selection.py index e46df1a..117fcc9 100644 --- a/tests/test_exacttarget_field_selection.py +++ b/tests/test_exacttarget_field_selection.py @@ -12,6 +12,7 @@ class ExactTargetFieldSelection(ExactTargetBase): "data_extension.test emails": ["name", "email"], "data_extension.This is a test": ["Birthday"], "data_extension.my_test": ["Address"], + "data_extension.test 1": ["name"], "email": ["Name", "CharacterSet", "HasDynamicSubjectLine", "EmailType"], "event": ["EventType"], "folder": ["Name", "ContentType", "Description", "ObjectID"], From af3ea3b8c84b8137fd937fe8c8490358f861d569 Mon Sep 17 00:00:00 2001 From: harshpatel4_crest Date: Tue, 7 Sep 2021 12:50:36 +0530 Subject: [PATCH 09/36] added comment --- tap_exacttarget/endpoints/data_extensions.py | 26 ++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tap_exacttarget/endpoints/data_extensions.py b/tap_exacttarget/endpoints/data_extensions.py index f635455..6df4a31 100644 --- a/tap_exacttarget/endpoints/data_extensions.py +++ b/tap_exacttarget/endpoints/data_extensions.py @@ -164,6 +164,32 @@ def _get_fields(self, extensions): # pylint: disable=too-many-branches 'metadata': {'inclusion': 'available'} }) + # to_return = + # { + # 'de1': { + # 'tap_stream_id': 'data_extension.de1', + # 'stream': 'data_extension.de1', + # 'key_properties': ['_CustomObjectKey'], + # 'schema': { + # 'type': 'object', + # 'properties': {...} + # }, + # 'metadata': [...] 
+ # }, + # 'de2': { + # 'tap_stream_id': 'data_extension.de2', + # 'stream': 'data_extension.de2', + # 'key_properties': ['_CustomObjectKey'], + # 'schema': { + # 'type': 'object', + # 'properties': {...} + # }, + # 'metadata': [...] + # } + # } + + # loop through all the data extension catalog in 'to_return' + # and remove empty 'valid-replication-keys' present in metadata for catalog in to_return.values(): for mdata in catalog.get('metadata'): if not mdata.get('breadcrumb'): From eef1780145b2907a666dcd728a1a2f2f9127861d Mon Sep 17 00:00:00 2001 From: harshpatel4_crest Date: Tue, 7 Sep 2021 17:35:30 +0530 Subject: [PATCH 10/36] added comments --- tap_exacttarget/endpoints/data_extensions.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/tap_exacttarget/endpoints/data_extensions.py b/tap_exacttarget/endpoints/data_extensions.py index 6df4a31..8a2ee78 100644 --- a/tap_exacttarget/endpoints/data_extensions.py +++ b/tap_exacttarget/endpoints/data_extensions.py @@ -85,10 +85,10 @@ def _get_extensions(self): 'metadata': { 'inclusion':'available', 'forced-replication-method': 'FULL_TABLE', - "table-key-properties": [ - "_CustomObjectKey" + 'table-key-properties': [ + '_CustomObjectKey' ], - "valid-replication-keys": [] + 'valid-replication-keys': [] } }, { @@ -110,6 +110,14 @@ def _get_fields(self, extensions): # pylint: disable=too-many-branches FuelSDK.ET_DataExtension_Column, self.auth_stub) + # iterate through all the fields and determine if it is primary key + # or replication key and update the catalog file accordingly: + # is_primary_key: + # update catalog file by appending that field in 'table-key-properties' + # is_replication_key: + # update value of 'forced-replication-method' as INCREMENTAL + # update catalog file by appending that field in 'valid-replication-keys' + # add 'AUTOMATIC' replication method for both primary and replication keys for field in result: is_replication_key = False is_primary_key = False @@ -164,7 
+172,7 @@ def _get_fields(self, extensions): # pylint: disable=too-many-branches 'metadata': {'inclusion': 'available'} }) - # to_return = + # the structure of 'to_return' is like: # { # 'de1': { # 'tap_stream_id': 'data_extension.de1', From 5f179fcc397f0f2f2ddf1db21050518444f18ded Mon Sep 17 00:00:00 2001 From: harshpatel4_crest Date: Tue, 7 Sep 2021 17:48:43 +0530 Subject: [PATCH 11/36] added comments and optimized that condition --- tests/test_exacttarget_field_selection.py | 101 +++++++++++----------- 1 file changed, 50 insertions(+), 51 deletions(-) diff --git a/tests/test_exacttarget_field_selection.py b/tests/test_exacttarget_field_selection.py index 117fcc9..7f45179 100644 --- a/tests/test_exacttarget_field_selection.py +++ b/tests/test_exacttarget_field_selection.py @@ -5,6 +5,54 @@ class ExactTargetFieldSelection(ExactTargetBase): + fields_to_remove = { + 'list': [ + 'SendClassification', # not retrievable + 'PartnerProperties'], # not retrievable + 'subscriber': [ + 'CustomerKey', # not retrievable + 'PartnerType', # not retrievable + 'UnsubscribedDate', + 'PrimarySMSAddress', # not retrievable + 'PrimaryEmailAddress', # not retrievable + 'PartnerProperties', # not retrievable + 'SubscriberTypeDefinition', # not retrievable + 'Addresses', # not retrievable + 'ListIDs', + 'Locale', # not retrievable + 'PrimarySMSPublicationStatus', # not retrievable + 'ModifiedDate'], # not retrievable + 'list_send': [ + 'CreatedDate', # not retrievable + 'CustomerKey', # not retrievable + 'ID', + 'PartnerProperties', # not retrievable + 'ModifiedDate'], # not retrievable + 'folder': [ + 'Type', + 'PartnerProperties'], + 'email': [ + '__AdditionalEmailAttribute1', # not retrievable + '__AdditionalEmailAttribute3', # not retrievable + 'SyncTextWithHTML', # not retrievable + 'PartnerProperties', # not retrievable + '__AdditionalEmailAttribute5', # not retrievable + 'ClonedFromID', + '__AdditionalEmailAttribute4', # not retrievable + '__AdditionalEmailAttribute2'], # not 
retrievable + 'content_area': [ + # most of them are included in the 'Content' data + 'BackgroundColor', # not retrievable + 'Cellpadding', # not retrievable + 'HasFontSize', # not retrievable + 'BorderColor', # not retrievable + 'BorderWidth', # not retrievable + 'Width', # not retrievable + 'IsLocked', # not retrievable + 'Cellspacing', # not retrievable + 'FontFamily'] # not retrievable + } + # fields not to select non_selected_fields = { "campaign": ["createdDate", "campaignCode", "description"], @@ -77,57 +125,8 @@ def run_test(self, only_automatic_fields=False): actual_keys = [set(message['data'].keys()) for message in messages['messages'] if message['action'] == 'upsert'][0] - if stream == 'list': - expected_keys = expected_keys - { - 'SendClassification', - 'PartnerProperties'} - elif stream == 'subscriber': - expected_keys = expected_keys - { - 'CustomerKey', - 'PartnerType', - 'UnsubscribedDate', - 'PrimarySMSAddress', - 'PrimaryEmailAddress', - 'PartnerProperties', - 'SubscriberTypeDefinition', - 'Addresses', - 'ListIDs', - 'Locale', - 'PrimarySMSPublicationStatus', - 'ModifiedDate'} - elif stream == 'list_send': - expected_keys = expected_keys - { - 'CreatedDate', - 'CustomerKey', - 'ID', - 'PartnerProperties', - 'ModifiedDate'} - elif stream == 'folder': - expected_keys = expected_keys - { - 'Type', - 'PartnerProperties'} - elif stream == 'email': - expected_keys = expected_keys - { - '__AdditionalEmailAttribute1', - '__AdditionalEmailAttribute3', - 'SyncTextWithHTML', - 'PartnerProperties', - '__AdditionalEmailAttribute5', - 'ClonedFromID', - '__AdditionalEmailAttribute4', - '__AdditionalEmailAttribute2'} - elif stream == 'content_area': - # most of them are included in the 'Content' data - expected_keys = expected_keys - { - 'BackgroundColor', - 'Cellpadding', - 'HasFontSize', - 'BorderColor', - 'BorderWidth', - 'Width', - 'IsLocked', - 'Cellspacing', - 'FontFamily'} + fields = self.fields_to_remove.get(stream) or [] + expected_keys = expected_keys - 
set(fields) # verify expected and actual fields self.assertEqual(expected_keys, actual_keys, From 88a86301bf3e1486269ebf9d70b197cfd7963212 Mon Sep 17 00:00:00 2001 From: harshpatel4_crest Date: Tue, 7 Sep 2021 18:13:24 +0530 Subject: [PATCH 12/36] added code change for tranformation function in base file --- tap_exacttarget/dao.py | 8 +++++++- tap_exacttarget/endpoints/campaigns.py | 5 +---- tap_exacttarget/endpoints/content_areas.py | 5 +---- tap_exacttarget/endpoints/data_extensions.py | 5 +---- tap_exacttarget/endpoints/emails.py | 5 +---- tap_exacttarget/endpoints/events.py | 5 +---- tap_exacttarget/endpoints/folders.py | 5 +---- tap_exacttarget/endpoints/list_sends.py | 5 +---- tap_exacttarget/endpoints/list_subscribers.py | 5 +---- tap_exacttarget/endpoints/lists.py | 5 +---- tap_exacttarget/endpoints/sends.py | 5 +---- tap_exacttarget/endpoints/subscribers.py | 5 +---- 12 files changed, 18 insertions(+), 45 deletions(-) diff --git a/tap_exacttarget/dao.py b/tap_exacttarget/dao.py index be2d557..bda8326 100644 --- a/tap_exacttarget/dao.py +++ b/tap_exacttarget/dao.py @@ -1,5 +1,5 @@ import singer -from singer import metadata +from singer import metadata, Transformer from funcy import project @@ -59,6 +59,12 @@ def get_catalog_keys(self): def parse_object(self, obj): return project(obj, self.get_catalog_keys()) + # a function to write records with applying transformation + def write_records(self, record, catalog, table): + with Transformer() as transformer: + rec = transformer.transform(record, catalog.get('schema'), metadata.to_map(catalog.get('metadata'))) + singer.write_record(table, rec) + def write_schema(self): singer.write_schema( self.catalog.get('stream'), diff --git a/tap_exacttarget/endpoints/campaigns.py b/tap_exacttarget/endpoints/campaigns.py index 24eecdb..0c9d57b 100644 --- a/tap_exacttarget/endpoints/campaigns.py +++ b/tap_exacttarget/endpoints/campaigns.py @@ -1,7 +1,6 @@ import FuelSDK import copy import singer -from singer import 
Transformer, metadata from tap_exacttarget.client import request from tap_exacttarget.dao import DataAccessObject @@ -51,6 +50,4 @@ def sync_data(self): for campaign in cursor: campaign = self.filter_keys_and_parse(campaign) - with Transformer() as transformer: - rec = transformer.transform(campaign, catalog_copy.get('schema'), metadata.to_map(catalog_copy.get('metadata'))) - singer.write_record(self.__class__.TABLE, rec) + self.write_records(campaign, catalog_copy, self.__class__.TABLE) diff --git a/tap_exacttarget/endpoints/content_areas.py b/tap_exacttarget/endpoints/content_areas.py index fb608f7..03444ca 100644 --- a/tap_exacttarget/endpoints/content_areas.py +++ b/tap_exacttarget/endpoints/content_areas.py @@ -1,7 +1,6 @@ import FuelSDK import copy import singer -from singer import Transformer, metadata from tap_exacttarget.client import request from tap_exacttarget.dao import DataAccessObject @@ -138,8 +137,6 @@ def sync_data(self): 'ModifiedDate', content_area.get('ModifiedDate')) - with Transformer() as transformer: - rec = transformer.transform(content_area, catalog_copy.get('schema'), metadata.to_map(catalog_copy.get('metadata'))) - singer.write_record(table, rec) + self.write_records(content_area, catalog_copy, table) save_state(self.state) diff --git a/tap_exacttarget/endpoints/data_extensions.py b/tap_exacttarget/endpoints/data_extensions.py index 7f4be6b..dd07c2d 100644 --- a/tap_exacttarget/endpoints/data_extensions.py +++ b/tap_exacttarget/endpoints/data_extensions.py @@ -1,7 +1,6 @@ import FuelSDK import copy import singer -from singer import Transformer, metadata from funcy import set_in, update_in, merge @@ -295,9 +294,7 @@ def _replicate(self, customer_key, keys, replication_key, row.get(replication_key)) - with Transformer() as transformer: - rec = transformer.transform(row, catalog_copy.get('schema'), metadata.to_map(catalog_copy.get('metadata'))) - singer.write_record(table, rec) + self.write_records(row, catalog_copy, table) if partial: 
self.state = incorporate(self.state, diff --git a/tap_exacttarget/endpoints/emails.py b/tap_exacttarget/endpoints/emails.py index 218c7bb..b2b242e 100644 --- a/tap_exacttarget/endpoints/emails.py +++ b/tap_exacttarget/endpoints/emails.py @@ -1,7 +1,6 @@ import FuelSDK import copy import singer -from singer import Transformer, metadata from tap_exacttarget.client import request from tap_exacttarget.dao import DataAccessObject @@ -153,8 +152,6 @@ def sync_data(self): 'ModifiedDate', email.get('ModifiedDate')) - with Transformer() as transformer: - rec = transformer.transform(email, catalog_copy.get('schema'), metadata.to_map(catalog_copy.get('metadata'))) - singer.write_record(table, rec) + self.write_records(email, catalog_copy, table) save_state(self.state) diff --git a/tap_exacttarget/endpoints/events.py b/tap_exacttarget/endpoints/events.py index 36145cc..8744a64 100644 --- a/tap_exacttarget/endpoints/events.py +++ b/tap_exacttarget/endpoints/events.py @@ -1,7 +1,6 @@ import FuelSDK import copy import singer -from singer import Transformer, metadata from tap_exacttarget.client import request from tap_exacttarget.dao import DataAccessObject @@ -108,9 +107,7 @@ def sync_data(self): event.get('EventDate'))) continue - with Transformer() as transformer: - rec = transformer.transform(event, catalog_copy.get('schema'), metadata.to_map(catalog_copy.get('metadata'))) - singer.write_record(table, rec) + self.write_records(event, catalog_copy, table) self.state = incorporate(self.state, event_name, diff --git a/tap_exacttarget/endpoints/folders.py b/tap_exacttarget/endpoints/folders.py index dcf1c68..8eae707 100644 --- a/tap_exacttarget/endpoints/folders.py +++ b/tap_exacttarget/endpoints/folders.py @@ -1,7 +1,6 @@ import FuelSDK import copy import singer -from singer import Transformer, metadata from tap_exacttarget.client import request from tap_exacttarget.dao import DataAccessObject @@ -94,8 +93,6 @@ def sync_data(self): 'ModifiedDate', folder.get('ModifiedDate')) - 
with Transformer() as transformer: - rec = transformer.transform(folder, catalog_copy.get('schema'), metadata.to_map(catalog_copy.get('metadata'))) - singer.write_record(table, rec) + self.write_records(folder, catalog_copy, table) save_state(self.state) diff --git a/tap_exacttarget/endpoints/list_sends.py b/tap_exacttarget/endpoints/list_sends.py index abc1f98..c9b1d14 100644 --- a/tap_exacttarget/endpoints/list_sends.py +++ b/tap_exacttarget/endpoints/list_sends.py @@ -1,7 +1,6 @@ import FuelSDK import copy import singer -from singer import Transformer, metadata from tap_exacttarget.client import request from tap_exacttarget.dao import DataAccessObject @@ -118,6 +117,4 @@ def sync_data(self): for list_send in stream: list_send = self.filter_keys_and_parse(list_send) - with Transformer() as transformer: - rec = transformer.transform(list_send, catalog_copy.get('schema'), metadata.to_map(catalog_copy.get('metadata'))) - singer.write_record(table, rec) + self.write_records(list_send, catalog_copy, table) diff --git a/tap_exacttarget/endpoints/list_subscribers.py b/tap_exacttarget/endpoints/list_subscribers.py index b2a8e3e..97b4684 100644 --- a/tap_exacttarget/endpoints/list_subscribers.py +++ b/tap_exacttarget/endpoints/list_subscribers.py @@ -1,7 +1,6 @@ import FuelSDK import copy import singer -from singer import Transformer, metadata from tap_exacttarget.client import request from tap_exacttarget.dao import DataAccessObject @@ -137,9 +136,7 @@ def sync_data(self): 'ModifiedDate', list_subscriber.get('ModifiedDate')) - with Transformer() as transformer: - rec = transformer.transform(list_subscriber, catalog_copy.get('schema'), metadata.to_map(catalog_copy.get('metadata'))) - singer.write_record(table, rec) + self.write_records(list_subscriber, catalog_copy, table) if self.replicate_subscriber: subscriber_keys = list(map( diff --git a/tap_exacttarget/endpoints/lists.py b/tap_exacttarget/endpoints/lists.py index a05c518..02728fa 100644 --- 
a/tap_exacttarget/endpoints/lists.py +++ b/tap_exacttarget/endpoints/lists.py @@ -1,7 +1,6 @@ import FuelSDK import copy import singer -from singer import Transformer, metadata from tap_exacttarget.client import request from tap_exacttarget.dao import DataAccessObject @@ -83,8 +82,6 @@ def sync_data(self): 'ModifiedDate', _list.get('ModifiedDate')) - with Transformer() as transformer: - rec = transformer.transform(_list, catalog_copy.get('schema'), metadata.to_map(catalog_copy.get('metadata'))) - singer.write_record(table, rec) + self.write_records(_list, catalog_copy, table) save_state(self.state) diff --git a/tap_exacttarget/endpoints/sends.py b/tap_exacttarget/endpoints/sends.py index dd8c48f..e7e0176 100644 --- a/tap_exacttarget/endpoints/sends.py +++ b/tap_exacttarget/endpoints/sends.py @@ -1,7 +1,6 @@ import FuelSDK import copy import singer -from singer import Transformer, metadata from tap_exacttarget.client import request from tap_exacttarget.dao import DataAccessObject @@ -122,8 +121,6 @@ def sync_data(self): 'ModifiedDate', send.get('ModifiedDate')) - with Transformer() as transformer: - rec = transformer.transform(send, catalog_copy.get('schema'), metadata.to_map(catalog_copy.get('metadata'))) - singer.write_record(table, rec) + self.write_records(send, catalog_copy, table) save_state(self.state) diff --git a/tap_exacttarget/endpoints/subscribers.py b/tap_exacttarget/endpoints/subscribers.py index a97f13e..8374e4a 100644 --- a/tap_exacttarget/endpoints/subscribers.py +++ b/tap_exacttarget/endpoints/subscribers.py @@ -1,7 +1,6 @@ import FuelSDK import copy import singer -from singer import Transformer, metadata from tap_exacttarget.client import request from tap_exacttarget.dao import DataAccessObject @@ -160,6 +159,4 @@ def pull_subscribers_batch(self, subscriber_keys): for subscriber in stream: subscriber = self.filter_keys_and_parse(subscriber) - with Transformer() as transformer: - rec = transformer.transform(subscriber, catalog_copy.get('schema'), 
metadata.to_map(catalog_copy.get('metadata'))) - singer.write_record(table, rec) + self.write_records(subscriber, catalog_copy, table) From bf8b487e6dc00e4465be47ffff2dea91fff96542 Mon Sep 17 00:00:00 2001 From: harshpatel4_crest Date: Tue, 7 Sep 2021 18:21:01 +0530 Subject: [PATCH 13/36] pylint resolve --- tap_exacttarget/dao.py | 2 +- tap_exacttarget/endpoints/campaigns.py | 2 +- tap_exacttarget/endpoints/content_areas.py | 2 +- tap_exacttarget/endpoints/data_extensions.py | 2 +- tap_exacttarget/endpoints/emails.py | 2 +- tap_exacttarget/endpoints/events.py | 2 +- tap_exacttarget/endpoints/folders.py | 2 +- tap_exacttarget/endpoints/list_sends.py | 2 +- tap_exacttarget/endpoints/list_subscribers.py | 2 +- tap_exacttarget/endpoints/lists.py | 2 +- tap_exacttarget/endpoints/sends.py | 2 +- tap_exacttarget/endpoints/subscribers.py | 2 +- 12 files changed, 12 insertions(+), 12 deletions(-) diff --git a/tap_exacttarget/dao.py b/tap_exacttarget/dao.py index bda8326..6b57e6d 100644 --- a/tap_exacttarget/dao.py +++ b/tap_exacttarget/dao.py @@ -60,7 +60,7 @@ def parse_object(self, obj): return project(obj, self.get_catalog_keys()) # a function to write records with applying transformation - def write_records(self, record, catalog, table): + def write_records_with_transform(self, record, catalog, table): with Transformer() as transformer: rec = transformer.transform(record, catalog.get('schema'), metadata.to_map(catalog.get('metadata'))) singer.write_record(table, rec) diff --git a/tap_exacttarget/endpoints/campaigns.py b/tap_exacttarget/endpoints/campaigns.py index 0c9d57b..6080efd 100644 --- a/tap_exacttarget/endpoints/campaigns.py +++ b/tap_exacttarget/endpoints/campaigns.py @@ -50,4 +50,4 @@ def sync_data(self): for campaign in cursor: campaign = self.filter_keys_and_parse(campaign) - self.write_records(campaign, catalog_copy, self.__class__.TABLE) + self.write_records_with_transform(campaign, catalog_copy, self.__class__.TABLE) diff --git 
a/tap_exacttarget/endpoints/content_areas.py b/tap_exacttarget/endpoints/content_areas.py index 03444ca..d7973cb 100644 --- a/tap_exacttarget/endpoints/content_areas.py +++ b/tap_exacttarget/endpoints/content_areas.py @@ -137,6 +137,6 @@ def sync_data(self): 'ModifiedDate', content_area.get('ModifiedDate')) - self.write_records(content_area, catalog_copy, table) + self.write_records_with_transform(content_area, catalog_copy, table) save_state(self.state) diff --git a/tap_exacttarget/endpoints/data_extensions.py b/tap_exacttarget/endpoints/data_extensions.py index dd07c2d..dc9b851 100644 --- a/tap_exacttarget/endpoints/data_extensions.py +++ b/tap_exacttarget/endpoints/data_extensions.py @@ -294,7 +294,7 @@ def _replicate(self, customer_key, keys, replication_key, row.get(replication_key)) - self.write_records(row, catalog_copy, table) + self.write_records_with_transform(row, catalog_copy, table) if partial: self.state = incorporate(self.state, diff --git a/tap_exacttarget/endpoints/emails.py b/tap_exacttarget/endpoints/emails.py index b2b242e..2824079 100644 --- a/tap_exacttarget/endpoints/emails.py +++ b/tap_exacttarget/endpoints/emails.py @@ -152,6 +152,6 @@ def sync_data(self): 'ModifiedDate', email.get('ModifiedDate')) - self.write_records(email, catalog_copy, table) + self.write_records_with_transform(email, catalog_copy, table) save_state(self.state) diff --git a/tap_exacttarget/endpoints/events.py b/tap_exacttarget/endpoints/events.py index 8744a64..512b6b3 100644 --- a/tap_exacttarget/endpoints/events.py +++ b/tap_exacttarget/endpoints/events.py @@ -107,7 +107,7 @@ def sync_data(self): event.get('EventDate'))) continue - self.write_records(event, catalog_copy, table) + self.write_records_with_transform(event, catalog_copy, table) self.state = incorporate(self.state, event_name, diff --git a/tap_exacttarget/endpoints/folders.py b/tap_exacttarget/endpoints/folders.py index 8eae707..19acf80 100644 --- a/tap_exacttarget/endpoints/folders.py +++ 
b/tap_exacttarget/endpoints/folders.py @@ -93,6 +93,6 @@ def sync_data(self): 'ModifiedDate', folder.get('ModifiedDate')) - self.write_records(folder, catalog_copy, table) + self.write_records_with_transform(folder, catalog_copy, table) save_state(self.state) diff --git a/tap_exacttarget/endpoints/list_sends.py b/tap_exacttarget/endpoints/list_sends.py index c9b1d14..cf70f16 100644 --- a/tap_exacttarget/endpoints/list_sends.py +++ b/tap_exacttarget/endpoints/list_sends.py @@ -117,4 +117,4 @@ def sync_data(self): for list_send in stream: list_send = self.filter_keys_and_parse(list_send) - self.write_records(list_send, catalog_copy, table) + self.write_records_with_transform(list_send, catalog_copy, table) diff --git a/tap_exacttarget/endpoints/list_subscribers.py b/tap_exacttarget/endpoints/list_subscribers.py index 97b4684..8815c63 100644 --- a/tap_exacttarget/endpoints/list_subscribers.py +++ b/tap_exacttarget/endpoints/list_subscribers.py @@ -136,7 +136,7 @@ def sync_data(self): 'ModifiedDate', list_subscriber.get('ModifiedDate')) - self.write_records(list_subscriber, catalog_copy, table) + self.write_records_with_transform(list_subscriber, catalog_copy, table) if self.replicate_subscriber: subscriber_keys = list(map( diff --git a/tap_exacttarget/endpoints/lists.py b/tap_exacttarget/endpoints/lists.py index 02728fa..abc1638 100644 --- a/tap_exacttarget/endpoints/lists.py +++ b/tap_exacttarget/endpoints/lists.py @@ -82,6 +82,6 @@ def sync_data(self): 'ModifiedDate', _list.get('ModifiedDate')) - self.write_records(_list, catalog_copy, table) + self.write_records_with_transform(_list, catalog_copy, table) save_state(self.state) diff --git a/tap_exacttarget/endpoints/sends.py b/tap_exacttarget/endpoints/sends.py index e7e0176..e9424f5 100644 --- a/tap_exacttarget/endpoints/sends.py +++ b/tap_exacttarget/endpoints/sends.py @@ -121,6 +121,6 @@ def sync_data(self): 'ModifiedDate', send.get('ModifiedDate')) - self.write_records(send, catalog_copy, table) + 
self.write_records_with_transform(send, catalog_copy, table) save_state(self.state) diff --git a/tap_exacttarget/endpoints/subscribers.py b/tap_exacttarget/endpoints/subscribers.py index 8374e4a..9b7051c 100644 --- a/tap_exacttarget/endpoints/subscribers.py +++ b/tap_exacttarget/endpoints/subscribers.py @@ -159,4 +159,4 @@ def pull_subscribers_batch(self, subscriber_keys): for subscriber in stream: subscriber = self.filter_keys_and_parse(subscriber) - self.write_records(subscriber, catalog_copy, table) + self.write_records_with_transform(subscriber, catalog_copy, table) From 2b12bafd15efad8a167c755c9d405922f9e926b2 Mon Sep 17 00:00:00 2001 From: harshpatel4_crest Date: Tue, 7 Sep 2021 18:24:29 +0530 Subject: [PATCH 14/36] disabled pylint error --- tap_exacttarget/dao.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tap_exacttarget/dao.py b/tap_exacttarget/dao.py index 6b57e6d..354190b 100644 --- a/tap_exacttarget/dao.py +++ b/tap_exacttarget/dao.py @@ -60,7 +60,7 @@ def parse_object(self, obj): return project(obj, self.get_catalog_keys()) # a function to write records with applying transformation - def write_records_with_transform(self, record, catalog, table): + def write_records_with_transform(self, record, catalog, table): # pylint: disable=no-self-use with Transformer() as transformer: rec = transformer.transform(record, catalog.get('schema'), metadata.to_map(catalog.get('metadata'))) singer.write_record(table, rec) From e5a98cf9570225025d567db1c75020d96acdec25 Mon Sep 17 00:00:00 2001 From: harshpatel4_crest Date: Wed, 8 Sep 2021 17:58:02 +0530 Subject: [PATCH 15/36] test: removed disable pylint code --- tap_exacttarget/dao.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tap_exacttarget/dao.py b/tap_exacttarget/dao.py index 354190b..b547b64 100644 --- a/tap_exacttarget/dao.py +++ b/tap_exacttarget/dao.py @@ -60,7 +60,8 @@ def parse_object(self, obj): return project(obj, self.get_catalog_keys()) # a function to write 
records with applying transformation - def write_records_with_transform(self, record, catalog, table): # pylint: disable=no-self-use + @staticmethod + def write_records_with_transform(record, catalog, table): with Transformer() as transformer: rec = transformer.transform(record, catalog.get('schema'), metadata.to_map(catalog.get('metadata'))) singer.write_record(table, rec) From fa07e1c08e9ef88e2abaa6bb836c3d11d03059ab Mon Sep 17 00:00:00 2001 From: harshpatel4_crest Date: Wed, 8 Sep 2021 18:10:50 +0530 Subject: [PATCH 16/36] added comment in base file --- tests/base.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/base.py b/tests/base.py index 19edcc0..cd38d21 100644 --- a/tests/base.py +++ b/tests/base.py @@ -129,6 +129,11 @@ def expected_metadata(self): } def streams_to_select(self): + # events: there are 5 events and the API call window is of 10 minutes + # so there will be a lot of API calls for every test + # list_subscriber: the API window is of 1 day, as there are 5-6 test cases + # it will consume lots of time + # subscriber: it is the child stream of 'list_subscriber' return set(self.expected_metadata().keys()) - {'event', 'list_subscriber', 'subscriber'} def expected_replication_keys(self): From c2041361953df03e95fdbee062f72dc373f89514 Mon Sep 17 00:00:00 2001 From: harshpatel4_crest Date: Fri, 10 Sep 2021 15:48:42 +0530 Subject: [PATCH 17/36] updated comment --- tests/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/base.py b/tests/base.py index cd38d21..96880e3 100644 --- a/tests/base.py +++ b/tests/base.py @@ -131,8 +131,8 @@ def expected_metadata(self): def streams_to_select(self): # events: there are 5 events and the API call window is of 10 minutes # so there will be a lot of API calls for every test - # list_subscriber: the API window is of 1 day, as there are 5-6 test cases - # it will consume lots of time + # list_subscriber: tests took 30 minutes to run 3 tests, the test run time + # will be 
increased when all the tests are combined # subscriber: it is the child stream of 'list_subscriber' return set(self.expected_metadata().keys()) - {'event', 'list_subscriber', 'subscriber'} From 04ce60571a2a7f841cfd19099722508b664a4d55 Mon Sep 17 00:00:00 2001 From: harshpatel4_crest Date: Mon, 13 Sep 2021 18:38:08 +0530 Subject: [PATCH 18/36] updated the comment for skipping streams --- tests/base.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/base.py b/tests/base.py index 96880e3..892fbf4 100644 --- a/tests/base.py +++ b/tests/base.py @@ -131,8 +131,9 @@ def expected_metadata(self): def streams_to_select(self): # events: there are 5 events and the API call window is of 10 minutes # so there will be a lot of API calls for every test - # list_subscriber: tests took 30 minutes to run 3 tests, the test run time - # will be increased when all the tests are combined + # list_subscriber: as the API window is of 1 day, the tests took + # 30 minutes to run 3 tests, the test run time will be increased + # when all the tests are combined # subscriber: it is the child stream of 'list_subscriber' return set(self.expected_metadata().keys()) - {'event', 'list_subscriber', 'subscriber'} From d344f213b21d6e8636dddd0ff838d77d1f799259 Mon Sep 17 00:00:00 2001 From: harshpatel4_crest Date: Fri, 17 Sep 2021 17:55:54 +0530 Subject: [PATCH 19/36] updated discovery test and removed full replication test --- tests/test_exacttarget_discover.py | 12 +++ tests/test_exacttarget_full_replication.py | 92 ---------------------- 2 files changed, 12 insertions(+), 92 deletions(-) delete mode 100644 tests/test_exacttarget_full_replication.py diff --git a/tests/test_exacttarget_discover.py b/tests/test_exacttarget_discover.py index 9c247ff..fc00f47 100644 --- a/tests/test_exacttarget_discover.py +++ b/tests/test_exacttarget_discover.py @@ -3,6 +3,7 @@ import tap_tester.menagerie as menagerie import tap_tester.runner as runner import os +import re class 
ExactTargetDiscover(ExactTargetBase): @@ -37,6 +38,17 @@ def test_run(self): streams_to_test = self.streams_to_select() found_catalogs = menagerie.get_catalogs(conn_id) + # verify the stream names discovered were what we expect + # streams should only have lowercase alphas and underscores + + # skipped 'data_extension' streams, because they are the custom + # tables we create in marketing cloud UI and, the stream name + # will be the table name we set in the UI, as seen in our + # instance the table name is 'This is a test' + found_catalog_names = {c['tap_stream_id'] for c in found_catalogs if 'data_extension.' not in c['tap_stream_id']} + self.assertTrue(all([re.fullmatch(r"[a-z_]+", name) for name in found_catalog_names]), + msg="One or more streams don't follow standard naming") + for stream in streams_to_test: with self.subTest(stream=stream): diff --git a/tests/test_exacttarget_full_replication.py b/tests/test_exacttarget_full_replication.py deleted file mode 100644 index c175a7f..0000000 --- a/tests/test_exacttarget_full_replication.py +++ /dev/null @@ -1,92 +0,0 @@ -from base import ExactTargetBase -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner -import json -import datetime - -class FullReplicationTest(ExactTargetBase): - """Test tap gets all records for streams with full replication""" - - def name(self): - return "tap_tester_exacttarget_full_replication" - - def test_run(self): - conn_id_1 = connections.ensure_connection(self) - runner.run_check_mode(self, conn_id_1) - - # Select streams - found_catalogs = menagerie.get_catalogs(conn_id_1) - full_streams = {key for key, value in self.expected_replication_method().items() - if value == "FULL_TABLE"} - our_catalogs = [catalog for catalog in found_catalogs if - catalog.get('stream_name') in full_streams] - self.select_found_catalogs(conn_id_1, our_catalogs, full_streams) - - # Run a sync job - first_sync_record_count = 
self.run_and_verify_sync(conn_id_1) - - # verify that the sync only sent records to the target for selected streams (catalogs) - self.assertEqual(set(first_sync_record_count.keys()), full_streams) - - first_sync_state = menagerie.get_state(conn_id_1) - - # Get the set of records from a first sync - first_sync_records = runner.get_records_from_target_output() - - # set future start date, which validates that stream is syncing 'FULL_TABLE' - self.START_DATE = datetime.datetime.strftime(datetime.datetime.today() + datetime.timedelta(days=1), "%Y-%m-%dT00:00:00Z") - - conn_id_2 = connections.ensure_connection(self, original_properties=False) - runner.run_check_mode(self, conn_id_2) - - found_catalogs = menagerie.get_catalogs(conn_id_2) - our_catalogs = [catalog for catalog in found_catalogs if - catalog.get('stream_name') in full_streams] - self.select_found_catalogs(conn_id_2, our_catalogs, full_streams) - - # Run a second sync job - second_sync_record_count = self.run_and_verify_sync(conn_id_2) - - # Get the set of records from a second sync - second_sync_records = runner.get_records_from_target_output() - - for stream in full_streams: - with self.subTest(stream=stream): - - # verify there is no bookmark values from state - state_value = first_sync_state.get("bookmarks", {}).get(stream) - self.assertIsNone(state_value) - - # verify that there is more than 1 record of data - setup necessary - self.assertGreater(first_sync_record_count.get(stream, 0), 1, - msg="Data is not set up to be able to test full sync") - - # verify that you get the same or more data the 2nd time around - self.assertGreaterEqual( - second_sync_record_count.get(stream, 0), - first_sync_record_count.get(stream, 0), - msg="second syc did not have more records, full sync not verified") - - # [set(message['data']) for message in messages['messages'] - # if message['action'] == 'upsert'][0] - # verify all data from 1st sync included in 2nd sync - first_data = [record["data"] for record - in 
first_sync_records.get(stream, {}).get("messages", {"data": {}})] - second_data = [record["data"] for record - in second_sync_records.get(stream, {}).get("messages", {"data": {}})] - - same_records = 0 - for first_record in first_data: - first_value = json.dumps(first_record, sort_keys=True) - - for compare_record in second_data: - compare_value = json.dumps(compare_record, sort_keys=True) - - if first_value == compare_value: - second_data.remove(compare_record) - same_records += 1 - break - - self.assertEqual(len(first_data), same_records, - msg="Not all data from the first sync was in the second sync") From 4630c7172cea985ef9fba02bba453657901b637d Mon Sep 17 00:00:00 2001 From: harshpatel4_crest Date: Fri, 17 Sep 2021 19:03:53 +0530 Subject: [PATCH 20/36] added verification of unique records --- tests/test_exacttarget_field_selection.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/test_exacttarget_field_selection.py b/tests/test_exacttarget_field_selection.py index 7f45179..78cf99b 100644 --- a/tests/test_exacttarget_field_selection.py +++ b/tests/test_exacttarget_field_selection.py @@ -115,6 +115,8 @@ def run_test(self, only_automatic_fields=False): for stream in expected_streams: with self.subTest(stream=stream): + expected_primary_keys = self.expected_primary_keys()[stream] + # get expected keys expected_keys = expected_stream_fields[stream] @@ -131,3 +133,13 @@ def run_test(self, only_automatic_fields=False): # verify expected and actual fields self.assertEqual(expected_keys, actual_keys, msg='Selected keys in catalog is not as expected') + + # Verify we did not duplicate any records across pages + records_pks_set = {tuple([message.get('data').get(primary_key) + for primary_key in expected_primary_keys]) + for message in messages.get('messages')} + records_pks_list = [tuple([message.get('data').get(primary_key) + for primary_key in expected_primary_keys]) + for message in messages.get('messages')] + 
self.assertCountEqual(records_pks_set, records_pks_list, + msg="We have duplicate records for {}".format(stream)) From 087b15661db75bede77d8630a832ae2c0894a9a0 Mon Sep 17 00:00:00 2001 From: harshpatel4_crest Date: Mon, 20 Sep 2021 14:39:23 +0530 Subject: [PATCH 21/36] updated start date --- tests/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/base.py b/tests/base.py index 892fbf4..1396dcc 100644 --- a/tests/base.py +++ b/tests/base.py @@ -48,7 +48,7 @@ def get_credentials(self): def get_properties(self, original: bool = True): return_value = { - 'start_date': '2014-01-01T00:00:00Z', + 'start_date': '2019-01-01T00:00:00Z', 'client_id': os.getenv('TAP_EXACTTARGET_CLIENT_ID'), 'tenant_subdomain': os.getenv('TAP_EXACTTARGET_TENANT_SUBDOMAIN') } From 612d1a840d1be06344bb0dcb13392fbd5e6b6b7b Mon Sep 17 00:00:00 2001 From: harshpatel4_crest Date: Wed, 22 Sep 2021 12:41:29 +0530 Subject: [PATCH 22/36] updated the code --- tap_exacttarget/dao.py | 10 +++--- tap_exacttarget/endpoints/data_extensions.py | 10 +++--- tests/base.py | 37 ++++++++++---------- tests/test_exacttarget_discover.py | 12 +++---- 4 files changed, 32 insertions(+), 37 deletions(-) diff --git a/tap_exacttarget/dao.py b/tap_exacttarget/dao.py index be2d557..d5cb8b5 100644 --- a/tap_exacttarget/dao.py +++ b/tap_exacttarget/dao.py @@ -28,12 +28,10 @@ def generate_catalog(self): cls = self.__class__ mdata = metadata.new() - mdata = metadata.get_standard_metadata( - schema=self.SCHEMA, - key_properties=self.KEY_PROPERTIES, - valid_replication_keys=self.REPLICATION_KEYS if self.REPLICATION_KEYS else None, - replication_method=self.REPLICATION_METHOD - ) + mdata = metadata.get_standard_metadata(schema=self.SCHEMA, + key_properties=self.KEY_PROPERTIES, + valid_replication_keys=self.REPLICATION_KEYS if self.REPLICATION_KEYS else None, + replication_method=self.REPLICATION_METHOD) mdata_map = metadata.to_map(mdata) for replication_key in self.REPLICATION_KEYS: diff --git 
a/tap_exacttarget/endpoints/data_extensions.py b/tap_exacttarget/endpoints/data_extensions.py index 8a2ee78..d64e641 100644 --- a/tap_exacttarget/endpoints/data_extensions.py +++ b/tap_exacttarget/endpoints/data_extensions.py @@ -85,18 +85,18 @@ def _get_extensions(self): 'metadata': { 'inclusion':'available', 'forced-replication-method': 'FULL_TABLE', - 'table-key-properties': [ - '_CustomObjectKey' - ], + 'table-key-properties': ['_CustomObjectKey'], 'valid-replication-keys': [] } }, { 'breadcrumb': ('properties', '_CustomObjectKey'), - 'metadata': {'inclusion':'automatic'}}, + 'metadata': {'inclusion':'automatic'} + }, { 'breadcrumb': ('properties', 'CategoryID'), - 'metadata': {'inclusion':'available'}} + 'metadata': {'inclusion':'available'} + } ] } diff --git a/tests/base.py b/tests/base.py index cd38d21..783264b 100644 --- a/tests/base.py +++ b/tests/base.py @@ -48,7 +48,7 @@ def get_credentials(self): def get_properties(self, original: bool = True): return_value = { - 'start_date': '2014-01-01T00:00:00Z', + 'start_date': '2019-01-01T00:00:00Z', 'client_id': os.getenv('TAP_EXACTTARGET_CLIENT_ID'), 'tenant_subdomain': os.getenv('TAP_EXACTTARGET_TENANT_SUBDOMAIN') } @@ -131,45 +131,44 @@ def expected_metadata(self): def streams_to_select(self): # events: there are 5 events and the API call window is of 10 minutes # so there will be a lot of API calls for every test - # list_subscriber: the API window is of 1 day, as there are 5-6 test cases - # it will consume lots of time + # list_subscriber: as the API window is of 1 day, the tests took + # 30 minutes to run 3 tests, the test run time will be increased + # when all the tests are combined # subscriber: it is the child stream of 'list_subscriber' return set(self.expected_metadata().keys()) - {'event', 'list_subscriber', 'subscriber'} def expected_replication_keys(self): return {table: properties.get(self.REPLICATION_KEYS, set()) - for table, properties - in self.expected_metadata().items()} + for table, 
properties in self.expected_metadata().items()} def expected_primary_keys(self): return {table: properties.get(self.PRIMARY_KEYS, set()) - for table, properties - in self.expected_metadata().items()} + for table, properties in self.expected_metadata().items()} def expected_replication_method(self): return {table: properties.get(self.REPLICATION_METHOD, set()) - for table, properties - in self.expected_metadata().items()} + for table, properties in self.expected_metadata().items()} def select_found_catalogs(self, conn_id, catalogs, only_streams=None, deselect_all_fields: bool = False, non_selected_props=[]): """Select all streams and all fields within streams""" for catalog in catalogs: if only_streams and catalog["stream_name"] not in only_streams: continue + schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) non_selected_properties = non_selected_props if not deselect_all_fields else [] if deselect_all_fields: # get a list of all properties so that none are selected - non_selected_properties = schema.get('annotated-schema', {}).get( - 'properties', {}) + non_selected_properties = schema.get('annotated-schema', {}).get('properties', {}) non_selected_properties = non_selected_properties.keys() - additional_md = [] - connections.select_catalog_and_fields_via_metadata( - conn_id, catalog, schema, additional_md=additional_md, - non_selected_fields=non_selected_properties - ) + additional_md = [] + connections.select_catalog_and_fields_via_metadata(conn_id, + catalog, + schema, + additional_md=additional_md, + non_selected_fields=non_selected_properties) def run_and_verify_sync(self, conn_id): sync_job_name = runner.run_sync_mode(self, conn_id) @@ -178,8 +177,10 @@ def run_and_verify_sync(self, conn_id): exit_status = menagerie.get_exit_status(conn_id, sync_job_name) menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - sync_record_count = runner.examine_target_output_file( - self, conn_id, self.streams_to_select(), 
self.expected_primary_keys()) + sync_record_count = runner.examine_target_output_file(self, + conn_id, + self.streams_to_select(), + self.expected_primary_keys()) self.assertGreater( sum(sync_record_count.values()), 0, diff --git a/tests/test_exacttarget_discover.py b/tests/test_exacttarget_discover.py index fc00f47..de9c113 100644 --- a/tests/test_exacttarget_discover.py +++ b/tests/test_exacttarget_discover.py @@ -25,7 +25,7 @@ def test_run(self): Testing that discovery creates the appropriate catalog with valid metadata. • Verify number of actual streams discovered match expected • Verify the stream names discovered were what we expect - streams should only have lowercase alphas and underscores + streams should only have lowercase alphas and underscores • verify there is only 1 top level breadcrumb • verify primary key(s) • verify replication key(s) @@ -91,14 +91,10 @@ def test_run(self): "\nstream_properties | {}".format(stream_properties)) # verify primary key(s) match expectations - self.assertSetEqual( - expected_primary_keys, actual_primary_keys, - ) + self.assertSetEqual(expected_primary_keys, actual_primary_keys) # verify replication key(s) match expectations - self.assertSetEqual( - expected_replication_keys, actual_replication_keys, - ) + self.assertSetEqual(expected_replication_keys, actual_replication_keys) # verify that primary keys # are given the inclusion of automatic in metadata. 
@@ -136,4 +132,4 @@ def get_properties(self, *args, **kwargs): props = super().get_properties(*args, **kwargs) props['client_id'] = os.getenv('TAP_EXACTTARGET_V2_CLIENT_ID') props['tenant_subdomain'] = os.getenv('TAP_EXACTTARGET_V2_TENANT_SUBDOMAIN') - return props \ No newline at end of file + return props From e7d7e09e4a022b300339d91e2c3f64f23575fb32 Mon Sep 17 00:00:00 2001 From: harshpatel4_crest Date: Wed, 22 Sep 2021 12:46:43 +0530 Subject: [PATCH 23/36] updated the code --- tap_exacttarget/endpoints/campaigns.py | 2 +- tests/test_exacttarget_field_selection.py | 20 ++++++++------------ 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/tap_exacttarget/endpoints/campaigns.py b/tap_exacttarget/endpoints/campaigns.py index 6080efd..f55dbe1 100644 --- a/tap_exacttarget/endpoints/campaigns.py +++ b/tap_exacttarget/endpoints/campaigns.py @@ -50,4 +50,4 @@ def sync_data(self): for campaign in cursor: campaign = self.filter_keys_and_parse(campaign) - self.write_records_with_transform(campaign, catalog_copy, self.__class__.TABLE) + self.write_records_with_transform(campaign, catalog_copy, self.TABLE) diff --git a/tests/test_exacttarget_field_selection.py b/tests/test_exacttarget_field_selection.py index 78cf99b..dafb519 100644 --- a/tests/test_exacttarget_field_selection.py +++ b/tests/test_exacttarget_field_selection.py @@ -94,15 +94,13 @@ def run_test(self, only_automatic_fields=False): if not stream_name in expected_streams: continue # select catalog fields - self.select_found_catalogs( - conn_id, - [catalog], - only_streams=[stream_name], - deselect_all_fields=True if only_automatic_fields else False, - non_selected_props=[] if only_automatic_fields else self.non_selected_fields[stream_name]) + self.select_found_catalogs(conn_id, + [catalog], + only_streams=[stream_name], + deselect_all_fields=True if only_automatic_fields else False, + non_selected_props=[] if only_automatic_fields else self.non_selected_fields[stream_name]) # add expected fields for 
assertion - fields_from_field_level_md = [md_entry['breadcrumb'][1] - for md_entry in catalog_entry['metadata'] + fields_from_field_level_md = [md_entry['breadcrumb'][1] for md_entry in catalog_entry['metadata'] if md_entry['breadcrumb'] != []] if only_automatic_fields: expected_stream_fields[stream_name] = self.expected_primary_keys()[stream_name] | self.expected_replication_keys()[stream_name] @@ -135,11 +133,9 @@ def run_test(self, only_automatic_fields=False): msg='Selected keys in catalog is not as expected') # Verify we did not duplicate any records across pages - records_pks_set = {tuple([message.get('data').get(primary_key) - for primary_key in expected_primary_keys]) + records_pks_set = {tuple([message.get('data').get(primary_key) for primary_key in expected_primary_keys]) for message in messages.get('messages')} - records_pks_list = [tuple([message.get('data').get(primary_key) - for primary_key in expected_primary_keys]) + records_pks_list = [tuple([message.get('data').get(primary_key) for primary_key in expected_primary_keys]) for message in messages.get('messages')] self.assertCountEqual(records_pks_set, records_pks_list, msg="We have duplicate records for {}".format(stream)) From 8ab835365d65b52c2701ca5c77213bfde9b05960 Mon Sep 17 00:00:00 2001 From: harshpatel4_crest Date: Thu, 23 Sep 2021 17:43:03 +0530 Subject: [PATCH 24/36] added a comment explaining subscriber and list subscriber syncing --- tap_exacttarget/__init__.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tap_exacttarget/__init__.py b/tap_exacttarget/__init__.py index 66c61a0..6ee548e 100644 --- a/tap_exacttarget/__init__.py +++ b/tap_exacttarget/__init__.py @@ -102,6 +102,13 @@ def do_sync(args): .format(stream_catalog.get('stream'))) continue + # The 'subscribers' stream is the child stream of 'list_subscribers' + # When we sync 'list_subscribers', it makes the list of subscriber's + # 'SubscriberKey' that were returned as part of 'list_subscribers' records + # and pass that 
list to 'subscribers' stream and thus 'subscribers' stream + # will only sync records of subscribers that are present in the list. + # Hence, for different start dates the 'SubscriberKey' list will differ and + # thus 'subscribers' records will also be different for different start dates. if SubscriberDataAccessObject.matches_catalog(stream_catalog): subscriber_selected = True subscriber_catalog = stream_catalog From 684f7526e5aa40d06c966f5d2bc4c4502866ed3e Mon Sep 17 00:00:00 2001 From: harshpatel4_crest Date: Thu, 7 Oct 2021 11:33:14 +0530 Subject: [PATCH 25/36] added comments --- tap_exacttarget/dao.py | 4 ++++ tap_exacttarget/endpoints/data_extensions.py | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/tap_exacttarget/dao.py b/tap_exacttarget/dao.py index d5cb8b5..03bb21f 100644 --- a/tap_exacttarget/dao.py +++ b/tap_exacttarget/dao.py @@ -28,12 +28,16 @@ def generate_catalog(self): cls = self.__class__ mdata = metadata.new() + + # use 'get_standard_metadata' with primary key, replication key and replication method mdata = metadata.get_standard_metadata(schema=self.SCHEMA, key_properties=self.KEY_PROPERTIES, valid_replication_keys=self.REPLICATION_KEYS if self.REPLICATION_KEYS else None, replication_method=self.REPLICATION_METHOD) mdata_map = metadata.to_map(mdata) + + # make 'automatic' inclusion for replication keys for replication_key in self.REPLICATION_KEYS: mdata_map[('properties', replication_key)]['inclusion'] = 'automatic' diff --git a/tap_exacttarget/endpoints/data_extensions.py b/tap_exacttarget/endpoints/data_extensions.py index d64e641..724703e 100644 --- a/tap_exacttarget/endpoints/data_extensions.py +++ b/tap_exacttarget/endpoints/data_extensions.py @@ -148,11 +148,14 @@ def _get_fields(self, extensions): # pylint: disable=too-many-branches [extension_id, 'schema', 'properties', field_name], field_schema) + # add primary key in 'table-key-properties' if is_primary_key: for mdata in to_return[extension_id]['metadata']: if not 
mdata.get('breadcrumb'): mdata.get('metadata').get('table-key-properties').append(field_name) + # add replication key in 'valid-replication-keys' + # and change 'forced-replication-method' to INCREMENTAL if is_replication_key: for mdata in to_return[extension_id]['metadata']: if not mdata.get('breadcrumb'): @@ -161,6 +164,7 @@ def _get_fields(self, extensions): # pylint: disable=too-many-branches # These fields are defaulted into the schema, do not add to metadata again. if field_name not in {'_CustomObjectKey', 'CategoryID'}: + # if primary of replication key, then mark it as automatic if is_primary_key or is_replication_key: to_return[extension_id]['metadata'].append({ 'breadcrumb': ('properties', field_name), From 4abadefea2fdbccad77f839238a5110461991b5c Mon Sep 17 00:00:00 2001 From: harshpatel4_crest Date: Thu, 7 Oct 2021 11:37:47 +0530 Subject: [PATCH 26/36] updated comment --- tap_exacttarget/dao.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tap_exacttarget/dao.py b/tap_exacttarget/dao.py index a71c283..c7020c8 100644 --- a/tap_exacttarget/dao.py +++ b/tap_exacttarget/dao.py @@ -61,7 +61,7 @@ def get_catalog_keys(self): def parse_object(self, obj): return project(obj, self.get_catalog_keys()) - # a function to write records with applying transformation + # a function to write records by applying transformation @staticmethod def write_records_with_transform(record, catalog, table): with Transformer() as transformer: From 02325350b81baf53f7c69a85af6423d54d0243fe Mon Sep 17 00:00:00 2001 From: harshpatel4_crest Date: Fri, 8 Oct 2021 14:34:18 +0530 Subject: [PATCH 27/36] made separate files for schemas --- tap_exacttarget/dao.py | 32 ++++++- tap_exacttarget/endpoints/campaigns.py | 25 ----- tap_exacttarget/endpoints/content_areas.py | 90 ------------------ tap_exacttarget/endpoints/emails.py | 94 ------------------ tap_exacttarget/endpoints/events.py | 29 ------ tap_exacttarget/endpoints/folders.py | 38 -------- 
tap_exacttarget/endpoints/list_sends.py | 81 ---------------- tap_exacttarget/endpoints/list_subscribers.py | 21 ---- tap_exacttarget/endpoints/lists.py | 35 ------- tap_exacttarget/endpoints/sends.py | 68 ------------- tap_exacttarget/endpoints/subscribers.py | 92 ------------------ tap_exacttarget/schemas.py | 64 ------------- tap_exacttarget/schemas/campaigns.json | 26 +++++ tap_exacttarget/schemas/content_areas.json | 87 +++++++++++++++++ tap_exacttarget/schemas/definations.json | 41 ++++++++ tap_exacttarget/schemas/emails.json | 95 +++++++++++++++++++ tap_exacttarget/schemas/events.json | 33 +++++++ tap_exacttarget/schemas/folders.json | 46 +++++++++ tap_exacttarget/schemas/list_sends.json | 83 ++++++++++++++++ tap_exacttarget/schemas/list_subscribers.json | 31 ++++++ tap_exacttarget/schemas/lists.json | 43 +++++++++ tap_exacttarget/schemas/sends.json | 59 ++++++++++++ tap_exacttarget/schemas/subscribers.json | 92 ++++++++++++++++++ tests/unittests/test_schema.py | 54 +++++++++++ 24 files changed, 718 insertions(+), 641 deletions(-) delete mode 100644 tap_exacttarget/schemas.py create mode 100644 tap_exacttarget/schemas/campaigns.json create mode 100644 tap_exacttarget/schemas/content_areas.json create mode 100644 tap_exacttarget/schemas/definations.json create mode 100644 tap_exacttarget/schemas/emails.json create mode 100644 tap_exacttarget/schemas/events.json create mode 100644 tap_exacttarget/schemas/folders.json create mode 100644 tap_exacttarget/schemas/list_sends.json create mode 100644 tap_exacttarget/schemas/list_subscribers.json create mode 100644 tap_exacttarget/schemas/lists.json create mode 100644 tap_exacttarget/schemas/sends.json create mode 100644 tap_exacttarget/schemas/subscribers.json create mode 100644 tests/unittests/test_schema.py diff --git a/tap_exacttarget/dao.py b/tap_exacttarget/dao.py index c7020c8..427a94f 100644 --- a/tap_exacttarget/dao.py +++ b/tap_exacttarget/dao.py @@ -1,5 +1,6 @@ import singer -from singer import metadata, 
Transformer +import os +from singer import metadata, Transformer, utils from funcy import project @@ -11,6 +12,26 @@ def _get_catalog_schema(catalog): return catalog.get('schema', {}).get('properties') +def get_abs_path(path): + return os.path.join(os.path.dirname(os.path.realpath(__file__)), path) + +# function to load the fields in the 'definations' which contains the reference fields +def load_schema_references(): + shared_schema_path = get_abs_path('schemas/definations.json') + + refs = {} + # load json from the path + refs["definations.json"] = utils.load_json(shared_schema_path) + + return refs + +# function to load schema from json file +def load_schema(stream): + path = get_abs_path('schemas/{}s.json'.format(stream)) + # load json from the path + schema = utils.load_json(path) + + return schema class DataAccessObject(): @@ -27,10 +48,14 @@ def matches_catalog(cls, catalog): def generate_catalog(self): cls = self.__class__ + # get the reference schemas + refs = load_schema_references() + # resolve the schema reference and make final schema + schema = singer.resolve_schema_references(load_schema(cls.TABLE), refs) mdata = metadata.new() # use 'get_standard_metadata' with primary key, replication key and replication method - mdata = metadata.get_standard_metadata(schema=self.SCHEMA, + mdata = metadata.get_standard_metadata(schema=schema, key_properties=self.KEY_PROPERTIES, valid_replication_keys=self.REPLICATION_KEYS if self.REPLICATION_KEYS else None, replication_method=self.REPLICATION_METHOD) @@ -45,7 +70,7 @@ def generate_catalog(self): 'tap_stream_id': cls.TABLE, 'stream': cls.TABLE, 'key_properties': cls.KEY_PROPERTIES, - 'schema': cls.SCHEMA, + 'schema': schema, 'metadata': metadata.to_list(mdata_map) }] @@ -91,7 +116,6 @@ def sync(self): # OVERRIDE THESE TO IMPLEMENT A NEW DAO: - SCHEMA = None TABLE = None KEY_PROPERTIES = None REPLICATION_KEYS = [] diff --git a/tap_exacttarget/endpoints/campaigns.py b/tap_exacttarget/endpoints/campaigns.py index 
f55dbe1..92790e9 100644 --- a/tap_exacttarget/endpoints/campaigns.py +++ b/tap_exacttarget/endpoints/campaigns.py @@ -4,37 +4,12 @@ from tap_exacttarget.client import request from tap_exacttarget.dao import DataAccessObject -from tap_exacttarget.schemas import with_properties LOGGER = singer.get_logger() class CampaignDataAccessObject(DataAccessObject): - SCHEMA = with_properties({ - 'id': { - 'type': ['null', 'string'], - }, - 'createdDate': { - 'type': ['null', 'string'], - }, - 'modifiedDate': { - 'type': ['null', 'string'], - }, - 'name': { - 'type': ['null', 'string'], - }, - 'description': { - 'type': ['null', 'string'], - }, - 'campaignCode': { - 'type': ['null', 'string'], - }, - 'color': { - 'type': ['null', 'string'], - } - }) - TABLE = 'campaign' KEY_PROPERTIES = ['id'] REPLICATION_METHOD = 'FULL_TABLE' diff --git a/tap_exacttarget/endpoints/content_areas.py b/tap_exacttarget/endpoints/content_areas.py index d7973cb..97635bf 100644 --- a/tap_exacttarget/endpoints/content_areas.py +++ b/tap_exacttarget/endpoints/content_areas.py @@ -4,9 +4,6 @@ from tap_exacttarget.client import request from tap_exacttarget.dao import DataAccessObject -from tap_exacttarget.schemas import ID_FIELD, CUSTOM_PROPERTY_LIST, \ - CREATED_DATE_FIELD, MODIFIED_DATE_FIELD, CUSTOMER_KEY_FIELD, \ - OBJECT_ID_FIELD, with_properties from tap_exacttarget.state import incorporate, save_state, \ get_last_record_value_for_table @@ -16,93 +13,6 @@ class ContentAreaDataAccessObject(DataAccessObject): - SCHEMA = with_properties({ - 'BackgroundColor': { - 'type': ['null', 'string'], - 'description': 'Indicates background color of content area', - }, - 'BorderColor': { - 'type': ['null', 'string'], - 'description': ('Indicates color of border surrounding ' - 'content area'), - }, - 'BorderWidth': { - 'type': ['null', 'integer'], - 'description': ('Indicates pixel width of border ' - 'surrounding content area'), - }, - 'CategoryID': { - 'type': ['null', 'integer'], - 'description': 'Specifies 
the identifier of the folder.', - }, - 'Cellpadding': { - 'type': ['null', 'integer'], - 'description': ('Indicates pixel value of padding ' - 'around content area'), - }, - 'Cellspacing': { - 'type': ['null', 'integer'], - 'description': ('Indicates pixel value of spacing ' - 'for content area'), - }, - 'Content': { - 'type': ['null', 'string'], - 'description': ('Identifies content contained in ' - 'a content area.'), - }, - 'CreatedDate': CREATED_DATE_FIELD, - 'CustomerKey': CUSTOMER_KEY_FIELD, - 'FontFamily': { - 'type': ['null', 'string'], - 'description': 'Indicates font family used in content area', - }, - 'HasFontSize': { - 'type': ['null', 'boolean'], - 'description': ('Indicates whether the content area includes ' - 'a specified font size or not'), - }, - 'ID': ID_FIELD, - 'IsBlank': { - 'type': ['null', 'boolean'], - 'description': ('Indicates if specified content area ' - 'contains no content.'), - }, - 'IsDynamicContent': { - 'type': ['null', 'boolean'], - 'description': ('Indicates if specific content area ' - 'contains dynamic content.'), - }, - 'IsLocked': { - 'type': ['null', 'boolean'], - 'description': ('Indicates if specific email content area ' - 'within an Enterprise or Enterprise 2.0 ' - 'account is locked and cannot be changed by ' - 'subaccounts.'), - }, - 'IsSurvey': { - 'type': ['null', 'boolean'], - 'description': ('Indicates whether a specific content area ' - 'contains survey questions.'), - }, - 'Key': { - 'type': ['null', 'string'], - 'description': ('Specifies key associated with content area ' - 'in HTML body. 
Relates to the Email object ' - 'via a custom type.'), - }, - 'ModifiedDate': MODIFIED_DATE_FIELD, - 'Name': { - 'type': ['null', 'string'], - 'description': 'Name of the object or property.', - }, - 'ObjectID': OBJECT_ID_FIELD, - 'PartnerProperties': CUSTOM_PROPERTY_LIST, - 'Width': { - 'type': ['null', 'integer'], - 'description': 'Indicates pixel width of content area', - }, - }) - TABLE = 'content_area' KEY_PROPERTIES = ['ID'] REPLICATION_METHOD = 'INCREMENTAL' diff --git a/tap_exacttarget/endpoints/emails.py b/tap_exacttarget/endpoints/emails.py index 2824079..2dcbcac 100644 --- a/tap_exacttarget/endpoints/emails.py +++ b/tap_exacttarget/endpoints/emails.py @@ -4,9 +4,6 @@ from tap_exacttarget.client import request from tap_exacttarget.dao import DataAccessObject -from tap_exacttarget.schemas import ID_FIELD, CUSTOM_PROPERTY_LIST, \ - CREATED_DATE_FIELD, CUSTOMER_KEY_FIELD, OBJECT_ID_FIELD, \ - MODIFIED_DATE_FIELD, with_properties from tap_exacttarget.state import incorporate, save_state, \ get_last_record_value_for_table @@ -14,97 +11,6 @@ class EmailDataAccessObject(DataAccessObject): - SCHEMA = with_properties({ - 'CategoryID': { - 'type': ['null', 'integer'], - 'description': ('Specifies the identifier of the folder ' - 'containing the email.'), - }, - 'CharacterSet': { - 'type': ['null', 'string'], - 'description': ('Indicates encoding used in an email ' - 'message.'), - }, - 'ClonedFromID': { - 'type': ['null', 'integer'], - 'description': ('ID of email message from which the specified ' - 'email message was created.'), - }, - 'ContentAreaIDs': { - 'type': 'array', - 'description': ('Contains information on content areas ' - 'included in an email message.'), - 'items': { - 'type': ['null', 'integer'] - } - }, - 'ContentCheckStatus': { - 'type': ['null', 'string'], - 'description': ('Indicates whether content validation has ' - 'completed for this email message.'), - }, - 'CreatedDate': CREATED_DATE_FIELD, - 'CustomerKey': CUSTOMER_KEY_FIELD, - 
'EmailType': { - 'type': ['null', 'string'], - 'description': ('Defines preferred email type.'), - }, - 'HasDynamicSubjectLine': { - 'type': ['null', 'boolean'], - 'description': ('Indicates whether email message contains ' - 'a dynamic subject line.'), - }, - 'HTMLBody': { - 'type': ['null', 'string'], - 'description': ('Contains HTML body of an email message.'), - }, - 'ID': ID_FIELD, - 'IsActive': { - 'type': ['null', 'boolean'], - 'description': ('Specifies whether the object is active.') - }, - 'IsHTMLPaste': { - 'type': ['null', 'boolean'], - 'description': ('Indicates whether email message was created ' - 'via pasted HTML.') - }, - 'ModifiedDate': MODIFIED_DATE_FIELD, - 'Name': { - 'type': ['null', 'string'], - 'description': 'Name of the object or property.', - }, - 'ObjectID': OBJECT_ID_FIELD, - 'PartnerProperties': CUSTOM_PROPERTY_LIST, - 'PreHeader': { - 'type': ['null', 'string'], - 'description': ('Contains text used in preheader of email ' - 'message on mobile devices.') - }, - 'Status': { - 'type': ['null', 'string'], - 'description': ('Defines status of object. 
Status of an ' - 'address.'), - }, - 'Subject': { - 'type': ['null', 'string'], - 'description': ('Contains subject area information for a ' - 'message.'), - }, - 'SyncTextWithHTML': { - 'type': ['null', 'boolean'], - 'description': ('Makes the text version of an email contain ' - 'the same content as the HTML version.'), - }, - 'TextBody': { - 'type': ['null', 'string'], - 'description': ('Contains raw text body of a message.'), - }, - '__AdditionalEmailAttribute1': {'type': ['null', 'string']}, - '__AdditionalEmailAttribute2': {'type': ['null', 'string']}, - '__AdditionalEmailAttribute3': {'type': ['null', 'string']}, - '__AdditionalEmailAttribute4': {'type': ['null', 'string']}, - '__AdditionalEmailAttribute5': {'type': ['null', 'string']}, - }) TABLE = 'email' KEY_PROPERTIES = ['ID'] diff --git a/tap_exacttarget/endpoints/events.py b/tap_exacttarget/endpoints/events.py index 512b6b3..fd6a20c 100644 --- a/tap_exacttarget/endpoints/events.py +++ b/tap_exacttarget/endpoints/events.py @@ -6,7 +6,6 @@ from tap_exacttarget.dao import DataAccessObject from tap_exacttarget.pagination import get_date_page, before_now, \ increment_date -from tap_exacttarget.schemas import SUBSCRIBER_KEY_FIELD, with_properties from tap_exacttarget.state import incorporate, save_state, \ get_last_record_value_for_table @@ -15,34 +14,6 @@ class EventDataAccessObject(DataAccessObject): - SCHEMA = with_properties({ - 'SendID': { - 'type': ['null', 'integer'], - 'description': 'Contains identifier for a specific send.', - }, - 'EventDate': { - 'type': ['null', 'string'], - 'format': 'datetime', - 'description': 'Date when a tracking event occurred.', - }, - 'EventType': { - 'type': ['null', 'string'], - 'description': 'The type of tracking event', - }, - 'BatchID': { - 'type': ['null','integer'], - 'description': 'Ties triggered send sent events to other events (like clicks and opens that occur at a later date and time)', - }, - 'CorrelationID': { - 'type': ['null','string'], - 'description': 
'Identifies correlation of objects across several requests.', - }, - 'URL': { - 'type': ['null','string'], - 'description': 'URL that was clicked.', - }, - 'SubscriberKey': SUBSCRIBER_KEY_FIELD, - }) TABLE = 'event' KEY_PROPERTIES = ['SendID', 'EventType', 'SubscriberKey', 'EventDate'] diff --git a/tap_exacttarget/endpoints/folders.py b/tap_exacttarget/endpoints/folders.py index 19acf80..1b8af2f 100644 --- a/tap_exacttarget/endpoints/folders.py +++ b/tap_exacttarget/endpoints/folders.py @@ -4,9 +4,6 @@ from tap_exacttarget.client import request from tap_exacttarget.dao import DataAccessObject -from tap_exacttarget.schemas import ID_FIELD, CUSTOM_PROPERTY_LIST, \ - CREATED_DATE_FIELD, CUSTOMER_KEY_FIELD, MODIFIED_DATE_FIELD, \ - DESCRIPTION_FIELD, OBJECT_ID_FIELD, with_properties from tap_exacttarget.state import incorporate, save_state, \ get_last_record_value_for_table @@ -16,41 +13,6 @@ class FolderDataAccessObject(DataAccessObject): - SCHEMA = with_properties({ - 'AllowChildren': { - 'type': ['null', 'boolean'], - 'description': ('Specifies whether a data folder can have ' - 'child data folders.'), - }, - 'ContentType': { - 'type': ['null', 'string'], - 'description': ('Defines the type of content contained ' - 'within a folder.'), - }, - 'CreatedDate': CREATED_DATE_FIELD, - 'CustomerKey': CUSTOMER_KEY_FIELD, - 'Description': DESCRIPTION_FIELD, - 'ID': ID_FIELD, - 'ModifiedDate': MODIFIED_DATE_FIELD, - 'Name': { - 'type': ['null', 'string'], - 'description': 'Name of the object or property.', - }, - 'ObjectID': OBJECT_ID_FIELD, - 'ParentFolder': { - 'type': ['null', 'integer'], - 'description': ('Specifies the parent folder for a data ' - 'folder.'), - }, - 'PartnerProperties': CUSTOM_PROPERTY_LIST, - 'Type': { - 'type': ['null', 'string'], - 'description': ('Indicates type of specific list. 
Valid ' - 'values include Public, Private, Salesforce, ' - 'GlobalUnsubscribe, and Master.') - } - }) - TABLE = 'folder' KEY_PROPERTIES = ['ID'] REPLICATION_METHOD = 'INCREMENTAL' diff --git a/tap_exacttarget/endpoints/list_sends.py b/tap_exacttarget/endpoints/list_sends.py index cf70f16..17f3bb0 100644 --- a/tap_exacttarget/endpoints/list_sends.py +++ b/tap_exacttarget/endpoints/list_sends.py @@ -4,92 +4,11 @@ from tap_exacttarget.client import request from tap_exacttarget.dao import DataAccessObject -from tap_exacttarget.schemas import ID_FIELD, CUSTOM_PROPERTY_LIST, \ - CREATED_DATE_FIELD, CUSTOMER_KEY_FIELD, OBJECT_ID_FIELD, \ - MODIFIED_DATE_FIELD, with_properties LOGGER = singer.get_logger() class ListSendDataAccessObject(DataAccessObject): - SCHEMA = with_properties({ - 'CreatedDate': CREATED_DATE_FIELD, - 'CustomerKey': CUSTOMER_KEY_FIELD, - 'ExistingUndeliverables': { - 'type': ['null', 'integer'], - 'description': ('Indicates whether bounces occurred on previous ' - 'send.'), - }, - 'ExistingUnsubscribes': { - 'type': ['null', 'integer'], - 'description': ('Indicates whether unsubscriptions occurred on ' - 'previous send.'), - }, - 'ForwardedEmails': { - 'type': ['null', 'integer'], - 'description': ('Number of emails forwarded for a send.'), - }, - 'HardBounces': { - 'type': ['null', 'integer'], - 'description': ('Indicates number of hard bounces associated ' - 'with a send.'), - }, - 'InvalidAddresses': { - 'type': ['null', 'integer'], - 'description': ('Specifies the number of invalid addresses ' - 'associated with a send.'), - }, - 'ListID': { - 'type': ['null', 'integer'], - 'description': 'List associated with the send.', - }, - 'ID': ID_FIELD, - 'MissingAddresses': { - 'type': ['null', 'integer'], - 'description': ('Specifies number of missing addresses ' - 'encountered within a send.'), - }, - 'ModifiedDate': MODIFIED_DATE_FIELD, - 'NumberDelivered': { - 'type': ['null', 'integer'], - 'description': ('Number of sent emails that did not bounce.'), 
- }, - 'NumberSent': { - 'type': ['null', 'integer'], - 'description': ('Number of emails actually sent as part of an ' - 'email send. This number reflects all of the sent ' - 'messages and may include bounced messages.'), - }, - 'ObjectID': OBJECT_ID_FIELD, - 'OtherBounces': { - 'type': ['null', 'integer'], - 'description': 'Specifies number of Other-type bounces in a send.', - }, - 'PartnerProperties': CUSTOM_PROPERTY_LIST, - 'SendID': { - 'type': ['null', 'integer'], - 'description': 'Contains identifier for a specific send.', - }, - 'SoftBounces': { - 'type': ['null', 'integer'], - 'description': ('Indicates number of soft bounces associated with ' - 'a specific send.'), - }, - 'UniqueClicks': { - 'type': ['null', 'integer'], - 'description': 'Indicates number of unique clicks on message.', - }, - 'UniqueOpens': { - 'type': ['null', 'integer'], - 'description': ('Indicates number of unique opens resulting from ' - 'a triggered send.'), - }, - 'Unsubscribes': { - 'type': ['null', 'integer'], - 'description': ('Indicates the number of unsubscribe events ' - 'associated with a send.'), - }, - }) TABLE = 'list_send' KEY_PROPERTIES = ['ListID', 'SendID'] diff --git a/tap_exacttarget/endpoints/list_subscribers.py b/tap_exacttarget/endpoints/list_subscribers.py index 8815c63..6b6c4a7 100644 --- a/tap_exacttarget/endpoints/list_subscribers.py +++ b/tap_exacttarget/endpoints/list_subscribers.py @@ -7,9 +7,6 @@ from tap_exacttarget.endpoints.subscribers import SubscriberDataAccessObject from tap_exacttarget.pagination import get_date_page, before_now, \ increment_date -from tap_exacttarget.schemas import ID_FIELD, CUSTOM_PROPERTY_LIST, \ - CREATED_DATE_FIELD, OBJECT_ID_FIELD, MODIFIED_DATE_FIELD, \ - SUBSCRIBER_KEY_FIELD, with_properties from tap_exacttarget.state import incorporate, save_state, \ get_last_record_value_for_table from tap_exacttarget.util import partition_all, sudsobj_to_dict @@ -35,24 +32,6 @@ def _get_list_subscriber_filter(_list, start, unit): class 
ListSubscriberDataAccessObject(DataAccessObject): - SCHEMA = with_properties({ - 'ID': ID_FIELD, - 'CreatedDate': CREATED_DATE_FIELD, - 'ModifiedDate': MODIFIED_DATE_FIELD, - 'ObjectID': OBJECT_ID_FIELD, - 'PartnerProperties': CUSTOM_PROPERTY_LIST, - 'ListID': { - 'type': ['null', 'integer'], - 'description': ('Defines identification for a list the ' - 'subscriber resides on.'), - }, - 'Status': { - 'type': ['null', 'string'], - 'description': ('Defines status of object. Status of ' - 'an address.'), - }, - 'SubscriberKey': SUBSCRIBER_KEY_FIELD, - }) TABLE = 'list_subscriber' KEY_PROPERTIES = ['SubscriberKey', 'ListID'] diff --git a/tap_exacttarget/endpoints/lists.py b/tap_exacttarget/endpoints/lists.py index abc1638..648c288 100644 --- a/tap_exacttarget/endpoints/lists.py +++ b/tap_exacttarget/endpoints/lists.py @@ -4,9 +4,6 @@ from tap_exacttarget.client import request from tap_exacttarget.dao import DataAccessObject -from tap_exacttarget.schemas import ID_FIELD, CUSTOM_PROPERTY_LIST, \ - CREATED_DATE_FIELD, OBJECT_ID_FIELD, DESCRIPTION_FIELD, \ - MODIFIED_DATE_FIELD, with_properties from tap_exacttarget.state import incorporate, save_state, \ get_last_record_value_for_table @@ -16,38 +13,6 @@ class ListDataAccessObject(DataAccessObject): - SCHEMA = with_properties({ - 'Category': { - 'type': ['null', 'integer'], - 'description': 'ID of the folder that an item is located in.', - }, - 'CreatedDate': CREATED_DATE_FIELD, - 'ID': ID_FIELD, - 'ModifiedDate': MODIFIED_DATE_FIELD, - 'ObjectID': OBJECT_ID_FIELD, - 'PartnerProperties': CUSTOM_PROPERTY_LIST, - 'ListClassification': { - 'type': ['null', 'string'], - 'description': ('Specifies the classification for a list.'), - }, - 'ListName': { - 'type': ['null', 'string'], - 'description': 'Name of a specific list.', - }, - 'Description': DESCRIPTION_FIELD, - 'SendClassification': { - 'type': ['null', 'string'], - 'description': ('Indicates the send classification to use ' - 'as part of a send definition.'), - }, - 
'Type': { - 'type': ['null', 'string'], - 'description': ('Indicates type of specific list. Valid ' - 'values include Public, Private, Salesforce, ' - 'GlobalUnsubscribe, and Master.') - } - }) - TABLE = 'list' KEY_PROPERTIES = ['ID'] REPLICATION_METHOD = 'INCREMENTAL' diff --git a/tap_exacttarget/endpoints/sends.py b/tap_exacttarget/endpoints/sends.py index e9424f5..612b7f6 100644 --- a/tap_exacttarget/endpoints/sends.py +++ b/tap_exacttarget/endpoints/sends.py @@ -4,8 +4,6 @@ from tap_exacttarget.client import request from tap_exacttarget.dao import DataAccessObject -from tap_exacttarget.schemas import ID_FIELD, CUSTOM_PROPERTY_LIST, \ - CREATED_DATE_FIELD, MODIFIED_DATE_FIELD, with_properties from tap_exacttarget.state import incorporate, save_state, \ get_last_record_value_for_table @@ -13,72 +11,6 @@ class SendDataAccessObject(DataAccessObject): - SCHEMA = with_properties({ - 'CreatedDate': CREATED_DATE_FIELD, - 'EmailID': { - 'type': ['null', 'integer'], - 'description': ('Specifies the ID of an email message ' - 'associated with a send.'), - }, - 'EmailName': { - 'type': ['null', 'string'], - 'description': ('Specifies the name of an email message ' - 'associated with a send.'), - }, - 'FromAddress': { - 'type': ['null', 'string'], - 'description': ('Indicates From address associated with a ' - 'object. Deprecated for email send ' - 'definitions and triggered send ' - 'definitions.'), - }, - 'FromName': { - 'type': ['null', 'string'], - 'description': ('Specifies the default email message From ' - 'Name. Deprecated for email send ' - 'definitions and triggered send ' - 'definitions.'), - }, - 'ID': ID_FIELD, - 'IsAlwaysOn': { - 'type': ['null', 'boolean'], - 'description': ('Indicates whether the request can be ' - 'performed while the system is is ' - 'maintenance mode. 
A value of true ' - 'indicates the system will process the ' - 'request.'), - }, - 'IsMultipart': { - 'type': ['null', 'boolean'], - 'description': ('Indicates whether the email is sent with ' - 'Multipart/MIME enabled.'), - }, - 'ModifiedDate': MODIFIED_DATE_FIELD, - 'PartnerProperties': CUSTOM_PROPERTY_LIST, - 'SendDate': { - 'type': ['null', 'string'], - 'format': 'date-time', - 'description': ('Indicates the date on which a send ' - 'occurred. Set this value to have a CST ' - '(Central Standard Time) value.'), - }, - 'SentDate': { - 'type': ['null', 'string'], - 'format': 'date-time', - 'description': ('Indicates date on which a send took ' - 'place.'), - }, - 'Status': { - 'type': ['null', 'string'], - 'description': ('Defines status of object. Status of an ' - 'address.'), - }, - 'Subject': { - 'type': ['null', 'string'], - 'description': ('Contains subject area information for ' - 'a message.'), - } - }) TABLE = 'send' KEY_PROPERTIES = ['ID'] diff --git a/tap_exacttarget/endpoints/subscribers.py b/tap_exacttarget/endpoints/subscribers.py index 9b7051c..6317a27 100644 --- a/tap_exacttarget/endpoints/subscribers.py +++ b/tap_exacttarget/endpoints/subscribers.py @@ -4,103 +4,11 @@ from tap_exacttarget.client import request from tap_exacttarget.dao import DataAccessObject -from tap_exacttarget.schemas import CUSTOM_PROPERTY_LIST, ID_FIELD, \ - CREATED_DATE_FIELD, CUSTOMER_KEY_FIELD, OBJECT_ID_FIELD, \ - SUBSCRIBER_KEY_FIELD, MODIFIED_DATE_FIELD, with_properties - LOGGER = singer.get_logger() -SCHEMA = with_properties({ - 'Addresses': { - 'type': 'array', - 'description': ('Indicates addresses belonging to a subscriber, ' - 'used to create, retrieve, update or delete an ' - 'email or SMS Address for a given subscriber.'), - 'items': { - 'type': 'object', - 'properties': { - 'Address': {'type': ['null', 'string']}, - 'AddressType': {'type': ['null', 'string']}, - 'AddressStatus': {'type': ['null', 'string']} - } - } - }, - 'Attributes': CUSTOM_PROPERTY_LIST, - 
'CreatedDate': CREATED_DATE_FIELD, - 'CustomerKey': CUSTOMER_KEY_FIELD, - 'EmailAddress': { - 'type': ['null', 'string'], - 'description': ('Contains the email address for a subscriber. ' - 'Indicates the data extension field contains ' - 'email address data.'), - }, - 'EmailTypePreference': { - 'type': ['null', 'string'], - 'description': 'The format in which email should be sent' - }, - 'ID': ID_FIELD, - 'ListIDs': { - 'type': 'array', - 'description': 'Defines list IDs a subscriber resides on.', - 'items': { - 'type': ['null', 'string'] - } - }, - 'Locale': { - 'type': ['null', 'string'], - 'description': ('Contains the locale information for an Account. ' - 'If no location is set, Locale defaults to en-US ' - '(English in United States).'), - }, - 'ModifiedDate': MODIFIED_DATE_FIELD, - 'ObjectID': OBJECT_ID_FIELD, - 'PartnerKey': { - 'type': ['null', 'string'], - 'description': ('Unique identifier provided by partner for an ' - 'object, accessible only via API.'), - }, - 'PartnerProperties': CUSTOM_PROPERTY_LIST, - 'PartnerType': { - 'type': ['null', 'string'], - 'description': 'Defines partner associated with a subscriber.' - }, - 'PrimaryEmailAddress': { - 'type': ['null', 'string'], - 'description': 'Indicates primary email address for a subscriber.' - }, - 'PrimarySMSAddress': { - 'type': ['null', 'string'], - 'description': ('Indicates primary SMS address for a subscriber. ' - 'Used to create and update SMS Address for a ' - 'given subscriber.'), - }, - 'PrimarySMSPublicationStatus': { - 'type': ['null', 'string'], - 'description': 'Indicates the subscriber\'s modality status.', - }, - 'Status': { - 'type': ['null', 'string'], - 'description': 'Defines status of object. 
Status of an address.', - }, - 'SubscriberKey': SUBSCRIBER_KEY_FIELD, - 'SubscriberTypeDefinition': { - 'type': ['null', 'string'], - 'description': ('Specifies if a subscriber resides in an ' - 'integration, such as Salesforce or Microsoft ' - 'Dynamics CRM'), - }, - 'UnsubscribedDate': { - 'type': ['null', 'string'], - 'description': ('Represents date subscriber unsubscribed ' - 'from a list.'), - } -}) - - class SubscriberDataAccessObject(DataAccessObject): - SCHEMA = SCHEMA TABLE = 'subscriber' KEY_PROPERTIES = ['ID'] REPLICATION_METHOD = 'INCREMENTAL' diff --git a/tap_exacttarget/schemas.py b/tap_exacttarget/schemas.py deleted file mode 100644 index 674a4a5..0000000 --- a/tap_exacttarget/schemas.py +++ /dev/null @@ -1,64 +0,0 @@ -def with_properties(properties): - return { - 'type': 'object', - 'properties': properties - } - - -CUSTOM_PROPERTY_LIST = { - 'type': 'array', - 'description': ('Specifies key-value pairs of properties associated with ' - 'an object.'), - 'items': { - 'type': 'object', - 'properties': { - 'Name': {'type': ['null', 'string']}, - 'Value': {'type': ['null', 'string']}, - } - } -} - -ID_FIELD = { - 'type': ['null', 'integer'], - 'description': ('Read-only legacy identifier for an object. Not ' - 'supported on all objects. 
Some objects use the ' - 'ObjectID property as the Marketing Cloud unique ' - 'ID.') -} - -CREATED_DATE_FIELD = { - 'type': ['null', 'string'], - 'description': ('Read-only date and time of the object\'s' - 'creation.'), -} - -MODIFIED_DATE_FIELD = { - 'type': ['null', 'string'], - 'description': ('Indicates the last time object information ' - 'was modified.') -} - -CUSTOMER_KEY_FIELD = { - 'type': ['null', 'string'], - 'description': ('User-supplied unique identifier for an ' - 'object within an object type (corresponds ' - 'to the external key assigned to an object ' - 'in the user interface).'), -} - -OBJECT_ID_FIELD = { - 'type': ['null', 'string'], - 'description': ('System-controlled, read-only text string ' - 'identifier for object.'), -} - -DESCRIPTION_FIELD = { - 'type': ['null', 'string'], - 'description': ('Describes and provides information regarding ' - 'the object.'), -} - -SUBSCRIBER_KEY_FIELD = { - 'type': ['null', 'string'], - 'description': 'Identification of a specific subscriber.', -} diff --git a/tap_exacttarget/schemas/campaigns.json b/tap_exacttarget/schemas/campaigns.json new file mode 100644 index 0000000..0aee2e8 --- /dev/null +++ b/tap_exacttarget/schemas/campaigns.json @@ -0,0 +1,26 @@ +{ + "type": "object", + "properties": { + "id": { + "type": ["null", "string"] + }, + "createdDate": { + "type": ["null", "string"] + }, + "modifiedDate": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "description": { + "type": ["null", "string"] + }, + "campaignCode": { + "type": ["null", "string"] + }, + "color": { + "type": ["null", "string"] + } + } +} \ No newline at end of file diff --git a/tap_exacttarget/schemas/content_areas.json b/tap_exacttarget/schemas/content_areas.json new file mode 100644 index 0000000..4c8a1c6 --- /dev/null +++ b/tap_exacttarget/schemas/content_areas.json @@ -0,0 +1,87 @@ +{ + "type": "object", + "properties": { + "BackgroundColor": { + "type": ["null", "string"], + "description": 
"Indicates background color of content area" + }, + "BorderColor": { + "type": ["null", "string"], + "description": "Indicates color of border surrounding content area" + }, + "BorderWidth": { + "type": ["null", "integer"], + "description": "Indicates pixel width of border surrounding content area" + }, + "CategoryID": { + "type": ["null", "integer"], + "description": "Specifies the identifier of the folder." + }, + "Cellpadding": { + "type": ["null", "integer"], + "description": "Indicates pixel value of padding around content area" + }, + "Cellspacing": { + "type": ["null", "integer"], + "description": "Indicates pixel value of spacing for content area" + }, + "Content": { + "type": ["null", "string"], + "description": "Identifies content contained in a content area." + }, + "CreatedDate": { + "$ref": "definations.json#/CREATED_DATE_FIELD" + }, + "CustomerKey": { + "$ref": "definations.json#/CUSTOMER_KEY_FIELD" + }, + "FontFamily": { + "type": ["null", "string"], + "description": "Indicates font family used in content area" + }, + "HasFontSize": { + "type": ["null", "boolean"], + "description": "Indicates whether the content area includes a specified font size or not" + }, + "ID": { + "$ref": "definations.json#/ID_FIELD" + }, + "IsBlank": { + "type": ["null", "boolean"], + "description": "Indicates if specified content area contains no content." + }, + "IsDynamicContent": { + "type": ["null", "boolean"], + "description": "Indicates if specific content area contains dynamic content." + }, + "IsLocked": { + "type": ["null", "boolean"], + "description": "Indicates if specific email content area within an Enterprise or Enterprise 2.0 account is locked and cannot be changed by subaccounts." + }, + "IsSurvey": { + "type": ["null", "boolean"], + "description": "Indicates whether a specific content area contains survey questions." + }, + "Key": { + "type": ["null", "string"], + "description": "Specifies key associated with content area in HTML body. 
Relates to the Email object via a custom type." + }, + "ModifiedDate": { + "$ref": "definations.json#/MODIFIED_DATE_FIELD" + }, + "Name": { + "type": ["null", "string"], + "description": "Name of the object or property." + }, + "ObjectID": { + "$ref": "definations.json#/OBJECT_ID_FIELD" + }, + "PartnerProperties": { + "$ref": "definations.json#/CUSTOM_PROPERTY_LIST" + }, + "Width": { + "type": ["null", "integer"], + "description": "Indicates pixel width of content area" + } + } +} \ No newline at end of file diff --git a/tap_exacttarget/schemas/definations.json b/tap_exacttarget/schemas/definations.json new file mode 100644 index 0000000..de10f87 --- /dev/null +++ b/tap_exacttarget/schemas/definations.json @@ -0,0 +1,41 @@ +{ + "CUSTOM_PROPERTY_LIST": { + "type": "array", + "description": "Specifies key-value pairs of properties associated with an object.", + "items": { + "type": "object", + "properties": { + "Name": {"type": ["null", "string"]}, + "Value": {"type": ["null", "string"]} + } + } + }, + "ID_FIELD": { + "type": ["null", "integer"], + "description": "Read-only legacy identifier for an object. Not supported on all objects. Some objects use the ObjectID property as the Marketing Cloud unique ID." + }, + "CREATED_DATE_FIELD": { + "type": ["null", "string"], + "description": "Read-only date and time of the object's creation." + }, + "MODIFIED_DATE_FIELD": { + "type": ["null", "string"], + "description": "Indicates the last time object information was modified." + }, + "CUSTOMER_KEY_FIELD": { + "type": ["null", "string"], + "description": "User-supplied unique identifier for an object within an object type (corresponds to the external key assigned to an object in the user interface)." + }, + "OBJECT_ID_FIELD": { + "type": ["null", "string"], + "description": "System-controlled, read-only text string identifier for object." + }, + "DESCRIPTION_FIELD": { + "type": ["null", "string"], + "description": "Describes and provides information regarding the object." 
+ }, + "SUBSCRIBER_KEY_FIELD": { + "type": ["null", "string"], + "description": "Identification of a specific subscriber." + } +} \ No newline at end of file diff --git a/tap_exacttarget/schemas/emails.json b/tap_exacttarget/schemas/emails.json new file mode 100644 index 0000000..b02aaf2 --- /dev/null +++ b/tap_exacttarget/schemas/emails.json @@ -0,0 +1,95 @@ +{ + "type": "object", + "properties": { + "CategoryID": { + "type": ["null", "integer"], + "description": "Specifies the identifier of the folder containing the email." + }, + "CharacterSet": { + "type": ["null", "string"], + "description": "Indicates encoding used in an email message." + }, + "ClonedFromID": { + "type": ["null", "integer"], + "description": "ID of email message from which the specified email message was created." + }, + "ContentAreaIDs": { + "type": "array", + "description": "Contains information on content areas included in an email message.", + "items": { + "type": ["null", "integer"] + } + }, + "ContentCheckStatus": { + "type": ["null", "string"], + "description": "Indicates whether content validation has completed for this email message." + }, + "CreatedDate": { + "$ref": "definations.json#/CREATED_DATE_FIELD" + }, + "CustomerKey": { + "$ref": "definations.json#/CUSTOMER_KEY_FIELD" + }, + "EmailType": { + "type": ["null", "string"], + "description": "Defines preferred email type." + }, + "HasDynamicSubjectLine": { + "type": ["null", "boolean"], + "description": "Indicates whether email message contains a dynamic subject line." + }, + "HTMLBody": { + "type": ["null", "string"], + "description": "Contains HTML body of an email message." + }, + "ID": { + "$ref": "definations.json#/ID_FIELD" + }, + "IsActive": { + "type": ["null", "boolean"], + "description": "Specifies whether the object is active." + }, + "IsHTMLPaste": { + "type": ["null", "boolean"], + "description": "Indicates whether email message was created via pasted HTML." 
+ }, + "ModifiedDate": { + "$ref": "definations.json#/MODIFIED_DATE_FIELD" + }, + "Name": { + "type": ["null", "string"], + "description": "Name of the object or property." + }, + "ObjectID": { + "$ref": "definations.json#/OBJECT_ID_FIELD" + }, + "PartnerProperties": { + "$ref": "definations.json#/CUSTOM_PROPERTY_LIST" + }, + "PreHeader": { + "type": ["null", "string"], + "description": "Contains text used in preheader of email message on mobile devices." + }, + "Status": { + "type": ["null", "string"], + "description": "Defines status of object. Status of an address." + }, + "Subject": { + "type": ["null", "string"], + "description": "Contains subject area information for a message." + }, + "SyncTextWithHTML": { + "type": ["null", "boolean"], + "description": "Makes the text version of an email contain the same content as the HTML version." + }, + "TextBody": { + "type": ["null", "string"], + "description": "Contains raw text body of a message." + }, + "__AdditionalEmailAttribute1": {"type": ["null", "string"]}, + "__AdditionalEmailAttribute2": {"type": ["null", "string"]}, + "__AdditionalEmailAttribute3": {"type": ["null", "string"]}, + "__AdditionalEmailAttribute4": {"type": ["null", "string"]}, + "__AdditionalEmailAttribute5": {"type": ["null", "string"]} + } +} \ No newline at end of file diff --git a/tap_exacttarget/schemas/events.json b/tap_exacttarget/schemas/events.json new file mode 100644 index 0000000..641c4ca --- /dev/null +++ b/tap_exacttarget/schemas/events.json @@ -0,0 +1,33 @@ +{ + "type": "object", + "properties": { + "SendID": { + "type": ["null", "integer"], + "description": "Contains identifier for a specific send." + }, + "EventDate": { + "type": ["null", "string"], + "format": "datetime", + "description": "Date when a tracking event occurred." 
+ }, + "EventType": { + "type": ["null", "string"], + "description": "The type of tracking event" + }, + "BatchID": { + "type": ["null","integer"], + "description": "Ties triggered send sent events to other events (like clicks and opens that occur at a later date and time)" + }, + "CorrelationID": { + "type": ["null","string"], + "description": "Identifies correlation of objects across several requests." + }, + "URL": { + "type": ["null","string"], + "description": "URL that was clicked." + }, + "SubscriberKey": { + "$ref": "definations.json#/SUBSCRIBER_KEY_FIELD" + } + } +} \ No newline at end of file diff --git a/tap_exacttarget/schemas/folders.json b/tap_exacttarget/schemas/folders.json new file mode 100644 index 0000000..06de8e6 --- /dev/null +++ b/tap_exacttarget/schemas/folders.json @@ -0,0 +1,46 @@ +{ + "type": "object", + "properties": { + "AllowChildren": { + "type": ["null", "boolean"], + "description": "Specifies whether a data folder can have child data folders." + }, + "ContentType": { + "type": ["null", "string"], + "description": "Defines the type of content contained within a folder." + }, + "CreatedDate": { + "$ref": "definations.json#/CREATED_DATE_FIELD" + }, + "CustomerKey": { + "$ref": "definations.json#/CUSTOMER_KEY_FIELD" + }, + "Description": { + "$ref": "definations.json#/DESCRIPTION_FIELD" + }, + "ID": { + "$ref": "definations.json#/ID_FIELD" + }, + "ModifiedDate": { + "$ref": "definations.json#/MODIFIED_DATE_FIELD" + }, + "Name": { + "type": ["null", "string"], + "description": "Name of the object or property." + }, + "ObjectID": { + "$ref": "definations.json#/OBJECT_ID_FIELD" + }, + "ParentFolder": { + "type": ["null", "integer"], + "description": "Specifies the parent folder for a data folder." + }, + "PartnerProperties": { + "$ref": "definations.json#/CUSTOM_PROPERTY_LIST" + }, + "Type": { + "type": ["null", "string"], + "description": "Indicates type of specific list. 
Valid values include Public, Private, Salesforce, GlobalUnsubscribe, and Master." + } + } +} \ No newline at end of file diff --git a/tap_exacttarget/schemas/list_sends.json b/tap_exacttarget/schemas/list_sends.json new file mode 100644 index 0000000..a4ef842 --- /dev/null +++ b/tap_exacttarget/schemas/list_sends.json @@ -0,0 +1,83 @@ +{ + "type": "object", + "properties": { + "CreatedDate": { + "$ref": "definations.json#/CREATED_DATE_FIELD" + }, + "CustomerKey": { + "$ref": "definations.json#/CUSTOMER_KEY_FIELD" + }, + "ExistingUndeliverables": { + "type": ["null", "integer"], + "description": "Indicates whether bounces occurred on previous send." + }, + "ExistingUnsubscribes": { + "type": ["null", "integer"], + "description": "Indicates whether unsubscriptions occurred on previous send." + }, + "ForwardedEmails": { + "type": ["null", "integer"], + "description": "Number of emails forwarded for a send." + }, + "HardBounces": { + "type": ["null", "integer"], + "description": "Indicates number of hard bounces associated with a send." + }, + "InvalidAddresses": { + "type": ["null", "integer"], + "description": "Specifies the number of invalid addresses associated with a send." + }, + "ListID": { + "type": ["null", "integer"], + "description": "List associated with the send." + }, + "ID": { + "$ref": "definations.json#/ID_FIELD" + }, + "MissingAddresses": { + "type": ["null", "integer"], + "description": "Specifies number of missing addresses encountered within a send." + }, + "ModifiedDate": { + "$ref": "definations.json#/MODIFIED_DATE_FIELD" + }, + "NumberDelivered": { + "type": ["null", "integer"], + "description": "Number of sent emails that did not bounce." + }, + "NumberSent": { + "type": ["null", "integer"], + "description": "Number of emails actually sent as part of an email send. This number reflects all of the sent messages and may include bounced messages." 
+ }, + "ObjectID": { + "$ref": "definations.json#/OBJECT_ID_FIELD" + }, + "OtherBounces": { + "type": ["null", "integer"], + "description": "Specifies number of Other-type bounces in a send." + }, + "PartnerProperties": { + "$ref": "definations.json#/CUSTOM_PROPERTY_LIST" + }, + "SendID": { + "type": ["null", "integer"], + "description": "Contains identifier for a specific send." + }, + "SoftBounces": { + "type": ["null", "integer"], + "description": "Indicates number of soft bounces associated with a specific send." + }, + "UniqueClicks": { + "type": ["null", "integer"], + "description": "Indicates number of unique clicks on message." + }, + "UniqueOpens": { + "type": ["null", "integer"], + "description": "Indicates number of unique opens resulting from a triggered send." + }, + "Unsubscribes": { + "type": ["null", "integer"], + "description": "Indicates the number of unsubscribe events associated with a send." + } + } +} \ No newline at end of file diff --git a/tap_exacttarget/schemas/list_subscribers.json b/tap_exacttarget/schemas/list_subscribers.json new file mode 100644 index 0000000..77c2a4d --- /dev/null +++ b/tap_exacttarget/schemas/list_subscribers.json @@ -0,0 +1,31 @@ +{ + "type": "object", + "properties": { + "ID": { + "$ref": "definations.json#/ID_FIELD" + }, + "CreatedDate": { + "$ref": "definations.json#/CREATED_DATE_FIELD" + }, + "ModifiedDate": { + "$ref": "definations.json#/MODIFIED_DATE_FIELD" + }, + "ObjectID": { + "$ref": "definations.json#/OBJECT_ID_FIELD" + }, + "PartnerProperties": { + "$ref": "definations.json#/CUSTOM_PROPERTY_LIST" + }, + "ListID": { + "type": ["null", "integer"], + "description": "Defines identification for a list the subscriber resides on." + }, + "Status": { + "type": ["null", "string"], + "description": "Defines status of object. Status of an address." 
+ }, + "SubscriberKey": { + "$ref": "definations.json#/SUBSCRIBER_KEY_FIELD" + } + } +} \ No newline at end of file diff --git a/tap_exacttarget/schemas/lists.json b/tap_exacttarget/schemas/lists.json new file mode 100644 index 0000000..f8adaf9 --- /dev/null +++ b/tap_exacttarget/schemas/lists.json @@ -0,0 +1,43 @@ +{ + "type": "object", + "properties": { + "Category": { + "type": ["null", "integer"], + "description": "ID of the folder that an item is located in." + }, + "CreatedDate": { + "$ref": "definations.json#/CREATED_DATE_FIELD" + }, + "ID": { + "$ref": "definations.json#/ID_FIELD" + }, + "ModifiedDate": { + "$ref": "definations.json#/MODIFIED_DATE_FIELD" + }, + "ObjectID": { + "$ref": "definations.json#/OBJECT_ID_FIELD" + }, + "PartnerProperties": { + "$ref": "definations.json#/CUSTOM_PROPERTY_LIST" + }, + "ListClassification": { + "type": ["null", "string"], + "description": "Specifies the classification for a list." + }, + "ListName": { + "type": ["null", "string"], + "description": "Name of a specific list." + }, + "Description": { + "$ref": "definations.json#/DESCRIPTION_FIELD" + }, + "SendClassification": { + "type": ["null", "string"], + "description": "Indicates the send classification to use as part of a send definition." + }, + "Type": { + "type": ["null", "string"], + "description": "Indicates type of specific list. Valid values include Public, Private, Salesforce, GlobalUnsubscribe, and Master." + } + } +} \ No newline at end of file diff --git a/tap_exacttarget/schemas/sends.json b/tap_exacttarget/schemas/sends.json new file mode 100644 index 0000000..1a22577 --- /dev/null +++ b/tap_exacttarget/schemas/sends.json @@ -0,0 +1,59 @@ +{ + "type": "object", + "properties": { + "CreatedDate": { + "$ref": "definations.json#/CREATED_DATE_FIELD" + }, + "EmailID": { + "type": ["null", "integer"], + "description": "Specifies the ID of an email message associated with a send." 
+ }, + "EmailName": { + "type": ["null", "string"], + "description": "Specifies the name of an email message associated with a send." + }, + "FromAddress": { + "type": ["null", "string"], + "description": "Indicates From address associated with a object. Deprecated for email send definitions and triggered send definitions." + }, + "FromName": { + "type": ["null", "string"], + "description": "Specifies the default email message From Name. Deprecated for email send definitions and triggered send definitions." + }, + "ID": { + "$ref": "definations.json#/ID_FIELD" + }, + "IsAlwaysOn": { + "type": ["null", "boolean"], + "description": "Indicates whether the request can be performed while the system is is maintenance mode. A value of true indicates the system will process the request." + }, + "IsMultipart": { + "type": ["null", "boolean"], + "description": "Indicates whether the email is sent with Multipart/MIME enabled." + }, + "ModifiedDate": { + "$ref": "definations.json#/MODIFIED_DATE_FIELD" + }, + "PartnerProperties": { + "$ref": "definations.json#/CUSTOM_PROPERTY_LIST" + }, + "SendDate": { + "type": ["null", "string"], + "format": "date-time", + "description": "Indicates the date on which a send occurred. Set this value to have a CST (Central Standard Time) value." + }, + "SentDate": { + "type": ["null", "string"], + "format": "date-time", + "description": "Indicates date on which a send took place." + }, + "Status": { + "type": ["null", "string"], + "description": "Defines status of object. Status of an address." + }, + "Subject": { + "type": ["null", "string"], + "description": "Contains subject area information for a message." 
+ } + } +} \ No newline at end of file diff --git a/tap_exacttarget/schemas/subscribers.json b/tap_exacttarget/schemas/subscribers.json new file mode 100644 index 0000000..fe50e28 --- /dev/null +++ b/tap_exacttarget/schemas/subscribers.json @@ -0,0 +1,92 @@ +{ + "type": "object", + "properties": { + "Addresses": { + "type": "array", + "description": "Indicates addresses belonging to a subscriber, used to create, retrieve, update or delete an email or SMS Address for a given subscriber.", + "items": { + "type": "object", + "properties": { + "Address": {"type": ["null", "string"]}, + "AddressType": {"type": ["null", "string"]}, + "AddressStatus": {"type": ["null", "string"]} + } + } + }, + "Attributes": { + "$ref": "definations.json#/CUSTOM_PROPERTY_LIST" + }, + "CreatedDate": { + "$ref": "definations.json#/CREATED_DATE_FIELD" + }, + "CustomerKey": { + "$ref": "definations.json#/CUSTOMER_KEY_FIELD" + }, + "EmailAddress": { + "type": ["null", "string"], + "description": "Contains the email address for a subscriber. Indicates the data extension field contains email address data." + }, + "EmailTypePreference": { + "type": ["null", "string"], + "description": "The format in which email should be sent" + }, + "ID": { + "$ref": "definations.json#/ID_FIELD" + }, + "ListIDs": { + "type": "array", + "description": "Defines list IDs a subscriber resides on.", + "items": { + "type": ["null", "string"] + } + }, + "Locale": { + "type": ["null", "string"], + "description": "Contains the locale information for an Account. If no location is set, Locale defaults to en-US (English in United States)." + }, + "ModifiedDate": { + "$ref": "definations.json#/MODIFIED_DATE_FIELD" + }, + "ObjectID": { + "$ref": "definations.json#/OBJECT_ID_FIELD" + }, + "PartnerKey": { + "type": ["null", "string"], + "description": "Unique identifier provided by partner for an object, accessible only via API." 
+ }, + "PartnerProperties": { + "$ref": "definations.json#/CUSTOM_PROPERTY_LIST" + }, + "PartnerType": { + "type": ["null", "string"], + "description": "Defines partner associated with a subscriber." + }, + "PrimaryEmailAddress": { + "type": ["null", "string"], + "description": "Indicates primary email address for a subscriber." + }, + "PrimarySMSAddress": { + "type": ["null", "string"], + "description": "Indicates primary SMS address for a subscriber. Used to create and update SMS Address for a given subscriber." + }, + "PrimarySMSPublicationStatus": { + "type": ["null", "string"], + "description": "Indicates the subscriber's modality status." + }, + "Status": { + "type": ["null", "string"], + "description": "Defines status of object. Status of an address." + }, + "SubscriberKey": { + "$ref": "definations.json#/SUBSCRIBER_KEY_FIELD" + }, + "SubscriberTypeDefinition": { + "type": ["null", "string"], + "description": "Specifies if a subscriber resides in an integration, such as Salesforce or Microsoft Dynamics CRM" + }, + "UnsubscribedDate": { + "type": ["null", "string"], + "description": "Represents date subscriber unsubscribed from a list." 
+ } + } +} \ No newline at end of file diff --git a/tests/unittests/test_schema.py b/tests/unittests/test_schema.py new file mode 100644 index 0000000..dc5a66c --- /dev/null +++ b/tests/unittests/test_schema.py @@ -0,0 +1,54 @@ +import unittest +import tap_exacttarget.dao as dao +from unittest import mock + +class TestSchema(unittest.TestCase): + + @mock.patch("tap_exacttarget.dao.get_abs_path") + @mock.patch("singer.utils.load_json") + def test_load_schema(self, mocked_load_json, mocked_get_abs_path): + field_schema = { + "type": "object", + "properties": { + "field": { + "type": ["null", "string"] + } + } + } + + # mock singer.utils.load_json and return 'field_schema' + mocked_load_json.return_value = field_schema.copy() + + # call actual function + schema = dao.load_schema("test") + + # verify if the 'schema' is same as 'field_schema' + self.assertEquals(schema, field_schema) + + @mock.patch("tap_exacttarget.dao.get_abs_path") + @mock.patch("singer.utils.load_json") + def test_load_schema_references(self, mocked_load_json, mocked_get_abs_path): + field_schema = { + "type": "object", + "properties": { + "field1": { + "type": ["null", "string"] + }, + "field2": { + "type": ["null", "string"] + } + } + } + + # mock singer.utils.load_json and return 'field_schema' + mocked_load_json.return_value = field_schema.copy() + + # call the actual function + schema = dao.load_schema_references() + + # make data for assertion + expected_schema = {} + expected_schema["definations.json"] = field_schema + + # verify if the 'schema' is same as 'field_schema' + self.assertEquals(schema, expected_schema) From ab439956573cc349fbd63db358ae4b1887ed162d Mon Sep 17 00:00:00 2001 From: harshpatel4_crest Date: Fri, 8 Oct 2021 14:38:50 +0530 Subject: [PATCH 28/36] resolve pylint --- tap_exacttarget/dao.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tap_exacttarget/dao.py b/tap_exacttarget/dao.py index 427a94f..5fda5b4 100644 --- a/tap_exacttarget/dao.py +++ 
b/tap_exacttarget/dao.py @@ -13,7 +13,7 @@ def _get_catalog_schema(catalog): return catalog.get('schema', {}).get('properties') def get_abs_path(path): - return os.path.join(os.path.dirname(os.path.realpath(__file__)), path) + return os.path.join(os.path.dirname(os.path.realpath(__file__)), path) # function to load the fields in the 'definations' which contains the reference fields def load_schema_references(): From 5864acce670223c12bcc9ec74552d2b27fdadc83 Mon Sep 17 00:00:00 2001 From: harshpatel4_crest Date: Fri, 8 Oct 2021 14:57:18 +0530 Subject: [PATCH 29/36] resolve integration test --- setup.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 07c1e2d..497302e 100644 --- a/setup.py +++ b/setup.py @@ -29,5 +29,8 @@ [console_scripts] tap-exacttarget=tap_exacttarget:main ''', - packages=find_packages() + packages=find_packages(), + package_data={ + 'tap_exacttarget': ['schemas/*.json'] + } ) From 2a2b20c35bdf15a2acc0f3d85c7a457cb89cfb6f Mon Sep 17 00:00:00 2001 From: harshpatel4_crest Date: Fri, 8 Oct 2021 15:29:25 +0530 Subject: [PATCH 30/36] corrected typo --- tap_exacttarget/dao.py | 6 +++--- tap_exacttarget/schemas/content_areas.json | 12 ++++++------ .../{definations.json => definitions.json} | 0 tap_exacttarget/schemas/emails.json | 12 ++++++------ tap_exacttarget/schemas/events.json | 2 +- tap_exacttarget/schemas/folders.json | 14 +++++++------- tap_exacttarget/schemas/list_sends.json | 12 ++++++------ tap_exacttarget/schemas/list_subscribers.json | 12 ++++++------ tap_exacttarget/schemas/lists.json | 12 ++++++------ tap_exacttarget/schemas/sends.json | 8 ++++---- tap_exacttarget/schemas/subscribers.json | 16 ++++++++-------- tests/unittests/test_schema.py | 2 +- 12 files changed, 54 insertions(+), 54 deletions(-) rename tap_exacttarget/schemas/{definations.json => definitions.json} (100%) diff --git a/tap_exacttarget/dao.py b/tap_exacttarget/dao.py index 5fda5b4..7380a71 100644 --- a/tap_exacttarget/dao.py 
+++ b/tap_exacttarget/dao.py @@ -15,13 +15,13 @@ def _get_catalog_schema(catalog): def get_abs_path(path): return os.path.join(os.path.dirname(os.path.realpath(__file__)), path) -# function to load the fields in the 'definations' which contains the reference fields +# function to load the fields in the 'definitions' which contains the reference fields def load_schema_references(): - shared_schema_path = get_abs_path('schemas/definations.json') + shared_schema_path = get_abs_path('schemas/definitions.json') refs = {} # load json from the path - refs["definations.json"] = utils.load_json(shared_schema_path) + refs["definitions.json"] = utils.load_json(shared_schema_path) return refs diff --git a/tap_exacttarget/schemas/content_areas.json b/tap_exacttarget/schemas/content_areas.json index 4c8a1c6..e0850a3 100644 --- a/tap_exacttarget/schemas/content_areas.json +++ b/tap_exacttarget/schemas/content_areas.json @@ -30,10 +30,10 @@ "description": "Identifies content contained in a content area." }, "CreatedDate": { - "$ref": "definations.json#/CREATED_DATE_FIELD" + "$ref": "definitions.json#/CREATED_DATE_FIELD" }, "CustomerKey": { - "$ref": "definations.json#/CUSTOMER_KEY_FIELD" + "$ref": "definitions.json#/CUSTOMER_KEY_FIELD" }, "FontFamily": { "type": ["null", "string"], @@ -44,7 +44,7 @@ "description": "Indicates whether the content area includes a specified font size or not" }, "ID": { - "$ref": "definations.json#/ID_FIELD" + "$ref": "definitions.json#/ID_FIELD" }, "IsBlank": { "type": ["null", "boolean"], @@ -67,17 +67,17 @@ "description": "Specifies key associated with content area in HTML body. Relates to the Email object via a custom type." }, "ModifiedDate": { - "$ref": "definations.json#/MODIFIED_DATE_FIELD" + "$ref": "definitions.json#/MODIFIED_DATE_FIELD" }, "Name": { "type": ["null", "string"], "description": "Name of the object or property." 
}, "ObjectID": { - "$ref": "definations.json#/OBJECT_ID_FIELD" + "$ref": "definitions.json#/OBJECT_ID_FIELD" }, "PartnerProperties": { - "$ref": "definations.json#/CUSTOM_PROPERTY_LIST" + "$ref": "definitions.json#/CUSTOM_PROPERTY_LIST" }, "Width": { "type": ["null", "integer"], diff --git a/tap_exacttarget/schemas/definations.json b/tap_exacttarget/schemas/definitions.json similarity index 100% rename from tap_exacttarget/schemas/definations.json rename to tap_exacttarget/schemas/definitions.json diff --git a/tap_exacttarget/schemas/emails.json b/tap_exacttarget/schemas/emails.json index b02aaf2..13f9136 100644 --- a/tap_exacttarget/schemas/emails.json +++ b/tap_exacttarget/schemas/emails.json @@ -25,10 +25,10 @@ "description": "Indicates whether content validation has completed for this email message." }, "CreatedDate": { - "$ref": "definations.json#/CREATED_DATE_FIELD" + "$ref": "definitions.json#/CREATED_DATE_FIELD" }, "CustomerKey": { - "$ref": "definations.json#/CUSTOMER_KEY_FIELD" + "$ref": "definitions.json#/CUSTOMER_KEY_FIELD" }, "EmailType": { "type": ["null", "string"], @@ -43,7 +43,7 @@ "description": "Contains HTML body of an email message." }, "ID": { - "$ref": "definations.json#/ID_FIELD" + "$ref": "definitions.json#/ID_FIELD" }, "IsActive": { "type": ["null", "boolean"], @@ -54,17 +54,17 @@ "description": "Indicates whether email message was created via pasted HTML." }, "ModifiedDate": { - "$ref": "definations.json#/MODIFIED_DATE_FIELD" + "$ref": "definitions.json#/MODIFIED_DATE_FIELD" }, "Name": { "type": ["null", "string"], "description": "Name of the object or property." 
}, "ObjectID": { - "$ref": "definations.json#/OBJECT_ID_FIELD" + "$ref": "definitions.json#/OBJECT_ID_FIELD" }, "PartnerProperties": { - "$ref": "definations.json#/CUSTOM_PROPERTY_LIST" + "$ref": "definitions.json#/CUSTOM_PROPERTY_LIST" }, "PreHeader": { "type": ["null", "string"], diff --git a/tap_exacttarget/schemas/events.json b/tap_exacttarget/schemas/events.json index 641c4ca..ab47a2b 100644 --- a/tap_exacttarget/schemas/events.json +++ b/tap_exacttarget/schemas/events.json @@ -27,7 +27,7 @@ "description": "URL that was clicked." }, "SubscriberKey": { - "$ref": "definations.json#/SUBSCRIBER_KEY_FIELD" + "$ref": "definitions.json#/SUBSCRIBER_KEY_FIELD" } } } \ No newline at end of file diff --git a/tap_exacttarget/schemas/folders.json b/tap_exacttarget/schemas/folders.json index 06de8e6..f81dd7e 100644 --- a/tap_exacttarget/schemas/folders.json +++ b/tap_exacttarget/schemas/folders.json @@ -10,33 +10,33 @@ "description": "Defines the type of content contained within a folder." }, "CreatedDate": { - "$ref": "definations.json#/CREATED_DATE_FIELD" + "$ref": "definitions.json#/CREATED_DATE_FIELD" }, "CustomerKey": { - "$ref": "definations.json#/CUSTOMER_KEY_FIELD" + "$ref": "definitions.json#/CUSTOMER_KEY_FIELD" }, "Description": { - "$ref": "definations.json#/DESCRIPTION_FIELD" + "$ref": "definitions.json#/DESCRIPTION_FIELD" }, "ID": { - "$ref": "definations.json#/ID_FIELD" + "$ref": "definitions.json#/ID_FIELD" }, "ModifiedDate": { - "$ref": "definations.json#/MODIFIED_DATE_FIELD" + "$ref": "definitions.json#/MODIFIED_DATE_FIELD" }, "Name": { "type": ["null", "string"], "description": "Name of the object or property." }, "ObjectID": { - "$ref": "definations.json#/OBJECT_ID_FIELD" + "$ref": "definitions.json#/OBJECT_ID_FIELD" }, "ParentFolder": { "type": ["null", "integer"], "description": "Specifies the parent folder for a data folder." 
}, "PartnerProperties": { - "$ref": "definations.json#/CUSTOM_PROPERTY_LIST" + "$ref": "definitions.json#/CUSTOM_PROPERTY_LIST" }, "Type": { "type": ["null", "string"], diff --git a/tap_exacttarget/schemas/list_sends.json b/tap_exacttarget/schemas/list_sends.json index a4ef842..4a67dfd 100644 --- a/tap_exacttarget/schemas/list_sends.json +++ b/tap_exacttarget/schemas/list_sends.json @@ -2,10 +2,10 @@ "type": "object", "properties": { "CreatedDate": { - "$ref": "definations.json#/CREATED_DATE_FIELD" + "$ref": "definitions.json#/CREATED_DATE_FIELD" }, "CustomerKey": { - "$ref": "definations.json#/CUSTOMER_KEY_FIELD" + "$ref": "definitions.json#/CUSTOMER_KEY_FIELD" }, "ExistingUndeliverables": { "type": ["null", "integer"], @@ -32,14 +32,14 @@ "description": "List associated with the send." }, "ID": { - "$ref": "definations.json#/ID_FIELD" + "$ref": "definitions.json#/ID_FIELD" }, "MissingAddresses": { "type": ["null", "integer"], "description": "Specifies number of missing addresses encountered within a send." }, "ModifiedDate": { - "$ref": "definations.json#/MODIFIED_DATE_FIELD" + "$ref": "definitions.json#/MODIFIED_DATE_FIELD" }, "NumberDelivered": { "type": ["null", "integer"], @@ -50,14 +50,14 @@ "description": "Number of emails actually sent as part of an email send. This number reflects all of the sent messages and may include bounced messages." }, "ObjectID": { - "$ref": "definations.json#/OBJECT_ID_FIELD" + "$ref": "definitions.json#/OBJECT_ID_FIELD" }, "OtherBounces": { "type": ["null", "integer"], "description": "Specifies number of Other-type bounces in a send." 
}, "PartnerProperties": { - "$ref": "definations.json#/CUSTOM_PROPERTY_LIST" + "$ref": "definitions.json#/CUSTOM_PROPERTY_LIST" }, "SendID": { "type": ["null", "integer"], diff --git a/tap_exacttarget/schemas/list_subscribers.json b/tap_exacttarget/schemas/list_subscribers.json index 77c2a4d..808f5c1 100644 --- a/tap_exacttarget/schemas/list_subscribers.json +++ b/tap_exacttarget/schemas/list_subscribers.json @@ -2,19 +2,19 @@ "type": "object", "properties": { "ID": { - "$ref": "definations.json#/ID_FIELD" + "$ref": "definitions.json#/ID_FIELD" }, "CreatedDate": { - "$ref": "definations.json#/CREATED_DATE_FIELD" + "$ref": "definitions.json#/CREATED_DATE_FIELD" }, "ModifiedDate": { - "$ref": "definations.json#/MODIFIED_DATE_FIELD" + "$ref": "definitions.json#/MODIFIED_DATE_FIELD" }, "ObjectID": { - "$ref": "definations.json#/OBJECT_ID_FIELD" + "$ref": "definitions.json#/OBJECT_ID_FIELD" }, "PartnerProperties": { - "$ref": "definations.json#/CUSTOM_PROPERTY_LIST" + "$ref": "definitions.json#/CUSTOM_PROPERTY_LIST" }, "ListID": { "type": ["null", "integer"], @@ -25,7 +25,7 @@ "description": "Defines status of object. Status of an address." }, "SubscriberKey": { - "$ref": "definations.json#/SUBSCRIBER_KEY_FIELD" + "$ref": "definitions.json#/SUBSCRIBER_KEY_FIELD" } } } \ No newline at end of file diff --git a/tap_exacttarget/schemas/lists.json b/tap_exacttarget/schemas/lists.json index f8adaf9..a7516c9 100644 --- a/tap_exacttarget/schemas/lists.json +++ b/tap_exacttarget/schemas/lists.json @@ -6,19 +6,19 @@ "description": "ID of the folder that an item is located in." 
}, "CreatedDate": { - "$ref": "definations.json#/CREATED_DATE_FIELD" + "$ref": "definitions.json#/CREATED_DATE_FIELD" }, "ID": { - "$ref": "definations.json#/ID_FIELD" + "$ref": "definitions.json#/ID_FIELD" }, "ModifiedDate": { - "$ref": "definations.json#/MODIFIED_DATE_FIELD" + "$ref": "definitions.json#/MODIFIED_DATE_FIELD" }, "ObjectID": { - "$ref": "definations.json#/OBJECT_ID_FIELD" + "$ref": "definitions.json#/OBJECT_ID_FIELD" }, "PartnerProperties": { - "$ref": "definations.json#/CUSTOM_PROPERTY_LIST" + "$ref": "definitions.json#/CUSTOM_PROPERTY_LIST" }, "ListClassification": { "type": ["null", "string"], @@ -29,7 +29,7 @@ "description": "Name of a specific list." }, "Description": { - "$ref": "definations.json#/DESCRIPTION_FIELD" + "$ref": "definitions.json#/DESCRIPTION_FIELD" }, "SendClassification": { "type": ["null", "string"], diff --git a/tap_exacttarget/schemas/sends.json b/tap_exacttarget/schemas/sends.json index 1a22577..30369fe 100644 --- a/tap_exacttarget/schemas/sends.json +++ b/tap_exacttarget/schemas/sends.json @@ -2,7 +2,7 @@ "type": "object", "properties": { "CreatedDate": { - "$ref": "definations.json#/CREATED_DATE_FIELD" + "$ref": "definitions.json#/CREATED_DATE_FIELD" }, "EmailID": { "type": ["null", "integer"], @@ -21,7 +21,7 @@ "description": "Specifies the default email message From Name. Deprecated for email send definitions and triggered send definitions." }, "ID": { - "$ref": "definations.json#/ID_FIELD" + "$ref": "definitions.json#/ID_FIELD" }, "IsAlwaysOn": { "type": ["null", "boolean"], @@ -32,10 +32,10 @@ "description": "Indicates whether the email is sent with Multipart/MIME enabled." 
}, "ModifiedDate": { - "$ref": "definations.json#/MODIFIED_DATE_FIELD" + "$ref": "definitions.json#/MODIFIED_DATE_FIELD" }, "PartnerProperties": { - "$ref": "definations.json#/CUSTOM_PROPERTY_LIST" + "$ref": "definitions.json#/CUSTOM_PROPERTY_LIST" }, "SendDate": { "type": ["null", "string"], diff --git a/tap_exacttarget/schemas/subscribers.json b/tap_exacttarget/schemas/subscribers.json index fe50e28..68225e4 100644 --- a/tap_exacttarget/schemas/subscribers.json +++ b/tap_exacttarget/schemas/subscribers.json @@ -14,13 +14,13 @@ } }, "Attributes": { - "$ref": "definations.json#/CUSTOM_PROPERTY_LIST" + "$ref": "definitions.json#/CUSTOM_PROPERTY_LIST" }, "CreatedDate": { - "$ref": "definations.json#/CREATED_DATE_FIELD" + "$ref": "definitions.json#/CREATED_DATE_FIELD" }, "CustomerKey": { - "$ref": "definations.json#/CUSTOMER_KEY_FIELD" + "$ref": "definitions.json#/CUSTOMER_KEY_FIELD" }, "EmailAddress": { "type": ["null", "string"], @@ -31,7 +31,7 @@ "description": "The format in which email should be sent" }, "ID": { - "$ref": "definations.json#/ID_FIELD" + "$ref": "definitions.json#/ID_FIELD" }, "ListIDs": { "type": "array", @@ -45,17 +45,17 @@ "description": "Contains the locale information for an Account. If no location is set, Locale defaults to en-US (English in United States)." }, "ModifiedDate": { - "$ref": "definations.json#/MODIFIED_DATE_FIELD" + "$ref": "definitions.json#/MODIFIED_DATE_FIELD" }, "ObjectID": { - "$ref": "definations.json#/OBJECT_ID_FIELD" + "$ref": "definitions.json#/OBJECT_ID_FIELD" }, "PartnerKey": { "type": ["null", "string"], "description": "Unique identifier provided by partner for an object, accessible only via API." }, "PartnerProperties": { - "$ref": "definations.json#/CUSTOM_PROPERTY_LIST" + "$ref": "definitions.json#/CUSTOM_PROPERTY_LIST" }, "PartnerType": { "type": ["null", "string"], @@ -78,7 +78,7 @@ "description": "Defines status of object. Status of an address." 
}, "SubscriberKey": { - "$ref": "definations.json#/SUBSCRIBER_KEY_FIELD" + "$ref": "definitions.json#/SUBSCRIBER_KEY_FIELD" }, "SubscriberTypeDefinition": { "type": ["null", "string"], diff --git a/tests/unittests/test_schema.py b/tests/unittests/test_schema.py index dc5a66c..c4d8307 100644 --- a/tests/unittests/test_schema.py +++ b/tests/unittests/test_schema.py @@ -48,7 +48,7 @@ def test_load_schema_references(self, mocked_load_json, mocked_get_abs_path): # make data for assertion expected_schema = {} - expected_schema["definations.json"] = field_schema + expected_schema["definitions.json"] = field_schema # verify if the 'schema' is same as 'field_schema' self.assertEquals(schema, expected_schema) From 041819810e2ea7b8ae68801e0788aac23c940e73 Mon Sep 17 00:00:00 2001 From: Harsh <80324346+harshpatel4crest@users.noreply.github.com> Date: Wed, 13 Oct 2021 11:44:41 +0530 Subject: [PATCH 31/36] TDL-14621: Add retry logic to requests and TDL-14622: Retry ConnectionResetErrors (#71) * added backoff for certain errors * resolve pylint * updated decorator location * added unittests * added comment * added comments --- tap_exacttarget/dao.py | 10 + tap_exacttarget/endpoints/campaigns.py | 3 +- tap_exacttarget/endpoints/content_areas.py | 3 +- tap_exacttarget/endpoints/data_extensions.py | 6 +- tap_exacttarget/endpoints/emails.py | 3 +- tap_exacttarget/endpoints/events.py | 3 +- tap_exacttarget/endpoints/folders.py | 3 +- tap_exacttarget/endpoints/list_sends.py | 3 +- tap_exacttarget/endpoints/list_subscribers.py | 4 +- tap_exacttarget/endpoints/lists.py | 3 +- tap_exacttarget/endpoints/sends.py | 3 +- tap_exacttarget/endpoints/subscribers.py | 3 +- tests/unittests/test_backoff.py | 810 ++++++++++++++++++ tests/unittests/test_pagination.py | 2 + tests/unittests/test_state.py | 5 + tests/unittests/test_util.py | 1 + 16 files changed, 854 insertions(+), 11 deletions(-) create mode 100644 tests/unittests/test_backoff.py diff --git a/tap_exacttarget/dao.py 
b/tap_exacttarget/dao.py index ec7224d..445d30d 100644 --- a/tap_exacttarget/dao.py +++ b/tap_exacttarget/dao.py @@ -1,3 +1,6 @@ +import backoff +import socket +import functools import singer from singer import metadata @@ -11,6 +14,13 @@ def _get_catalog_schema(catalog): return catalog.get('schema', {}).get('properties') +# decorator for retrying on error +def exacttarget_error_handling(fnc): + @backoff.on_exception(backoff.expo, (socket.timeout, ConnectionError), max_tries=5, factor=2) + @functools.wraps(fnc) + def wrapper(*args, **kwargs): + return fnc(*args, **kwargs) + return wrapper class DataAccessObject(): diff --git a/tap_exacttarget/endpoints/campaigns.py b/tap_exacttarget/endpoints/campaigns.py index 6c3d264..03ed37e 100644 --- a/tap_exacttarget/endpoints/campaigns.py +++ b/tap_exacttarget/endpoints/campaigns.py @@ -2,7 +2,7 @@ import singer from tap_exacttarget.client import request -from tap_exacttarget.dao import DataAccessObject +from tap_exacttarget.dao import (DataAccessObject, exacttarget_error_handling) from tap_exacttarget.schemas import with_properties LOGGER = singer.get_logger() @@ -37,6 +37,7 @@ class CampaignDataAccessObject(DataAccessObject): TABLE = 'campaign' KEY_PROPERTIES = ['id'] + @exacttarget_error_handling def sync_data(self): cursor = request( 'Campaign', diff --git a/tap_exacttarget/endpoints/content_areas.py b/tap_exacttarget/endpoints/content_areas.py index e44f6a9..1f5f621 100644 --- a/tap_exacttarget/endpoints/content_areas.py +++ b/tap_exacttarget/endpoints/content_areas.py @@ -2,7 +2,7 @@ import singer from tap_exacttarget.client import request -from tap_exacttarget.dao import DataAccessObject +from tap_exacttarget.dao import (DataAccessObject, exacttarget_error_handling) from tap_exacttarget.schemas import ID_FIELD, CUSTOM_PROPERTY_LIST, \ CREATED_DATE_FIELD, MODIFIED_DATE_FIELD, CUSTOMER_KEY_FIELD, \ OBJECT_ID_FIELD, with_properties @@ -105,6 +105,7 @@ class ContentAreaDataAccessObject(DataAccessObject): TABLE = 
'content_area' KEY_PROPERTIES = ['ID'] + @exacttarget_error_handling def sync_data(self): table = self.__class__.TABLE selector = FuelSDK.ET_ContentArea diff --git a/tap_exacttarget/endpoints/data_extensions.py b/tap_exacttarget/endpoints/data_extensions.py index 8479585..d87a9e0 100644 --- a/tap_exacttarget/endpoints/data_extensions.py +++ b/tap_exacttarget/endpoints/data_extensions.py @@ -4,7 +4,7 @@ from funcy import set_in, update_in, merge from tap_exacttarget.client import request, request_from_cursor -from tap_exacttarget.dao import DataAccessObject +from tap_exacttarget.dao import (DataAccessObject, exacttarget_error_handling) from tap_exacttarget.pagination import get_date_page, before_now, \ increment_date from tap_exacttarget.state import incorporate, save_state, \ @@ -44,6 +44,7 @@ class DataExtensionDataAccessObject(DataAccessObject): def matches_catalog(cls, catalog): return 'data_extension.' in catalog.get('stream') + @exacttarget_error_handling def _get_extensions(self): result = request( 'DataExtension', @@ -88,6 +89,7 @@ def _get_extensions(self): return to_return + @exacttarget_error_handling def _get_fields(self, extensions): to_return = extensions.copy() @@ -184,6 +186,7 @@ def filter_keys_and_parse(self, obj): return to_return + @exacttarget_error_handling def _replicate(self, customer_key, keys, parent_category_id, table, partial=False, start=None, @@ -225,6 +228,7 @@ def _replicate(self, customer_key, keys, save_state(self.state) + @exacttarget_error_handling def sync_data(self): tap_stream_id = self.catalog.get('tap_stream_id') table = self.catalog.get('stream') diff --git a/tap_exacttarget/endpoints/emails.py b/tap_exacttarget/endpoints/emails.py index 4c0c089..cf7ec34 100644 --- a/tap_exacttarget/endpoints/emails.py +++ b/tap_exacttarget/endpoints/emails.py @@ -2,7 +2,7 @@ import singer from tap_exacttarget.client import request -from tap_exacttarget.dao import DataAccessObject +from tap_exacttarget.dao import (DataAccessObject, 
exacttarget_error_handling) from tap_exacttarget.schemas import ID_FIELD, CUSTOM_PROPERTY_LIST, \ CREATED_DATE_FIELD, CUSTOMER_KEY_FIELD, OBJECT_ID_FIELD, \ MODIFIED_DATE_FIELD, with_properties @@ -120,6 +120,7 @@ def parse_object(self, obj): return super(EmailDataAccessObject, self).parse_object(to_return) + @exacttarget_error_handling def sync_data(self): table = self.__class__.TABLE selector = FuelSDK.ET_Email diff --git a/tap_exacttarget/endpoints/events.py b/tap_exacttarget/endpoints/events.py index 8ed64d5..c7044f0 100644 --- a/tap_exacttarget/endpoints/events.py +++ b/tap_exacttarget/endpoints/events.py @@ -2,7 +2,7 @@ import singer from tap_exacttarget.client import request -from tap_exacttarget.dao import DataAccessObject +from tap_exacttarget.dao import (DataAccessObject, exacttarget_error_handling) from tap_exacttarget.pagination import get_date_page, before_now, \ increment_date from tap_exacttarget.schemas import SUBSCRIBER_KEY_FIELD, with_properties @@ -46,6 +46,7 @@ class EventDataAccessObject(DataAccessObject): TABLE = 'event' KEY_PROPERTIES = ['SendID', 'EventType', 'SubscriberKey', 'EventDate'] + @exacttarget_error_handling def sync_data(self): table = self.__class__.TABLE endpoints = { diff --git a/tap_exacttarget/endpoints/folders.py b/tap_exacttarget/endpoints/folders.py index e52247a..5e27adc 100644 --- a/tap_exacttarget/endpoints/folders.py +++ b/tap_exacttarget/endpoints/folders.py @@ -2,7 +2,7 @@ import singer from tap_exacttarget.client import request -from tap_exacttarget.dao import DataAccessObject +from tap_exacttarget.dao import (DataAccessObject, exacttarget_error_handling) from tap_exacttarget.schemas import ID_FIELD, CUSTOM_PROPERTY_LIST, \ CREATED_DATE_FIELD, CUSTOMER_KEY_FIELD, MODIFIED_DATE_FIELD, \ DESCRIPTION_FIELD, OBJECT_ID_FIELD, with_properties @@ -60,6 +60,7 @@ def parse_object(self, obj): return super(FolderDataAccessObject, self).parse_object(to_return) + @exacttarget_error_handling def sync_data(self): table = 
self.__class__.TABLE selector = FuelSDK.ET_Folder diff --git a/tap_exacttarget/endpoints/list_sends.py b/tap_exacttarget/endpoints/list_sends.py index 136697a..2ed4c82 100644 --- a/tap_exacttarget/endpoints/list_sends.py +++ b/tap_exacttarget/endpoints/list_sends.py @@ -2,7 +2,7 @@ import singer from tap_exacttarget.client import request -from tap_exacttarget.dao import DataAccessObject +from tap_exacttarget.dao import (DataAccessObject, exacttarget_error_handling) from tap_exacttarget.schemas import ID_FIELD, CUSTOM_PROPERTY_LIST, \ CREATED_DATE_FIELD, CUSTOMER_KEY_FIELD, OBJECT_ID_FIELD, \ MODIFIED_DATE_FIELD, with_properties @@ -102,6 +102,7 @@ def parse_object(self, obj): return super(ListSendDataAccessObject, self).parse_object(to_return) + @exacttarget_error_handling def sync_data(self): table = self.__class__.TABLE selector = FuelSDK.ET_ListSend diff --git a/tap_exacttarget/endpoints/list_subscribers.py b/tap_exacttarget/endpoints/list_subscribers.py index 5a7d23f..2d8025c 100644 --- a/tap_exacttarget/endpoints/list_subscribers.py +++ b/tap_exacttarget/endpoints/list_subscribers.py @@ -2,7 +2,7 @@ import singer from tap_exacttarget.client import request -from tap_exacttarget.dao import DataAccessObject +from tap_exacttarget.dao import (DataAccessObject, exacttarget_error_handling) from tap_exacttarget.endpoints.subscribers import SubscriberDataAccessObject from tap_exacttarget.pagination import get_date_page, before_now, \ increment_date @@ -63,6 +63,7 @@ def __init__(self, config, state, auth_stub, catalog): self.replicate_subscriber = False self.subscriber_catalog = None + @exacttarget_error_handling def _get_all_subscribers_list(self): """ Find the 'All Subscribers' list via the SOAP API, and return it. 
@@ -82,6 +83,7 @@ def _get_all_subscribers_list(self): return sudsobj_to_dict(lists[0]) + @exacttarget_error_handling def sync_data(self): table = self.__class__.TABLE subscriber_dao = SubscriberDataAccessObject( diff --git a/tap_exacttarget/endpoints/lists.py b/tap_exacttarget/endpoints/lists.py index b9f2dbf..263beb4 100644 --- a/tap_exacttarget/endpoints/lists.py +++ b/tap_exacttarget/endpoints/lists.py @@ -2,7 +2,7 @@ import singer from tap_exacttarget.client import request -from tap_exacttarget.dao import DataAccessObject +from tap_exacttarget.dao import (DataAccessObject, exacttarget_error_handling) from tap_exacttarget.schemas import ID_FIELD, CUSTOM_PROPERTY_LIST, \ CREATED_DATE_FIELD, OBJECT_ID_FIELD, DESCRIPTION_FIELD, \ MODIFIED_DATE_FIELD, with_properties @@ -50,6 +50,7 @@ class ListDataAccessObject(DataAccessObject): TABLE = 'list' KEY_PROPERTIES = ['ID'] + @exacttarget_error_handling def sync_data(self): table = self.__class__.TABLE selector = FuelSDK.ET_List diff --git a/tap_exacttarget/endpoints/sends.py b/tap_exacttarget/endpoints/sends.py index 5129197..93fb41e 100644 --- a/tap_exacttarget/endpoints/sends.py +++ b/tap_exacttarget/endpoints/sends.py @@ -2,7 +2,7 @@ import singer from tap_exacttarget.client import request -from tap_exacttarget.dao import DataAccessObject +from tap_exacttarget.dao import (DataAccessObject, exacttarget_error_handling) from tap_exacttarget.schemas import ID_FIELD, CUSTOM_PROPERTY_LIST, \ CREATED_DATE_FIELD, MODIFIED_DATE_FIELD, with_properties from tap_exacttarget.state import incorporate, save_state, \ @@ -89,6 +89,7 @@ def parse_object(self, obj): return super(SendDataAccessObject, self).parse_object(to_return) + @exacttarget_error_handling def sync_data(self): table = self.__class__.TABLE selector = FuelSDK.ET_Send diff --git a/tap_exacttarget/endpoints/subscribers.py b/tap_exacttarget/endpoints/subscribers.py index 9a846a2..eff94aa 100644 --- a/tap_exacttarget/endpoints/subscribers.py +++ 
b/tap_exacttarget/endpoints/subscribers.py @@ -2,7 +2,7 @@ import singer from tap_exacttarget.client import request -from tap_exacttarget.dao import DataAccessObject +from tap_exacttarget.dao import (DataAccessObject, exacttarget_error_handling) from tap_exacttarget.schemas import CUSTOM_PROPERTY_LIST, ID_FIELD, \ CREATED_DATE_FIELD, CUSTOMER_KEY_FIELD, OBJECT_ID_FIELD, \ SUBSCRIBER_KEY_FIELD, MODIFIED_DATE_FIELD, with_properties @@ -124,6 +124,7 @@ def parse_object(self, obj): def sync_data(self): pass + @exacttarget_error_handling def pull_subscribers_batch(self, subscriber_keys): if not subscriber_keys: return diff --git a/tests/unittests/test_backoff.py b/tests/unittests/test_backoff.py new file mode 100644 index 0000000..bf9daeb --- /dev/null +++ b/tests/unittests/test_backoff.py @@ -0,0 +1,810 @@ +import unittest +import socket +from unittest import mock +from tap_exacttarget.endpoints import ( + campaigns, content_areas, data_extensions, + emails, events, folders, list_sends, + list_subscribers, lists, sends, subscribers) + +# prepare mock response +class Mockresponse: + def __init__(self, status, json): + self.status = status + self.results = json + self.more_results = False + +# get mock response +def get_response(status, json={}): + return Mockresponse(status, json) + +@mock.patch("time.sleep") +class TestConnectionResetError(unittest.TestCase): + """ + Tests for verifying that the backoff is working as expected for 'ConnectionResetError' + """ + + @mock.patch("FuelSDK.rest.ET_GetSupport.get") + def test_connection_reset_error_occurred__content_area(self, mocked_get, mocked_sleep): + # mocked 'get' and raise error + mocked_get.side_effect = socket.error(104, 'Connection reset by peer') + # make the object of 'ContentAreaDataAccessObject' + obj = content_areas.ContentAreaDataAccessObject({}, {}, None, {}) + try: + # call sync + obj.sync_data() + except ConnectionError: + pass + # verify the code backed off and requested for 5 times + 
self.assertEquals(mocked_get.call_count, 5) + + @mock.patch("FuelSDK.rest.ET_GetSupportRest.get") + def test_connection_reset_error_occurred__campaign(self, mocked_get_rest, mocked_sleep): + # mock 'get' and raise error + mocked_get_rest.side_effect = socket.error(104, 'Connection reset by peer') + # make the object of 'CampaignDataAccessObject' + obj = campaigns.CampaignDataAccessObject({}, {}, None, {}) + try: + # call sync + obj.sync_data() + except ConnectionError: + pass + # verify the code backed off and requested for 5 times + self.assertEquals(mocked_get_rest.call_count, 5) + + @mock.patch("FuelSDK.rest.ET_GetSupportRest.get") + @mock.patch("singer.write_records") + def test_no_connection_reset_error_occurred__campaign(self, mocked_write_records, mocked_get_rest, mocked_sleep): + # mock 'get' and return the dummy data + mocked_get_rest.side_effect = [get_response(True, [{ + "CategoryID": 12345, + "ContentCheckStatus": "Not Checked", + "CreatedDate": "2021-01-01T00:00:00Z", + "EmailType": "Normal" + }])] + # make the object of 'CampaignDataAccessObject' + obj = campaigns.CampaignDataAccessObject({}, {}, None, {}) + # call sync + obj.sync_data() + # verify if 'singer.write_records' was called + # once as there is only one record + 
self.assertEquals(mocked_write_records.call_count, 1) + + @mock.patch("FuelSDK.rest.ET_GetSupport.get") + def test_connection_reset_error_occurred__data_extension(self, mocked_get, mocked_sleep): + # mock 'get' and raise error + mocked_get.side_effect = socket.error(104, 'Connection reset by peer') + # make the object of 'DataExtensionDataAccessObject' + obj = data_extensions.DataExtensionDataAccessObject({"start_date": "2020-01-01T00:00:00Z"}, {}, None, { + # dummy catalog file + "stream": "data_extention.e1", + "tap_stream_id": "data_extention.e1", + "schema": { + "properties": { + "id": { + "type": [ + "null", + "string" + ] + }, + "CategoryID": { + "type": [ + "null", + "string" + ] + } + } + }}) + try: + # call sync + obj.sync_data() + except ConnectionError: + pass + # verify the code backed off and requested for 5 times + self.assertEquals(mocked_get.call_count, 5) + + @mock.patch("FuelSDK.rest.ET_GetSupport.get") + def test_connection_reset_error_occurred__data_extension_get_extensions(self, mocked_get, mocked_sleep): + # mock 'get' and raise error + mocked_get.side_effect = socket.error(104, 'Connection reset by peer') + # make the object of 'DataExtensionDataAccessObject' + obj = data_extensions.DataExtensionDataAccessObject({"start_date": "2020-01-01T00:00:00Z"}, {}, None, {}) + try: + # call function + obj._get_extensions() + except ConnectionError: + pass + # verify the code backed off and requested for 5 times + self.assertEquals(mocked_get.call_count, 5) + + @mock.patch("FuelSDK.objects.ET_DataExtension_Column.get") + def test_connection_reset_error_occurred__data_extension_get_fields(self, mocked_data_ext_column, mocked_sleep): + # mock 'get' and raise error + mocked_data_ext_column.side_effect = socket.error(104, 'Connection reset by peer') + # make the object of 'DataExtensionDataAccessObject' + obj = data_extensions.DataExtensionDataAccessObject({"start_date": "2020-01-01T00:00:00Z"}, {}, None, {}) + try: + # call function + obj._get_fields([]) + 
except ConnectionError: + pass + # verify the code backed off and requested for 5 times + self.assertEquals(mocked_data_ext_column.call_count, 5) + + @mock.patch("FuelSDK.objects.ET_DataExtension_Row.get") + def test_connection_reset_error_occurred__data_extension_replicate(self, mocked_data_ext_column, mocked_sleep): + # mock 'get' and raise error + mocked_data_ext_column.side_effect = socket.error(104, 'Connection reset by peer') + # make the object of 'DataExtensionDataAccessObject' + obj = data_extensions.DataExtensionDataAccessObject({"start_date": "2020-01-01T00:00:00Z"}, {}, None, {}) + try: + # call function + obj._replicate(None, None, None, None) + except ConnectionError: + pass + # verify the code backed off and requested for 5 times + self.assertEquals(mocked_data_ext_column.call_count, 5) + + @mock.patch("FuelSDK.rest.ET_GetSupport.get") + def test_connection_reset_error_occurred__email(self, mocked_get, mocked_sleep): + # mock 'get' and raise error + mocked_get.side_effect = socket.error(104, 'Connection reset by peer') + # make the object of 'EmailDataAccessObject' + obj = emails.EmailDataAccessObject({}, {}, None, {}) + try: + # call function + obj.sync_data() + except ConnectionError: + pass + # verify the code backed off and requested for 5 times + self.assertEquals(mocked_get.call_count, 5) + + @mock.patch("FuelSDK.rest.ET_GetSupport.get") + @mock.patch("singer.write_records") + def test_no_connection_reset_error_occurred__email(self, mocked_write_records, mocked_get, mocked_sleep): + # mock 'get' and return the dummy data + mocked_get.side_effect = [get_response(True, [{ + "CategoryID": 12345, + "ContentCheckStatus": "Not Checked", + "CreatedDate": "2021-01-01T00:00:00Z", + "EmailType": "Normal" + }])] + # make the object of 'EmailDataAccessObject' + obj = emails.EmailDataAccessObject({}, {}, None, {}) + # call sync + obj.sync_data() + # verify if 'singer.write_records' was called + # once as there is only one record + 
self.assertEquals(mocked_write_records.call_count, 1) + + @mock.patch("FuelSDK.rest.ET_GetSupport.get") + def test_connection_reset_error_occurred__events(self, mocked_get, mocked_sleep): + # mock 'get' and raise error + mocked_get.side_effect = socket.error(104, 'Connection reset by peer') + # make the object of 'EventDataAccessObject' + obj = events.EventDataAccessObject({"start_date": "2020-01-01T00:00:00Z"}, {}, None, {}) + try: + # call sync + obj.sync_data() + except ConnectionError: + pass + # verify the code backed off and requested for 5 times + self.assertEquals(mocked_get.call_count, 5) + + @mock.patch("FuelSDK.rest.ET_GetSupport.get") + def test_connection_reset_error_occurred__folder(self, mocked_get, mocked_sleep): + # mock 'get' and raise error + mocked_get.side_effect = socket.error(104, 'Connection reset by peer') + # make the object of 'FolderDataAccessObject' + obj = folders.FolderDataAccessObject({}, {}, None, {}) + try: + # call sync + obj.sync_data() + except ConnectionError: + pass + # verify the code backed off and requested for 5 times + self.assertEquals(mocked_get.call_count, 5) + + @mock.patch("FuelSDK.rest.ET_GetSupport.get") + @mock.patch("singer.write_records") + def test_no_connection_reset_error_occurred__folder(self, mocked_write_records, mocked_get, mocked_sleep): + # mock 'get' and return the dummy data + mocked_get.side_effect = [get_response(True, [{ + "CategoryID": 12345, + "ContentCheckStatus": "Not Checked", + "CreatedDate": "2021-01-01T00:00:00Z", + "EmailType": "Normal" + }])] + # make the object of 'FolderDataAccessObject' + obj = folders.FolderDataAccessObject({}, {}, None, {}) + # call sync + obj.sync_data() + # verify if 'singer.write_records' was called + # once as there is only one record + self.assertEquals(mocked_write_records.call_count, 1) + + @mock.patch("FuelSDK.rest.ET_GetSupport.get") + def test_connection_reset_error_occurred__list_send(self, mocked_get, mocked_sleep): + # mock 'get' and raise error + 
mocked_get.side_effect = socket.error(104, 'Connection reset by peer') + # make the object of 'ListSendDataAccessObject' + obj = list_sends.ListSendDataAccessObject({}, {}, None, {}) + try: + # call sync + obj.sync_data() + except ConnectionError: + pass + # verify the code backed off and requested for 5 times + self.assertEquals(mocked_get.call_count, 5) + + @mock.patch("FuelSDK.rest.ET_GetSupport.get") + @mock.patch("singer.write_records") + def test_no_connection_reset_error_occurred__list_send(self, mocked_write_records, mocked_get, mocked_sleep): + # mock 'get' and return the dummy data + mocked_get.side_effect = [get_response(True, [{ + "CategoryID": 12345, + "ContentCheckStatus": "Not Checked", + "CreatedDate": "2021-01-01T00:00:00Z", + "EmailType": "Normal" + }])] + # make the object of 'ListSendDataAccessObject' + obj = list_sends.ListSendDataAccessObject({}, {}, None, {}) + # call sync + obj.sync_data() + # verify if 'singer.write_records' was called + # once as there is only one record + self.assertEquals(mocked_write_records.call_count, 1) + + @mock.patch("tap_exacttarget.endpoints.list_subscribers.ListSubscriberDataAccessObject._get_all_subscribers_list") + def test_connection_reset_error_occurred__list_subscriber(self, mocked_get, mocked_sleep): + # mock 'get' and raise error + mocked_get.side_effect = socket.error(104, 'Connection reset by peer') + # make the object of 'ListSubscriberDataAccessObject' + obj = list_subscribers.ListSubscriberDataAccessObject({"start_date": "2020-01-01T00:00:00Z"}, {}, None, {}) + try: + # call sync + obj.sync_data() + except ConnectionError: + pass + # verify the code backed off and requested for 5 times + self.assertEquals(mocked_get.call_count, 5) + + @mock.patch("FuelSDK.rest.ET_GetSupport.get") + def test_connection_reset_error_occurred__list_subscriber__get_all_subscribers_list(self, mocked_get, mocked_sleep): + # mock 'get' and raise error + mocked_get.side_effect = socket.error(104, 'Connection reset by peer') + 
# make the object of 'ListSubscriberDataAccessObject' + obj = list_subscribers.ListSubscriberDataAccessObject({"start_date": "2020-01-01T00:00:00Z"}, {}, None, {}) + try: + # call function + obj._get_all_subscribers_list() + except ConnectionError: + pass + # verify the code backed off and requested for 5 times + self.assertEquals(mocked_get.call_count, 5) + + @mock.patch("FuelSDK.rest.ET_GetSupport.get") + def test_no_connection_reset_error_occurred__list_subscriber__get_all_subscribers_list(self, mocked_get, mocked_sleep): + json = { + "CategoryID": 12345, + "ContentCheckStatus": "Not Checked", + "CreatedDate": "2021-01-01T00:00:00Z", + "EmailType": "Normal" + } + # mock 'get' and return the dummy data + mocked_get.side_effect = [get_response(True, [json])] + # make the object of 'ListSubscriberDataAccessObject' + obj = list_subscribers.ListSubscriberDataAccessObject({}, {}, None, {}) + # call function + actual = obj._get_all_subscribers_list() + # verify if the record was returned as response + self.assertEquals(actual, json) + + @mock.patch("FuelSDK.rest.ET_GetSupport.get") + def test_connection_reset_error_occurred__list(self, mocked_get, mocked_sleep): + # mock 'get' and raise error + mocked_get.side_effect = socket.error(104, 'Connection reset by peer') + # make the object of 'ListDataAccessObject' + obj = lists.ListDataAccessObject({}, {}, None, {}) + try: + # call sync + obj.sync_data() + except ConnectionError: + pass + # verify the code backed off and requested for 5 times + self.assertEquals(mocked_get.call_count, 5) + + @mock.patch("FuelSDK.rest.ET_GetSupport.get") + @mock.patch("singer.write_records") + def test_no_connection_reset_error_occurred__list(self, mocked_write_records, mocked_get, mocked_sleep): + # mock 'get' and return the dummy data + mocked_get.side_effect = [get_response(True, [{ + "CategoryID": 12345, + "ContentCheckStatus": "Not Checked", + "CreatedDate": "2021-01-01T00:00:00Z", + "EmailType": "Normal" + }])] + # make the object of 
'ListDataAccessObject' + obj = lists.ListDataAccessObject({}, {}, None, {}) + # call sync + obj.sync_data() + # verify if 'singer.write_records' was called + # once as there is only one record + self.assertEquals(mocked_write_records.call_count, 1) + + @mock.patch("FuelSDK.rest.ET_GetSupport.get") + def test_connection_reset_error_occurred__sends(self, mocked_get, mocked_sleep): + # mock 'get' and raise error + mocked_get.side_effect = socket.error(104, 'Connection reset by peer') + # make the object of 'SendDataAccessObject' + obj = sends.SendDataAccessObject({}, {}, None, {}) + try: + # call sync + obj.sync_data() + except ConnectionError: + pass + # verify the code backed off and requested for 5 times + self.assertEquals(mocked_get.call_count, 5) + + @mock.patch("FuelSDK.rest.ET_GetSupport.get") + @mock.patch("singer.write_records") + def test_no_connection_reset_error_occurred__sends(self, mocked_write_records, mocked_get, mocked_sleep): + # mock 'get' and return the dummy data + mocked_get.side_effect = [get_response(True, [{ + "CategoryID": 12345, + "ContentCheckStatus": "Not Checked", + "CreatedDate": "2021-01-01T00:00:00Z", + "EmailType": "Normal" + }])] + # make the object of 'SendDataAccessObject' + obj = sends.SendDataAccessObject({}, {}, None, {}) + # call sync + obj.sync_data() + # verify if 'singer.write_records' was called + # once as there is only one record + self.assertEquals(mocked_write_records.call_count, 1) + + @mock.patch("FuelSDK.rest.ET_GetSupport.get") + def test_connection_reset_error_occurred__subscriber(self, mocked_get, mocked_sleep): + # mock 'get' and raise error + mocked_get.side_effect = socket.error(104, 'Connection reset by peer') + # make the object of 'SubscriberDataAccessObject' + obj = subscribers.SubscriberDataAccessObject({}, {}, None, {}) + try: + # call function + obj.pull_subscribers_batch(['sub1']) + except ConnectionError: + pass + # verify the code backed off and requested for 5 times + 
self.assertEquals(mocked_get.call_count, 5) + + @mock.patch("FuelSDK.rest.ET_GetSupport.get") + @mock.patch("singer.write_records") + def test_no_connection_reset_error_occurred__subscriber(self, mocked_write_records, mocked_get, mocked_sleep): + # mock 'get' and return the dummy data + mocked_get.side_effect = [get_response(True, [{ + "CategoryID": 12345, + "ContentCheckStatus": "Not Checked", + "CreatedDate": "2021-01-01T00:00:00Z", + "EmailType": "Normal" + }])] + # make the object of 'SubscriberDataAccessObject' + obj = subscribers.SubscriberDataAccessObject({}, {}, None, {}) + # call function + obj.pull_subscribers_batch(['sub1']) + # verify if 'singer.write_records' was called + # once as there is only one record + self.assertEquals(mocked_write_records.call_count, 1) + +@mock.patch("time.sleep") +class TestSocketTimeoutError(unittest.TestCase): + """ + Tests for verifying that the backoff is working as expected for 'socket.timeout' error + """ + + @mock.patch("FuelSDK.rest.ET_GetSupport.get") + def test_socket_timeout_error_occurred__content_area(self, mocked_get, mocked_sleep): + # mock 'get' and raise error + mocked_get.side_effect = socket.timeout("The read operation timed out") + # make the object of 'ContentAreaDataAccessObject' + obj = content_areas.ContentAreaDataAccessObject({}, {}, None, {}) + try: + # call sync + obj.sync_data() + except socket.timeout: + pass + # verify the code backed off and requested for 5 times + self.assertEquals(mocked_get.call_count, 5) + + @mock.patch("FuelSDK.rest.ET_GetSupport.get") + @mock.patch("singer.write_records") + def test_no_socket_timeout_error_occurred__content_area(self, mocked_write_records, mocked_get, mocked_sleep): + # mock 'get' and return the dummy data + mocked_get.side_effect = [get_response(True, [{ + "CategoryID": 12345, + "ContentCheckStatus": "Not Checked", + "CreatedDate": "2021-01-01T00:00:00Z", + "EmailType": "Normal" + }])] + # make the object of 'ContentAreaDataAccessObject' + obj = 
content_areas.ContentAreaDataAccessObject({}, {}, None, {}) + # call sync + obj.sync_data() + # verify if 'singer.write_records' was called + # once as there is only one record + self.assertEquals(mocked_write_records.call_count, 1) + + @mock.patch("FuelSDK.rest.ET_GetSupportRest.get") + def test_socket_timeout_error_occurred__campaign(self, mocked_get_rest, mocked_sleep): + # mock 'get' and raise error + mocked_get_rest.side_effect = socket.timeout("The read operation timed out") + # make the object of 'CampaignDataAccessObject' + obj = campaigns.CampaignDataAccessObject({}, {}, None, {}) + try: + # call sync + obj.sync_data() + except socket.timeout: + pass + # verify the code backed off and requested for 5 times + self.assertEquals(mocked_get_rest.call_count, 5) + + @mock.patch("FuelSDK.rest.ET_GetSupportRest.get") + @mock.patch("singer.write_records") + def test_no_socket_timeout_error_occurred__campaign(self, mocked_write_records, mocked_get_rest, mocked_sleep): + # mock 'get' and return the dummy data + mocked_get_rest.side_effect = [get_response(True, [{ + "CategoryID": 12345, + "ContentCheckStatus": "Not Checked", + "CreatedDate": "2021-01-01T00:00:00Z", + "EmailType": "Normal" + }])] + # make the object of 'CampaignDataAccessObject' + obj = campaigns.CampaignDataAccessObject({}, {}, None, {}) + # call sync + obj.sync_data() + # verify if 'singer.write_records' was called + # once as there is only one record + self.assertEquals(mocked_write_records.call_count, 1) + + @mock.patch("FuelSDK.rest.ET_GetSupport.get") + def test_socket_timeout_error_occurred__data_extension(self, mocked_get, mocked_sleep): + # mock 'get' and raise error + mocked_get.side_effect = socket.timeout("The read operation timed out") + # make the object of 'DataExtensionDataAccessObject' + obj = data_extensions.DataExtensionDataAccessObject({"start_date": "2020-01-01T00:00:00Z"}, {}, None, { + # dummy catalog file + "stream": "data_extention.e1", + "tap_stream_id": "data_extention.e1", + 
"schema": { + "properties": { + "id": { + "type": [ + "null", + "string" + ] + }, + "CategoryID": { + "type": [ + "null", + "string" + ] + } + } + }}) + try: + # call sync + obj.sync_data() + except socket.timeout: + pass + # verify the code backed off and requested for 5 times + self.assertEquals(mocked_get.call_count, 5) + + @mock.patch("FuelSDK.rest.ET_GetSupport.get") + def test_socket_timeout_error_occurred__data_extension_get_extensions(self, mocked_get, mocked_sleep): + # mock 'get' and raise error + mocked_get.side_effect = socket.timeout("The read operation timed out") + # make the object of 'DataExtensionDataAccessObject' + obj = data_extensions.DataExtensionDataAccessObject({"start_date": "2020-01-01T00:00:00Z"}, {}, None, {}) + try: + # call function + obj._get_extensions() + except socket.timeout: + pass + # verify the code backed off and requested for 5 times + self.assertEquals(mocked_get.call_count, 5) + + @mock.patch("FuelSDK.objects.ET_DataExtension_Column.get") + def test_socket_timeout_error_occurred__data_extension_get_fields(self, mocked_data_ext_column, mocked_sleep): + # mock 'get' and raise error + mocked_data_ext_column.side_effect = socket.timeout("The read operation timed out") + # make the object of 'DataExtensionDataAccessObject' + obj = data_extensions.DataExtensionDataAccessObject({"start_date": "2020-01-01T00:00:00Z"}, {}, None, {}) + try: + # call function + obj._get_fields([]) + except socket.timeout: + pass + # verify the code backed off and requested for 5 times + self.assertEquals(mocked_data_ext_column.call_count, 5) + + @mock.patch("FuelSDK.objects.ET_DataExtension_Row.get") + def test_socket_timeout_error_occurred__data_extension_replicate(self, mocked_data_ext_column, mocked_sleep): + # mock 'get' and raise error + mocked_data_ext_column.side_effect = socket.timeout("The read operation timed out") + # make the object of 'DataExtensionDataAccessObject' + obj = data_extensions.DataExtensionDataAccessObject({"start_date": 
"2020-01-01T00:00:00Z"}, {}, None, {}) + try: + # call function + obj._replicate(None, None, None, None) + except socket.timeout: + pass + # verify the code backed off and requested for 5 times + self.assertEquals(mocked_data_ext_column.call_count, 5) + + @mock.patch("FuelSDK.rest.ET_GetSupport.get") + def test_socket_timeout_error_occurred__email(self, mocked_get, mocked_sleep): + # mock 'get' and raise error + mocked_get.side_effect = socket.timeout("The read operation timed out") + # # make the object of 'EmailDataAccessObject' + obj = emails.EmailDataAccessObject({}, {}, None, {}) + try: + # call sync + obj.sync_data() + except socket.timeout: + pass + # verify the code backed off and requested for 5 times + self.assertEquals(mocked_get.call_count, 5) + + @mock.patch("FuelSDK.rest.ET_GetSupport.get") + @mock.patch("singer.write_records") + def test_no_socket_timeout_error_occurred__email(self, mocked_write_records, mocked_get, mocked_sleep): + # mock 'get' and return the dummy data + mocked_get.side_effect = [get_response(True, [{ + "CategoryID": 12345, + "ContentCheckStatus": "Not Checked", + "CreatedDate": "2021-01-01T00:00:00Z", + "EmailType": "Normal" + }])] + # make the object of 'EmailDataAccessObject' + obj = emails.EmailDataAccessObject({}, {}, None, {}) + # call sync + obj.sync_data() + # verify if 'singer.write_records' was called + # once as there is only one record + self.assertEquals(mocked_write_records.call_count, 1) + + @mock.patch("FuelSDK.rest.ET_GetSupport.get") + def test_socket_timeout_error_occurred__events(self, mocked_get, mocked_sleep): + # mock 'get' and raise error + mocked_get.side_effect = socket.timeout("The read operation timed out") + # make the object of 'EventDataAccessObject' + obj = events.EventDataAccessObject({"start_date": "2020-01-01T00:00:00Z"}, {}, None, {}) + try: + # call sync + obj.sync_data() + except socket.timeout: + pass + # verify the code backed off and requested for 5 times + 
self.assertEquals(mocked_get.call_count, 5) + + @mock.patch("FuelSDK.rest.ET_GetSupport.get") + def test_socket_timeout_error_occurred__folder(self, mocked_get, mocked_sleep): + # mock 'get' and raise error + mocked_get.side_effect = socket.timeout("The read operation timed out") + # make the object of 'FolderDataAccessObject' + obj = folders.FolderDataAccessObject({}, {}, None, {}) + try: + # call sync + obj.sync_data() + except socket.timeout: + pass + # verify the code backed off and requested for 5 times + self.assertEquals(mocked_get.call_count, 5) + + @mock.patch("FuelSDK.rest.ET_GetSupport.get") + @mock.patch("singer.write_records") + def test_no_socket_timeout_error_occurred__folder(self, mocked_write_records, mocked_get, mocked_sleep): + # mock 'get' and return the dummy data + mocked_get.side_effect = [get_response(True, [{ + "CategoryID": 12345, + "ContentCheckStatus": "Not Checked", + "CreatedDate": "2021-01-01T00:00:00Z", + "EmailType": "Normal" + }])] + # make the object of 'FolderDataAccessObject' + obj = folders.FolderDataAccessObject({}, {}, None, {}) + # call sync + obj.sync_data() + # verify if 'singer.write_records' was called + # once as there is only one record + self.assertEquals(mocked_write_records.call_count, 1) + + @mock.patch("FuelSDK.rest.ET_GetSupport.get") + def test_socket_timeout_error_occurred__list_send(self, mocked_get, mocked_sleep): + # mock 'get' and raise error + mocked_get.side_effect = socket.timeout("The read operation timed out") + # make the object of 'ListSendDataAccessObject' + obj = list_sends.ListSendDataAccessObject({}, {}, None, {}) + try: + # call sync + obj.sync_data() + except socket.timeout: + pass + # verify the code backed off and requested for 5 times + self.assertEquals(mocked_get.call_count, 5) + + @mock.patch("FuelSDK.rest.ET_GetSupport.get") + @mock.patch("singer.write_records") + def test_no_socket_timeout_error_occurred__list_send(self, mocked_write_records, mocked_get, mocked_sleep): + # mock 'get' 
and return the dummy data + mocked_get.side_effect = [get_response(True, [{ + "CategoryID": 12345, + "ContentCheckStatus": "Not Checked", + "CreatedDate": "2021-01-01T00:00:00Z", + "EmailType": "Normal" + }])] + # make the object of 'ListSendDataAccessObject' + obj = list_sends.ListSendDataAccessObject({}, {}, None, {}) + # call sync + obj.sync_data() + # verify if 'singer.write_records' was called + # once as there is only one record + self.assertEquals(mocked_write_records.call_count, 1) + + @mock.patch("tap_exacttarget.endpoints.list_subscribers.ListSubscriberDataAccessObject._get_all_subscribers_list") + def test_socket_timeout_error_occurred__list_subscriber(self, mocked_get, mocked_sleep): + # mock 'get' and raise error + mocked_get.side_effect = socket.timeout("The read operation timed out") + # make the object of 'ListSubscriberDataAccessObject' + obj = list_subscribers.ListSubscriberDataAccessObject({"start_date": "2020-01-01T00:00:00Z"}, {}, None, {}) + try: + # call sync + obj.sync_data() + except socket.timeout: + pass + # verify the code backed off and requested for 5 times + self.assertEquals(mocked_get.call_count, 5) + + @mock.patch("FuelSDK.rest.ET_GetSupport.get") + def test_socket_timeout_error_occurred__list_subscriber__get_all_subscribers_list(self, mocked_get, mocked_sleep): + # mock 'get' and raise error + mocked_get.side_effect = socket.timeout("The read operation timed out") + # make the object of 'ListSubscriberDataAccessObject' + obj = list_subscribers.ListSubscriberDataAccessObject({"start_date": "2020-01-01T00:00:00Z"}, {}, None, {}) + try: + # call function + obj._get_all_subscribers_list() + except socket.timeout: + pass + # verify the code backed off and requested for 5 times + self.assertEquals(mocked_get.call_count, 5) + + @mock.patch("FuelSDK.rest.ET_GetSupport.get") + def test_no_socket_timeout_error_occurred__list_subscriber__get_all_subscribers_list(self, mocked_get, mocked_sleep): + json = { + "CategoryID": 12345, + 
"ContentCheckStatus": "Not Checked", + "CreatedDate": "2021-01-01T00:00:00Z", + "EmailType": "Normal" + } + # mock 'get' and return the dummy data + mocked_get.side_effect = [get_response(True, [json])] + # make the object of 'ListSubscriberDataAccessObject' + obj = list_subscribers.ListSubscriberDataAccessObject({}, {}, None, {}) + # call function + actual = obj._get_all_subscribers_list() + # verify if the record was returned as response + self.assertEquals(actual, json) + + @mock.patch("FuelSDK.rest.ET_GetSupport.get") + def test_socket_timeout_error_occurred__list(self, mocked_get, mocked_sleep): + # mock 'get' and raise error + mocked_get.side_effect = socket.timeout("The read operation timed out") + # make the object of 'ListDataAccessObject' + obj = lists.ListDataAccessObject({}, {}, None, {}) + try: + # call sync + obj.sync_data() + except socket.timeout: + pass + # verify the code backed off and requested for 5 times + self.assertEquals(mocked_get.call_count, 5) + + @mock.patch("FuelSDK.rest.ET_GetSupport.get") + @mock.patch("singer.write_records") + def test_no_socket_timeout_error_occurred__list(self, mocked_write_records, mocked_get, mocked_sleep): + # mock 'get' and return the dummy data + mocked_get.side_effect = [get_response(True, [{ + "CategoryID": 12345, + "ContentCheckStatus": "Not Checked", + "CreatedDate": "2021-01-01T00:00:00Z", + "EmailType": "Normal" + }])] + # make the object of 'ListDataAccessObject' + obj = lists.ListDataAccessObject({}, {}, None, {}) + # call sync + obj.sync_data() + # verify if 'singer.write_records' was called + # once as there is only one record + self.assertEquals(mocked_write_records.call_count, 1) + + @mock.patch("FuelSDK.rest.ET_GetSupport.get") + def test_socket_timeout_error_occurred__sends(self, mocked_get, mocked_sleep): + # mock 'get' and raise error + mocked_get.side_effect = socket.timeout("The read operation timed out") + # make the object of 'SendDataAccessObject' + obj = sends.SendDataAccessObject({}, 
{}, None, {}) + try: + # call sync + obj.sync_data() + except socket.timeout: + pass + # verify the code backed off and requested for 5 times + self.assertEquals(mocked_get.call_count, 5) + + @mock.patch("FuelSDK.rest.ET_GetSupport.get") + @mock.patch("singer.write_records") + def test_no_socket_timeout_error_occurred__sends(self, mocked_write_records, mocked_get, mocked_sleep): + # mock 'get' and return the dummy data + mocked_get.side_effect = [get_response(True, [{ + "CategoryID": 12345, + "ContentCheckStatus": "Not Checked", + "CreatedDate": "2021-01-01T00:00:00Z", + "EmailType": "Normal" + }])] + # make the object of 'SendDataAccessObject' + obj = sends.SendDataAccessObject({}, {}, None, {}) + # call sync + obj.sync_data() + # verify if 'singer.write_records' was called + # once as there is only one record + self.assertEquals(mocked_write_records.call_count, 1) + + @mock.patch("FuelSDK.rest.ET_GetSupport.get") + def test_socket_timeout_error_occurred__subscriber(self, mocked_get, mocked_sleep): + # mock 'get' and raise error + mocked_get.side_effect = socket.timeout("The read operation timed out") + # make the object of 'SubscriberDataAccessObject' + obj = subscribers.SubscriberDataAccessObject({}, {}, None, {}) + try: + # call function + obj.pull_subscribers_batch(['sub1']) + except socket.timeout: + pass + # verify the code backed off and requested for 5 times + self.assertEquals(mocked_get.call_count, 5) + + @mock.patch("FuelSDK.rest.ET_GetSupport.get") + @mock.patch("singer.write_records") + def test_no_socket_timeout_error_occurred__subscriber(self, mocked_write_records, mocked_get, mocked_sleep): + # mock 'get' and return the dummy data + mocked_get.side_effect = [get_response(True, [{ + "CategoryID": 12345, + "ContentCheckStatus": "Not Checked", + "CreatedDate": "2021-01-01T00:00:00Z", + "EmailType": "Normal" + }])] + # make the object of 'SubscriberDataAccessObject' + obj = subscribers.SubscriberDataAccessObject({}, {}, None, {}) + # call function + 
obj.pull_subscribers_batch(['sub1']) + # verify if 'singer.write_records' was called + # once as there is only one record + self.assertEquals(mocked_write_records.call_count, 1) diff --git a/tests/unittests/test_pagination.py b/tests/unittests/test_pagination.py index 0784f64..f76c139 100644 --- a/tests/unittests/test_pagination.py +++ b/tests/unittests/test_pagination.py @@ -6,9 +6,11 @@ class TestPagination(unittest.TestCase): def test_increment_date(self): + # verify that if there is no 'unit' mentioned, then the date should be incremented by '1 day' self.assertEqual( increment_date("2015-09-28T10:05:53Z"), "2015-09-29T10:05:53Z") + # verify that the 'increment_date' correctly increments the date by sepcified 'unit'(here: 1 hour) self.assertEqual( increment_date("2015-09-28T10:05:53Z", {'hours': 1}), "2015-09-28T11:05:53Z") diff --git a/tests/unittests/test_state.py b/tests/unittests/test_state.py index c05be93..8250be6 100644 --- a/tests/unittests/test_state.py +++ b/tests/unittests/test_state.py @@ -6,6 +6,7 @@ class TestState(unittest.TestCase): def test_incorporate(self): + # verify that the state file is updated if there is no previous bookmark present self.assertEqual( incorporate({}, 'table', 'modifieddate', '2017-11-01'), { @@ -17,6 +18,8 @@ def test_incorporate(self): } }) + # verify that the bookmark value is updated as the previous + # bookmark value is smaller than the current record's value self.assertEqual( incorporate({ 'bookmarks': { @@ -35,6 +38,8 @@ def test_incorporate(self): } }) + # verify that the bookmark value is not updated as the previous + # bookmark value is greater than the current record's value self.assertEqual( incorporate({ 'bookmarks': { diff --git a/tests/unittests/test_util.py b/tests/unittests/test_util.py index 4ffbf66..224e9fe 100644 --- a/tests/unittests/test_util.py +++ b/tests/unittests/test_util.py @@ -6,6 +6,7 @@ class TestPartitionAll(unittest.TestCase): def test__partition_all(self): + # verify that the 'partion_all' 
correctly divides the records into the specified chunk size self.assertEqual( list(partition_all([1, 2, 3, 4, 5, 6, 7], 3)), [[1, 2, 3], [4, 5, 6], [7]]) From a947b16575f3c3a77b1a81db5ed711c71cf5e7ed Mon Sep 17 00:00:00 2001 From: Harsh <80324346+harshpatel4crest@users.noreply.github.com> Date: Wed, 13 Oct 2021 11:50:49 +0530 Subject: [PATCH 32/36] TDL-14890: Print user friendly error messages (#73) * updated error message when generating auth_stub * made changes according to the comments * updated the code acording to the comments * updated the tap tester image * updated pylint and astroid to latest version * updated the code as, on updating tap tester image it was throwing cascading errors * updated config.yml file * updated the start date for integration tests as per the params * removed scenario as new tap-tester version does not support it * updated start date in the base file test --- .circleci/config.yml | 12 +- setup.py | 4 +- tap_exacttarget/__init__.py | 8 +- tap_exacttarget/client.py | 6 +- tap_exacttarget/endpoints/emails.py | 2 +- tap_exacttarget/endpoints/folders.py | 2 +- tap_exacttarget/endpoints/list_sends.py | 2 +- tap_exacttarget/endpoints/list_subscribers.py | 2 +- tap_exacttarget/endpoints/sends.py | 2 +- tap_exacttarget/endpoints/subscribers.py | 2 +- tap_exacttarget/fuel_overrides.py | 2 +- tests/test_exacttarget_base.py | 4 +- tests/test_exacttarget_discover.py | 6 +- tests/unittests/test_error_messages.py | 194 ++++++++++++++++++ 14 files changed, 219 insertions(+), 29 deletions(-) create mode 100644 tests/unittests/test_error_messages.py diff --git a/.circleci/config.yml b/.circleci/config.yml index e7b208b..b1568b7 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -2,7 +2,7 @@ version: 2 jobs: build: docker: - - image: 218546966473.dkr.ecr.us-east-1.amazonaws.com/circle-ci:tap-tester + - image: 218546966473.dkr.ecr.us-east-1.amazonaws.com/circle-ci:stitch-tap-tester steps: - checkout - run: @@ -26,16 +26,10 @@ jobs: - run: 
name: 'Integration Tests' command: | - aws s3 cp s3://com-stitchdata-dev-deployment-assets/environments/tap-tester/sandbox dev_env.sh + aws s3 cp s3://com-stitchdata-dev-deployment-assets/environments/tap-tester/tap_tester_sandbox dev_env.sh source dev_env.sh source /usr/local/share/virtualenvs/tap-tester/bin/activate - run-test --tap=tap-exacttarget \ - --target=target-stitch \ - --orchestrator=stitch-orchestrator \ - --email=harrison+sandboxtest@stitchdata.com \ - --password=$SANDBOX_PASSWORD \ - --client-id=50 \ - tests + run-test --tap=tap-exacttarget tests workflows: version: 2 commit: diff --git a/setup.py b/setup.py index 07c1e2d..df1e00c 100644 --- a/setup.py +++ b/setup.py @@ -20,8 +20,8 @@ extras_require={ 'dev': [ 'ipdb==0.11', - 'pylint==2.1.1', - 'astroid==2.1.0', + 'pylint==2.10.2', + 'astroid==2.7.3', 'nose' ] }, diff --git a/tap_exacttarget/__init__.py b/tap_exacttarget/__init__.py index 66c61a0..58bcd9d 100644 --- a/tap_exacttarget/__init__.py +++ b/tap_exacttarget/__init__.py @@ -3,6 +3,8 @@ import argparse import json +import sys + import singer from singer import utils from singer import metadata @@ -123,7 +125,7 @@ def do_sync(args): LOGGER.fatal('Cannot replicate `subscriber` without ' '`list_subscriber`. 
Please select `list_subscriber` ' 'and try again.') - exit(1) + sys.exit(1) for stream_accessor in stream_accessors: if isinstance(stream_accessor, ListSubscriberDataAccessObject) and \ @@ -161,10 +163,10 @@ def main(): if success: LOGGER.info("Completed successfully, exiting.") - exit(0) + sys.exit(0) else: LOGGER.info("Run failed, exiting.") - exit(1) + sys.exit(1) if __name__ == '__main__': main() diff --git a/tap_exacttarget/client.py b/tap_exacttarget/client.py index 701985c..662e589 100644 --- a/tap_exacttarget/client.py +++ b/tap_exacttarget/client.py @@ -61,7 +61,8 @@ def get_auth_stub(config): LOGGER.info('Failed to auth using V1 endpoint') if not config.get('tenant_subdomain'): LOGGER.warning('No tenant_subdomain found, will not attempt to auth with V2 endpoint') - raise e + message = f"{str(e)}. Please check your \'client_id\', \'client_secret\' or try adding the \'tenant_subdomain\'." + raise Exception(message) from None # Next try V2 # Move to OAuth2: https://help.salesforce.com/articleView?id=mc_rn_january_2019_platform_ip_remove_legacy_package_create_ability.htm&type=5 @@ -77,7 +78,8 @@ def get_auth_stub(config): transport=transport) except Exception as e: LOGGER.info('Failed to auth using V2 endpoint') - raise e + message = f"{str(e)}. Please check your \'client_id\', \'client_secret\' or \'tenant_subdomain\'." 
+ raise Exception(message) from None LOGGER.info("Success.") return auth_stub diff --git a/tap_exacttarget/endpoints/emails.py b/tap_exacttarget/endpoints/emails.py index cf7ec34..19600d3 100644 --- a/tap_exacttarget/endpoints/emails.py +++ b/tap_exacttarget/endpoints/emails.py @@ -118,7 +118,7 @@ def parse_object(self, obj): to_return['EmailID'] = to_return.get('Email', {}).get('ID') to_return['ContentAreaIDs'] = content_area_ids - return super(EmailDataAccessObject, self).parse_object(to_return) + return super().parse_object(to_return) @exacttarget_error_handling def sync_data(self): diff --git a/tap_exacttarget/endpoints/folders.py b/tap_exacttarget/endpoints/folders.py index 5e27adc..2c9c0d6 100644 --- a/tap_exacttarget/endpoints/folders.py +++ b/tap_exacttarget/endpoints/folders.py @@ -58,7 +58,7 @@ def parse_object(self, obj): to_return['ParentFolder'] = to_return.get('ParentFolder', {}).get('ID') - return super(FolderDataAccessObject, self).parse_object(to_return) + return super().parse_object(to_return) @exacttarget_error_handling def sync_data(self): diff --git a/tap_exacttarget/endpoints/list_sends.py b/tap_exacttarget/endpoints/list_sends.py index 2ed4c82..3dc62cb 100644 --- a/tap_exacttarget/endpoints/list_sends.py +++ b/tap_exacttarget/endpoints/list_sends.py @@ -100,7 +100,7 @@ def parse_object(self, obj): to_return['ListID'] = to_return.get('List', {}).get('ID') - return super(ListSendDataAccessObject, self).parse_object(to_return) + return super().parse_object(to_return) @exacttarget_error_handling def sync_data(self): diff --git a/tap_exacttarget/endpoints/list_subscribers.py b/tap_exacttarget/endpoints/list_subscribers.py index 2d8025c..75aa4c4 100644 --- a/tap_exacttarget/endpoints/list_subscribers.py +++ b/tap_exacttarget/endpoints/list_subscribers.py @@ -57,7 +57,7 @@ class ListSubscriberDataAccessObject(DataAccessObject): KEY_PROPERTIES = ['SubscriberKey', 'ListID'] def __init__(self, config, state, auth_stub, catalog): - 
super(ListSubscriberDataAccessObject, self).__init__( + super().__init__( config, state, auth_stub, catalog) self.replicate_subscriber = False diff --git a/tap_exacttarget/endpoints/sends.py b/tap_exacttarget/endpoints/sends.py index 93fb41e..9a900cf 100644 --- a/tap_exacttarget/endpoints/sends.py +++ b/tap_exacttarget/endpoints/sends.py @@ -87,7 +87,7 @@ def parse_object(self, obj): to_return['EmailID'] = to_return.get('Email', {}).get('ID') - return super(SendDataAccessObject, self).parse_object(to_return) + return super().parse_object(to_return) @exacttarget_error_handling def sync_data(self): diff --git a/tap_exacttarget/endpoints/subscribers.py b/tap_exacttarget/endpoints/subscribers.py index eff94aa..a0b54a0 100644 --- a/tap_exacttarget/endpoints/subscribers.py +++ b/tap_exacttarget/endpoints/subscribers.py @@ -119,7 +119,7 @@ def parse_object(self, obj): if to_return.get('PartnerProperties') is None: to_return['PartnerProperties'] = [] - return super(SubscriberDataAccessObject, self).parse_object(obj) + return super().parse_object(obj) def sync_data(self): pass diff --git a/tap_exacttarget/fuel_overrides.py b/tap_exacttarget/fuel_overrides.py index 6266af1..594f5b4 100644 --- a/tap_exacttarget/fuel_overrides.py +++ b/tap_exacttarget/fuel_overrides.py @@ -26,7 +26,7 @@ def __init__(self, auth_stub, request_id, batch_size): response = auth_stub.soap_client.service.Retrieve(ws_continueRequest) if response is not None: - super(TapExacttarget__ET_Continue, self).__init__(response) + super().__init__(response) def tap_exacttarget__getMoreResults(cursor, batch_size=2500): obj = TapExacttarget__ET_Continue(cursor.auth_stub, cursor.last_request_id, batch_size) diff --git a/tests/test_exacttarget_base.py b/tests/test_exacttarget_base.py index 3c70772..7d921f5 100644 --- a/tests/test_exacttarget_base.py +++ b/tests/test_exacttarget_base.py @@ -1,4 +1,3 @@ -from tap_tester.scenario import SCENARIOS import datetime import tap_tester.connections as connections @@ -39,7 
+38,7 @@ def get_credentials(self): def get_properties(self): yesterday = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=1) return { - 'start_date': yesterday.strftime("%Y-%m-%dT%H:%M:%SZ"), + 'start_date': yesterday.strftime("%Y-%m-%dT00:00:00Z"), 'client_id': os.getenv('TAP_EXACTTARGET_CLIENT_ID') } @@ -92,4 +91,3 @@ def test_run(self): menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) -SCENARIOS.add(ExactTargetBase) diff --git a/tests/test_exacttarget_discover.py b/tests/test_exacttarget_discover.py index b183e17..c6755b8 100644 --- a/tests/test_exacttarget_discover.py +++ b/tests/test_exacttarget_discover.py @@ -41,7 +41,7 @@ def get_credentials(self): def get_properties(self): yesterday = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=1) return { - 'start_date': yesterday.strftime("%Y-%m-%dT%H:%M:%SZ"), + 'start_date': yesterday.strftime("%Y-%m-%dT00:00:00Z"), 'client_id': os.getenv('TAP_EXACTTARGET_CLIENT_ID') } @@ -77,7 +77,7 @@ def get_credentials(self): def get_properties(self): yesterday = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=1) return { - 'start_date': yesterday.strftime("%Y-%m-%dT%H:%M:%SZ"), + 'start_date': yesterday.strftime("%Y-%m-%dT00:00:00Z"), 'client_id': os.getenv('TAP_EXACTTARGET_CLIENT_ID'), 'tenant_subdomain': os.getenv('TAP_EXACTTARGET_TENANT_SUBDOMAIN') } @@ -95,7 +95,7 @@ def get_credentials(self): def get_properties(self): yesterday = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=1) return { - 'start_date': yesterday.strftime("%Y-%m-%dT%H:%M:%SZ"), + 'start_date': yesterday.strftime("%Y-%m-%dT00:00:00Z"), 'client_id': os.getenv('TAP_EXACTTARGET_V2_CLIENT_ID'), 'tenant_subdomain': os.getenv('TAP_EXACTTARGET_V2_TENANT_SUBDOMAIN') } diff --git a/tests/unittests/test_error_messages.py b/tests/unittests/test_error_messages.py new file mode 100644 index 0000000..7a5e8cb --- /dev/null +++ 
b/tests/unittests/test_error_messages.py @@ -0,0 +1,194 @@ +from unittest import mock +import unittest +import tap_exacttarget.client as _client +import FuelSDK +import time +import requests + +class Mockresponse: + def __init__(self, json, headers=None): + self.text = json + self.headers = headers + + def json(self): + return self.text + +def get_response(json={}): + return Mockresponse(json) + +class TestErrorMessages(unittest.TestCase): + + @mock.patch("requests.post") + def test_error_1(self, mocked_post_request): + json = { + "error": "Client authentication failed."} + mocked_post_request.return_value = get_response(json) + config = { + "client_id": "client_id_123", + "client_secret": "client_secret_123", + "tenant_subdomain": "", + "start_date": "2019-01-01T00:00:00Z", + "request_timeout": "900", + "batch_size": 2500 + } + + try: + _client.get_auth_stub(config) + except Exception as e: + # as "tenant_subdomain" is not provided, error will be raised after call from v1 + self.assertEquals( + str(e), + "Unable to validate App Keys(ClientID/ClientSecret) provided: " + str(json) + ". Please check your 'client_id', 'client_secret' or try adding the 'tenant_subdomain'.") + + @mock.patch("requests.post") + def test_error_2(self, mocked_post_request): + json = { + "error": "Client authentication failed."} + mocked_post_request.return_value = get_response(json) + config = { + "client_id": "client_id_123", + "client_secret": "client_secret_123", + "tenant_subdomain": "tenant_subdomain_123", + "start_date": "2019-01-01T00:00:00Z", + "request_timeout": "900", + "batch_size": 2500 + } + + try: + _client.get_auth_stub(config) + except Exception as e: + # as "tenant_subdomain" is provided, error will be raised after call from v2 + self.assertEquals( + str(e), + "Unable to validate App Keys(ClientID/ClientSecret) provided: " + str(json) + ". 
Please check your 'client_id', 'client_secret' or 'tenant_subdomain'.") + + @mock.patch("requests.post") + def test_error_3(self, mocked_post_request): + json = { + "error": "Client authentication failed."} + mocked_post_request.return_value = get_response(json) + config = { + "client_id": "", + "client_secret": "", + "tenant_subdomain": "", + "start_date": "2019-01-01T00:00:00Z", + "request_timeout": "900", + "batch_size": 2500 + } + + try: + _client.get_auth_stub(config) + except Exception as e: + # as "client_secret" and "client_id" is not provided and + # "tenant_subdomain" is not provided, error will be raised after call from v1 + self.assertEquals( + str(e), + "clientid or clientsecret is null: clientid and clientsecret must be passed when instantiating ET_Client or must be provided in environment variables / config file. Please check your 'client_id', 'client_secret' or try adding the 'tenant_subdomain'.") + + @mock.patch("requests.post") + def test_error_4(self, mocked_post_request): + json = { + "error": "Client authentication failed."} + mocked_post_request.return_value = get_response(json) + config = { + "client_id": "", + "client_secret": "", + "tenant_subdomain": "tenant_subdomain_123", + "start_date": "2019-01-01T00:00:00Z", + "request_timeout": "900", + "batch_size": 2500 + } + + try: + _client.get_auth_stub(config) + except Exception as e: + # as "client_secret" and "client_id" is not provided and + # "tenant_subdomain" is provided, error will be raised after call from v2 + self.assertEquals( + str(e), + "clientid or clientsecret is null: clientid and clientsecret must be passed when instantiating ET_Client or must be provided in environment variables / config file. 
Please check your 'client_id', 'client_secret' or 'tenant_subdomain'.") + + @mock.patch("FuelSDK.ET_Client") + def test_error_5(self, mocked_ET_Client): + mocked_ET_Client.side_effect = requests.exceptions.ConnectionError("Connection Error") + config = { + "client_id": "client_id_123", + "client_secret": "client_secret_123", + "tenant_subdomain": "", + "start_date": "2019-01-01T00:00:00Z", + "request_timeout": "900", + "batch_size": 2500 + } + + try: + _client.get_auth_stub(config) + except Exception as e: + # as "tenant_subdomain" is not provided, error will be raised after call from v1 + self.assertEquals( + str(e), + "Connection Error. Please check your 'client_id', 'client_secret' or try adding the 'tenant_subdomain'.") + + @mock.patch("FuelSDK.ET_Client") + def test_error_6(self, mocked_ET_Client): + mocked_ET_Client.side_effect = requests.exceptions.ConnectionError("Connection Error") + config = { + "client_id": "client_id_123", + "client_secret": "client_secret_123", + "tenant_subdomain": "tenant_subdomain_123", + "start_date": "2019-01-01T00:00:00Z", + "request_timeout": "900", + "batch_size": 2500 + } + + try: + _client.get_auth_stub(config) + except Exception as e: + # as "tenant_subdomain" is provided, error will be raised after call from v2 + self.assertEquals( + str(e), + "Connection Error. 
Please check your 'client_id', 'client_secret' or 'tenant_subdomain'.") + + @mock.patch("requests.post") + @mock.patch("tap_exacttarget.client.LOGGER.info") + def test_no_error_1(self, mocked_logger, mocked_post_request): + json = { + "accessToken": "access_token_123", + "expiresIn": time.time() + 3600, + "legacyToken": "legacyToken_123" + } + mocked_post_request.return_value = get_response(json) + config = { + "client_id": "client_id_123", + "client_secret": "client_secret_123", + "tenant_subdomain": "", + "start_date": "2019-01-01T00:00:00Z", + "request_timeout": "900", + "batch_size": 2500 + } + + _client.get_auth_stub(config) + # as "tenant_subdomain" is not provided, auth_stub will be generated from v1 + mocked_logger.assert_called_with("Success.") + + @mock.patch("requests.post") + @mock.patch("tap_exacttarget.client.LOGGER.info") + def test_no_error_2(self, mocked_logger, mocked_post_request): + json = { + "access_token": "access_token_123", + "expires_in": time.time() + 3600, + "rest_instance_url": "aaa", + "soap_instance_url": "bbb" + } + mocked_post_request.return_value = get_response(json) + config = { + "client_id": "client_id_123", + "client_secret": "client_secret_123", + "tenant_subdomain": "tenant_subdomain_123", + "start_date": "2019-01-01T00:00:00Z", + "request_timeout": "900", + "batch_size": 2500 + } + + _client.get_auth_stub(config) + # as "tenant_subdomain" is provided, auth_stub will be generated from v2 + mocked_logger.assert_called_with("Success.") From 1a7909b37fe3cc1aad50af1c4f8e13be3424bd3e Mon Sep 17 00:00:00 2001 From: Harsh <80324346+harshpatel4crest@users.noreply.github.com> Date: Wed, 13 Oct 2021 13:55:14 +0530 Subject: [PATCH 33/36] TDL-14989: Check best practices (#74) * added best practices * resolve pylint * resolve test failure * test: updated the test cases * test: updated some test cases * updated the code as per comments * resolve test case failure * updated the code as per comments * resolve integration test failure * 
resolve tes case failure * resolve test case failure * add data extension stream in test cases * added data extension incremental stream * test: run bookmark test * test: run bookmark test * test: run bookmark test, debug failing test * test: pylint resolve * test: run all data extenstion stream test in bookmark test * test: updated data extension code * updated the test to run data extension stream * run all tests * added sys.exit and updated pylint and astroid to latest versions * resolve pylint * updated the files as per comments * updated the code --- .circleci/config.yml | 2 +- setup.py | 8 +- tap_exacttarget/state.py | 9 +- tests/base.py | 197 +++++++++++++++++++++++++++ tests/test_exacttarget_all_fields.py | 110 +++++++++++++++ tests/test_exacttarget_base.py | 93 ------------- tests/test_exacttarget_bookmarks.py | 127 +++++++++++++++++ tests/test_exacttarget_discover.py | 101 -------------- tests/test_exacttarget_sync.py | 22 +++ 9 files changed, 470 insertions(+), 199 deletions(-) create mode 100644 tests/base.py create mode 100644 tests/test_exacttarget_all_fields.py delete mode 100644 tests/test_exacttarget_base.py create mode 100644 tests/test_exacttarget_bookmarks.py delete mode 100644 tests/test_exacttarget_discover.py create mode 100644 tests/test_exacttarget_sync.py diff --git a/.circleci/config.yml b/.circleci/config.yml index b1568b7..ef84689 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -11,7 +11,7 @@ jobs: python3 -mvenv /usr/local/share/virtualenvs/tap-exacttarget source /usr/local/share/virtualenvs/tap-exacttarget/bin/activate pip install -U 'pip<19.2' 'setuptools<51.0.0' - pip install .[dev] + pip install .[test] - run: name: 'unittest' command: | diff --git a/setup.py b/setup.py index df1e00c..eb8844b 100644 --- a/setup.py +++ b/setup.py @@ -12,17 +12,19 @@ py_modules=['tap_exacttarget'], install_requires=[ 'funcy==1.9.1', - 'singer-python==5.9.0', + 'singer-python==5.12.1', 'python-dateutil==2.6.0', 'voluptuous==0.10.5', 
'Salesforce-FuelSDK==1.3.0' ], extras_require={ - 'dev': [ - 'ipdb==0.11', + 'test': [ 'pylint==2.10.2', 'astroid==2.7.3', 'nose' + ], + 'dev': [ + 'ipdb==0.11' ] }, entry_points=''' diff --git a/tap_exacttarget/state.py b/tap_exacttarget/state.py index 966a88a..0836b12 100644 --- a/tap_exacttarget/state.py +++ b/tap_exacttarget/state.py @@ -44,8 +44,15 @@ def incorporate(state, table, field, value): if 'bookmarks' not in new_state: new_state['bookmarks'] = {} + # used 'parsed' value in second condition below instead of original 'value' + # because for data extensions bookmark value is coming in the format + # 'dd/mm/yyyy hh:mm:ss am/pm' and the bookmark in the state file + # is saved in 'yyyy-mm-ddThh:mm:ssZ' + # Value in STATE file: 2021-08-31T18:00:00Z + # Replication key value from data: 8/24/2021 6:00:00 PM + # Replication key value from data 'parsed': 2021-08-24T18:00:00Z if(new_state['bookmarks'].get(table, {}).get('last_record') is None or - new_state['bookmarks'].get(table, {}).get('last_record') < value): + new_state['bookmarks'].get(table, {}).get('last_record') < parsed): new_state['bookmarks'][table] = { 'field': field, 'last_record': parsed, diff --git a/tests/base.py b/tests/base.py new file mode 100644 index 0000000..c8e3f69 --- /dev/null +++ b/tests/base.py @@ -0,0 +1,197 @@ +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner +import os +import unittest +from datetime import datetime as dt +import time + +class ExactTargetBase(unittest.TestCase): + START_DATE = "" + DATETIME_FMT = { + "%Y-%m-%dT%H:%M:%SZ", + "%Y-%m-%d %H:%M:%S", + "%Y-%m-%dT%H:%M:%S.%fZ" + } + PRIMARY_KEYS = "table-key-properties" + REPLICATION_METHOD = "forced-replication-method" + REPLICATION_KEYS = "valid-replication-keys" + FULL_TABLE = "FULL_TABLE" + INCREMENTAL = "INCREMENTAL" + + def name(self): + return "tap_tester_exacttarget_base" + + def tap_name(self): + return "tap-exacttarget" + + def setUp(self): 
+ required_env = { + "TAP_EXACTTARGET_CLIENT_ID", + "TAP_EXACTTARGET_CLIENT_SECRET", + "TAP_EXACTTARGET_TENANT_SUBDOMAIN", + "TAP_EXACTTARGET_V2_CLIENT_ID", + "TAP_EXACTTARGET_V2_CLIENT_SECRET", + "TAP_EXACTTARGET_V2_TENANT_SUBDOMAIN", + } + missing_envs = [v for v in required_env if not os.getenv(v)] + if missing_envs: + raise Exception("set " + ", ".join(missing_envs)) + + def get_type(self): + return "platform.exacttarget" + + def get_credentials(self): + return { + 'client_secret': os.getenv('TAP_EXACTTARGET_CLIENT_SECRET') + } + + def get_properties(self, original: bool = True): + return_value = { + 'start_date': '2019-01-01T00:00:00Z', + 'client_id': os.getenv('TAP_EXACTTARGET_CLIENT_ID'), + 'tenant_subdomain': os.getenv('TAP_EXACTTARGET_TENANT_SUBDOMAIN') + } + if original: + return return_value + + # Reassign start date + return_value["start_date"] = self.START_DATE + return return_value + + def expected_metadata(self): + return { + "campaign": { + self.PRIMARY_KEYS: {"id"}, + self.REPLICATION_METHOD: self.FULL_TABLE + }, + "content_area":{ + self.PRIMARY_KEYS: {"ID"}, + self.REPLICATION_METHOD: self.INCREMENTAL, + self.REPLICATION_KEYS: {"ModifiedDate"}, + }, + "data_extension.test emails":{ + self.PRIMARY_KEYS: {"_CustomObjectKey", "ID"}, + self.REPLICATION_METHOD: self.FULL_TABLE, + }, + "data_extension.This is a test":{ + self.PRIMARY_KEYS: {"_CustomObjectKey", "ID"}, + self.REPLICATION_METHOD: self.FULL_TABLE, + }, + "data_extension.my_test":{ + self.PRIMARY_KEYS: {"_CustomObjectKey", "ID"}, + self.REPLICATION_METHOD: self.FULL_TABLE, + }, + "data_extension.test 1":{ + self.PRIMARY_KEYS: {"_CustomObjectKey", "ID"}, + self.REPLICATION_METHOD: self.INCREMENTAL, + self.REPLICATION_KEYS: {"JoinDate"}, + }, + "email":{ + self.PRIMARY_KEYS: {"ID"}, + self.REPLICATION_METHOD: self.INCREMENTAL, + self.REPLICATION_KEYS: {"ModifiedDate"}, + }, + "event": { + self.PRIMARY_KEYS: {"SendID", "EventType", "SubscriberKey", "EventDate"}, + self.REPLICATION_METHOD: 
self.INCREMENTAL, + self.REPLICATION_KEYS: {"EventDate"}, + }, + "folder":{ + self.PRIMARY_KEYS: {"ID"}, + self.REPLICATION_METHOD: self.INCREMENTAL, + self.REPLICATION_KEYS: {"ModifiedDate"}, + }, + "list":{ + self.PRIMARY_KEYS: {"ID"}, + self.REPLICATION_METHOD: self.INCREMENTAL, + self.REPLICATION_KEYS: {"ModifiedDate"}, + }, + "list_send":{ + self.PRIMARY_KEYS: {"ListID", "SendID"}, + self.REPLICATION_METHOD: self.INCREMENTAL, + self.REPLICATION_KEYS: {"ModifiedDate"}, + }, + "list_subscriber":{ + self.PRIMARY_KEYS: {"SubscriberKey", "ListID"}, + self.REPLICATION_METHOD: self.INCREMENTAL, + self.REPLICATION_KEYS: {"ModifiedDate"}, + }, + "send":{ + self.PRIMARY_KEYS: {"ID"}, + self.REPLICATION_METHOD: self.INCREMENTAL, + self.REPLICATION_KEYS: {"ModifiedDate"}, + }, + "subscriber":{ + self.PRIMARY_KEYS: {"ID"}, + self.REPLICATION_METHOD: self.INCREMENTAL, + self.REPLICATION_KEYS: {"ModifiedDate"}, + } + } + + def streams_to_select(self): + return set(self.expected_metadata().keys()) - {'event', 'list_send'} + + def expected_replication_keys(self): + return {table: properties.get(self.REPLICATION_KEYS, set()) + for table, properties in self.expected_metadata().items()} + + def expected_primary_keys(self): + return {table: properties.get(self.PRIMARY_KEYS, set()) + for table, properties in self.expected_metadata().items()} + + def expected_replication_method(self): + return {table: properties.get(self.REPLICATION_METHOD, set()) + for table, properties in self.expected_metadata().items()} + + def select_found_catalogs(self, conn_id, catalogs, only_streams=None, deselect_all_fields: bool = False, non_selected_props=[]): + """Select all streams and all fields within streams""" + for catalog in catalogs: + if only_streams and catalog["stream_name"] not in only_streams: + continue + + schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + + non_selected_properties = non_selected_props if not deselect_all_fields else [] + if deselect_all_fields: + # 
get a list of all properties so that none are selected + non_selected_properties = schema.get('annotated-schema', {}).get('properties', {}) + non_selected_properties = non_selected_properties.keys() + + additional_md = [] + connections.select_catalog_and_fields_via_metadata(conn_id, + catalog, + schema, + additional_md=additional_md, + non_selected_fields=non_selected_properties) + + def run_and_verify_sync(self, conn_id): + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + sync_record_count = runner.examine_target_output_file(self, + conn_id, + self.streams_to_select(), + self.expected_primary_keys()) + + self.assertGreater( + sum(sync_record_count.values()), 0, + msg="failed to replicate any data: {}".format(sync_record_count) + ) + print("total replicated row count: %s", sum(sync_record_count.values())) + + return sync_record_count + + def dt_to_ts(self, dtime): + for date_format in self.DATETIME_FMT: + try: + date_stripped = int(time.mktime(dt.strptime(dtime, date_format).timetuple())) + return date_stripped + except ValueError: + continue + + def is_incremental(self, stream): + return self.expected_metadata()[stream][self.REPLICATION_METHOD] == self.INCREMENTAL diff --git a/tests/test_exacttarget_all_fields.py b/tests/test_exacttarget_all_fields.py new file mode 100644 index 0000000..386f50f --- /dev/null +++ b/tests/test_exacttarget_all_fields.py @@ -0,0 +1,110 @@ +from base import ExactTargetBase +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +class ExactTargetAllFields(ExactTargetBase): + + # Note: some fields are not retrievable as discussed below + # https://salesforce.stackexchange.com/questions/354332/not-getting-modifieddate-for-listsend-endpoint + # so we have to remove them + fields_to_remove = 
{ + 'list': [ + 'SendClassification', # not retrievable + 'PartnerProperties'], # not retrievable + 'subscriber': [ + 'CustomerKey', # not retrievable + 'PartnerType', # not retrievable + 'UnsubscribedDate', + 'PrimarySMSAddress', # not retrievable + 'PrimaryEmailAddress', # not retrievable + 'PartnerProperties', # not retrievable + 'SubscriberTypeDefinition', # not retrievable + 'Addresses', # not retrievable + 'ListIDs', + 'Locale', # not retrievable + 'PrimarySMSPublicationStatus', # not retrievable + 'ModifiedDate'], # not retrievable + 'list_send': [ + 'CreatedDate', # not retrievable + 'CustomerKey', # not retrievable + 'ID', + 'PartnerProperties', # not retrievable + 'ModifiedDate'], # not retrievable + 'folder': [ + 'Type', + 'PartnerProperties'], + 'email': [ + '__AdditionalEmailAttribute1', # not retrievable + '__AdditionalEmailAttribute3', # not retrievable + 'SyncTextWithHTML', # not retrievable + 'PartnerProperties', # not retrievable + '__AdditionalEmailAttribute5', # not retrievable + 'ClonedFromID', + '__AdditionalEmailAttribute4', # not retrievable + '__AdditionalEmailAttribute2'], # not retrievable + 'content_area': [ + # most of them are included in the 'Content' data + 'BackgroundColor', # not retrievable + 'Cellpadding', # not retrievable + 'HasFontSize', # not retrievable + 'BorderColor', # not retrievable + 'BorderWidth', # not retrievable + 'Width', # not retrievable + 'IsLocked', # not retrievable + 'Cellspacing', # not retrievable + 'FontFamily'] # not retrievable + } + + def name(self): + return "tap_tester_exacttarget_all_fields" + + def test_run(self): + conn_id = connections.ensure_connection(self) + runner.run_check_mode(self, conn_id) + + expected_streams = self.streams_to_select() + + found_catalogs = menagerie.get_catalogs(conn_id) + self.select_found_catalogs(conn_id, found_catalogs, only_streams=expected_streams) + + test_catalogs_all_fields = [catalog for catalog in found_catalogs + if catalog.get('stream_name') in 
expected_streams] + + # grab metadata after performing table-and-field selection to set expectations + stream_to_all_catalog_fields = dict() # used for asserting all fields are replicated + for catalog in test_catalogs_all_fields: + stream_id, stream_name = catalog['stream_id'], catalog['stream_name'] + catalog_entry = menagerie.get_annotated_schema(conn_id, stream_id) + fields_from_field_level_md = [md_entry['breadcrumb'][1] for md_entry in catalog_entry['metadata'] + if md_entry['breadcrumb'] != []] + stream_to_all_catalog_fields[stream_name] = set(fields_from_field_level_md) + + # run initial sync + record_count_by_stream = self.run_and_verify_sync(conn_id) + synced_records = runner.get_records_from_target_output() + + # Verify no unexpected streams were replicated + synced_stream_names = set(synced_records.keys()) + self.assertSetEqual(expected_streams, synced_stream_names) + + for stream in expected_streams: + with self.subTest(stream=stream): + + # get all expected keys + expected_all_keys = stream_to_all_catalog_fields[stream] + + # collect actual values + messages = synced_records.get(stream) + actual_all_keys = [set(message['data'].keys()) for message in messages['messages'] + if message['action'] == 'upsert'][0] + + # Verify that you get some records for each stream + self.assertGreater(record_count_by_stream.get(stream, -1), 0) + + # remove some fields as data cannot be generated / retrieved + fields = self.fields_to_remove.get(stream) or [] + for field in fields: + expected_all_keys.remove(field) + + self.assertSetEqual(expected_all_keys, actual_all_keys) diff --git a/tests/test_exacttarget_base.py b/tests/test_exacttarget_base.py deleted file mode 100644 index 7d921f5..0000000 --- a/tests/test_exacttarget_base.py +++ /dev/null @@ -1,93 +0,0 @@ - -import datetime -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner -import os -import unittest -import pdb -import json -import requests 
- - -class ExactTargetBase(unittest.TestCase): - - def name(self): - return "tap_tester_exacttarget_base" - - def tap_name(self): - return "tap-exacttarget" - - def setUp(self): - required_env = { - "client_id": "TAP_EXACTTARGET_CLIENT_ID", - "client_secret": "TAP_EXACTTARGET_CLIENT_SECRET", - } - missing_envs = [v for v in required_env.values() if not os.getenv(v)] - if missing_envs: - raise Exception("set " + ", ".join(missing_envs)) - - def get_type(self): - return "platform.exacttarget" - - def get_credentials(self): - return { - 'client_secret': os.getenv('TAP_EXACTTARGET_CLIENT_SECRET') - } - - def get_properties(self): - yesterday = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=1) - return { - 'start_date': yesterday.strftime("%Y-%m-%dT00:00:00Z"), - 'client_id': os.getenv('TAP_EXACTTARGET_CLIENT_ID') - } - - def streams_to_select(self): - # Note: Custom streams failed on our account with an error on - # `_CustomObjectKey` not being valid - return ["campaign", - "content_area", - "email", - "event", - "folder", - "list", - "list_send", - "list_subscriber", - "send", - "subscriber"] - - def select_found_catalogs(self, conn_id, found_catalogs, only_streams=None): - selected = [] - for catalog in found_catalogs: - if only_streams and catalog["tap_stream_id"] not in only_streams: - continue - schema = menagerie.select_catalog(conn_id, catalog) - - selected.append({ - "key_properties": catalog.get("key_properties"), - "schema": schema, - "tap_stream_id": catalog.get("tap_stream_id"), - "replication_method": catalog.get("replication_method"), - "replication_key": catalog.get("replication_key"), - }) - - for catalog_entry in selected: - connections.select_catalog_and_fields_via_metadata( - conn_id, - catalog_entry, - {"annotated-schema": catalog_entry['schema']} - ) - - def test_run(self): - conn_id = connections.ensure_connection(self) - runner.run_check_mode(self, conn_id) - - found_catalogs = menagerie.get_catalogs(conn_id) - 
self.select_found_catalogs(conn_id, found_catalogs, only_streams=self.streams_to_select()) - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - diff --git a/tests/test_exacttarget_bookmarks.py b/tests/test_exacttarget_bookmarks.py new file mode 100644 index 0000000..ea856f2 --- /dev/null +++ b/tests/test_exacttarget_bookmarks.py @@ -0,0 +1,127 @@ +import datetime +from dateutil.parser import parse +from base import ExactTargetBase +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner +import datetime +import dateutil.parser +import pytz + +class ExactTargetBookmarks(ExactTargetBase): + def name(self): + return "tap_tester_exacttarget_bookmarks" + + def convert_state_to_utc(self, date_str): + date_object = dateutil.parser.parse(date_str) + date_object_utc = date_object.astimezone(tz=pytz.UTC) + return datetime.datetime.strftime(date_object_utc, "%Y-%m-%dT%H:%M:%SZ") + + def test_run(self): + self.run_test(self.streams_to_select() - {'data_extension.test 1'}, '2019-01-01T00:00:00Z') + self.run_test({'data_extension.test 1'}, '2021-08-01T00:00:00Z') + + def run_test(self, streams, start_date): + self.START_DATE = start_date + + conn_id = connections.ensure_connection(self, original_properties=False) + runner.run_check_mode(self, conn_id) + + expected_streams = streams + + found_catalogs_1 = menagerie.get_catalogs(conn_id) + self.select_found_catalogs(conn_id, found_catalogs_1, only_streams=expected_streams) + + # Run a sync job using orchestrator + first_sync_record_count = self.run_and_verify_sync(conn_id) + first_sync_records = runner.get_records_from_target_output() + first_sync_bookmarks = menagerie.get_state(conn_id) + + ########################################################################## + ### Update State + 
########################################################################## + + new_state = {'bookmarks': dict()} + replication_keys = self.expected_replication_keys() + for stream in expected_streams: + if self.is_incremental(stream): + new_state['bookmarks'][stream] = dict() + new_state['bookmarks'][stream]['field'] = next(iter(replication_keys[stream])) + new_state['bookmarks'][stream]['last_record'] = '2021-08-23T00:00:00Z' if stream == 'data_extension.test 1' else '2019-01-14T00:00:00Z' + + # Set state for next sync + menagerie.set_state(conn_id, new_state) + + ########################################################################## + ### Second Sync + ########################################################################## + + # Run a sync job using orchestrator + second_sync_record_count = self.run_and_verify_sync(conn_id) + second_sync_records = runner.get_records_from_target_output() + second_sync_bookmarks = menagerie.get_state(conn_id) + + for stream in expected_streams: + # skip "subscriber" stream as replication key is not retrievable + if stream == "subscriber": + continue + + with self.subTest(stream=stream): + # collect information for assertions from syncs 1 & 2 base on expected values + first_sync_count = first_sync_record_count.get(stream, 0) + second_sync_count = second_sync_record_count.get(stream, 0) + first_sync_messages = [record.get('data') for record in first_sync_records.get(stream).get('messages') + if record.get('action') == 'upsert'] + second_sync_messages = [record.get('data') for record in second_sync_records.get(stream).get('messages') + if record.get('action') == 'upsert'] + first_bookmark_key_value = first_sync_bookmarks.get('bookmarks', {stream: None}).get(stream) + second_bookmark_key_value = second_sync_bookmarks.get('bookmarks', {stream: None}).get(stream) + + if self.is_incremental(stream): + + # collect information specific to incremental streams from syncs 1 & 2 + replication_key = 
next(iter(self.expected_replication_keys()[stream])) + first_bookmark_value = first_bookmark_key_value.get('last_record') + second_bookmark_value = second_bookmark_key_value.get('last_record') + first_bookmark_value_utc = self.convert_state_to_utc(first_bookmark_value) + second_bookmark_value_utc = self.convert_state_to_utc(second_bookmark_value) + + # Verify the first sync sets a bookmark of the expected form + self.assertIsNotNone(first_bookmark_key_value) + self.assertIsNotNone(first_bookmark_key_value.get('last_record')) + + # Verify the second sync sets a bookmark of the expected form + self.assertIsNotNone(second_bookmark_key_value) + self.assertIsNotNone(second_bookmark_key_value.get('last_record')) + + # Verify the second sync bookmark is Equal to the first sync bookmark + self.assertEqual(second_bookmark_value, first_bookmark_value) # assumes no changes to data during test + + + for record in second_sync_messages: + + # Verify the second sync bookmark value is the max replication key value for a given stream + replication_key_value = record.get(replication_key) + replication_key_value_parsed = parse(replication_key_value).strftime("%Y-%m-%dT%H:%M:%SZ") + self.assertLessEqual(replication_key_value_parsed, second_bookmark_value_utc, + msg="Second sync bookmark was set incorrectly, a record with a greater replication-key value was synced.") + + for record in first_sync_messages: + + # Verify the first sync bookmark value is the max replication key value for a given stream + replication_key_value = record.get(replication_key) + replication_key_value_parsed = parse(replication_key_value).strftime("%Y-%m-%dT%H:%M:%SZ") + self.assertLessEqual(replication_key_value_parsed, first_bookmark_value_utc, + msg="First sync bookmark was set incorrectly, a record with a greater replication-key value was synced.") + + # Verify the number of records in the 2nd sync is less then or equal to the first + self.assertLessEqual(second_sync_count, first_sync_count) + + else: + + # 
Verify the syncs do not set a bookmark for full table streams + self.assertIsNone(first_bookmark_key_value) + self.assertIsNone(second_bookmark_key_value) + + # Verify the number of records in the second sync is the same as the first + self.assertEqual(second_sync_count, first_sync_count) diff --git a/tests/test_exacttarget_discover.py b/tests/test_exacttarget_discover.py deleted file mode 100644 index c6755b8..0000000 --- a/tests/test_exacttarget_discover.py +++ /dev/null @@ -1,101 +0,0 @@ -import datetime -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner -import os -import unittest -import pdb -import json -import requests - - -class ExactTargetDiscover(unittest.TestCase): - - def name(self): - return "tap_tester_exacttarget_discover_v1" - - def tap_name(self): - return "tap-exacttarget" - - def setUp(self): - required_env = { - "TAP_EXACTTARGET_CLIENT_ID", - "TAP_EXACTTARGET_CLIENT_SECRET", - "TAP_EXACTTARGET_TENANT_SUBDOMAIN", - "TAP_EXACTTARGET_V2_CLIENT_ID", - "TAP_EXACTTARGET_V2_CLIENT_SECRET", - "TAP_EXACTTARGET_V2_TENANT_SUBDOMAIN", - } - missing_envs = [v for v in required_env if not os.getenv(v)] - if missing_envs: - raise Exception("set " + ", ".join(missing_envs)) - - def get_type(self): - return "platform.exacttarget" - - def get_credentials(self): - return { - 'client_secret': os.getenv('TAP_EXACTTARGET_CLIENT_SECRET') - } - - def get_properties(self): - yesterday = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=1) - return { - 'start_date': yesterday.strftime("%Y-%m-%dT00:00:00Z"), - 'client_id': os.getenv('TAP_EXACTTARGET_CLIENT_ID') - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - runner.run_check_mode(self, conn_id) - - found_catalog = menagerie.get_catalog(conn_id) - for catalog_entry in found_catalog['streams']: - field_names_in_schema = set([ k for k in catalog_entry['schema']['properties'].keys()]) - 
field_names_in_breadcrumbs = set([x['breadcrumb'][1] for x in catalog_entry['metadata'] if len(x['breadcrumb']) == 2]) - self.assertEqual(field_names_in_schema, field_names_in_breadcrumbs) - - inclusions_set = set([(x['breadcrumb'][1], x['metadata']['inclusion']) - for x in catalog_entry['metadata'] - if len(x['breadcrumb']) == 2]) - # Validate that all fields are in metadata - self.assertEqual(len(inclusions_set), len(field_names_in_schema)) - self.assertEqual(set([i[0] for i in inclusions_set]), field_names_in_schema) - # Validate that all metadata['inclusion'] are 'available' - unique_inclusions = set([i[1] for i in inclusions_set]) - self.assertTrue(len(unique_inclusions) == 1 and 'available' in unique_inclusions) - -class ExactTargetDiscover2(ExactTargetDiscover): - def name(self): - return "tap_tester_exacttarget_discover_v1_with_subdomain" - - def get_credentials(self): - return { - 'client_secret': os.getenv('TAP_EXACTTARGET_CLIENT_SECRET') - } - - def get_properties(self): - yesterday = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=1) - return { - 'start_date': yesterday.strftime("%Y-%m-%dT00:00:00Z"), - 'client_id': os.getenv('TAP_EXACTTARGET_CLIENT_ID'), - 'tenant_subdomain': os.getenv('TAP_EXACTTARGET_TENANT_SUBDOMAIN') - } - - -class ExactTargetDiscover3(ExactTargetDiscover): - def name(self): - return "tap_tester_exacttarget_discover_v2_with_subdomain" - - def get_credentials(self): - return { - 'client_secret': os.getenv('TAP_EXACTTARGET_V2_CLIENT_SECRET') - } - - def get_properties(self): - yesterday = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=1) - return { - 'start_date': yesterday.strftime("%Y-%m-%dT00:00:00Z"), - 'client_id': os.getenv('TAP_EXACTTARGET_V2_CLIENT_ID'), - 'tenant_subdomain': os.getenv('TAP_EXACTTARGET_V2_TENANT_SUBDOMAIN') - } diff --git a/tests/test_exacttarget_sync.py b/tests/test_exacttarget_sync.py new file mode 100644 index 0000000..7851e81 --- /dev/null +++ 
b/tests/test_exacttarget_sync.py @@ -0,0 +1,22 @@ +from base import ExactTargetBase +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +class ExactTargetSync(ExactTargetBase): + + def name(self): + return "tap_tester_exacttarget_sync" + + def test_run(self): + expected_streams = self.streams_to_select() + conn_id = connections.ensure_connection(self) + runner.run_check_mode(self, conn_id) + + found_catalogs = menagerie.get_catalogs(conn_id) + self.select_found_catalogs(conn_id, found_catalogs, only_streams=expected_streams) + + sync_record_count = self.run_and_verify_sync(conn_id) + + for stream in expected_streams: + self.assertGreater(sync_record_count.get(stream, 0), 0) From ca2ca099106184e171a1f2dfe6c34cfa945e6bb2 Mon Sep 17 00:00:00 2001 From: Harsh <80324346+harshpatel4crest@users.noreply.github.com> Date: Wed, 13 Oct 2021 14:33:50 +0530 Subject: [PATCH 34/36] TDL-14889: Keys should be marked automatic and TDL-14891: list_sends stream does not bookmark correctly (#75) * make keys automatic * pylint resolve * add full replication test case * added code change for data extension stream * pylint resolve * added comment * added comments * added comment in base file * updated discovery test and removed full replication test * updated the code * added a comment explaining subscriber and list subscriber syncing * added comments --- tap_exacttarget/__init__.py | 7 ++ tap_exacttarget/dao.py | 19 +++- tap_exacttarget/endpoints/campaigns.py | 1 + tap_exacttarget/endpoints/content_areas.py | 2 + tap_exacttarget/endpoints/data_extensions.py | 102 ++++++++++++++++-- tap_exacttarget/endpoints/emails.py | 2 + tap_exacttarget/endpoints/events.py | 2 + tap_exacttarget/endpoints/folders.py | 2 + tap_exacttarget/endpoints/list_sends.py | 25 +---- tap_exacttarget/endpoints/list_subscribers.py | 2 + tap_exacttarget/endpoints/lists.py | 2 + tap_exacttarget/endpoints/sends.py | 2 + 
tap_exacttarget/endpoints/subscribers.py | 2 + tests/base.py | 11 +- 14 files changed, 142 insertions(+), 39 deletions(-) diff --git a/tap_exacttarget/__init__.py b/tap_exacttarget/__init__.py index 58bcd9d..293389a 100644 --- a/tap_exacttarget/__init__.py +++ b/tap_exacttarget/__init__.py @@ -104,6 +104,13 @@ def do_sync(args): .format(stream_catalog.get('stream'))) continue + # The 'subscribers' stream is the child stream of 'list_subscribers' + # When we sync 'list_subscribers', it makes the list of subscriber's + # 'SubscriberKey' that were returned as part of 'list_subscribers' records + # and pass that list to 'subscribers' stream and thus 'subscribers' stream + # will only sync records of subscribers that are present in the list. + # Hence, for different start dates the 'SubscriberKey' list will differ and + # thus 'subscribers' records will also be different for different start dates. if SubscriberDataAccessObject.matches_catalog(stream_catalog): subscriber_selected = True subscriber_catalog = stream_catalog diff --git a/tap_exacttarget/dao.py b/tap_exacttarget/dao.py index 445d30d..82d8366 100644 --- a/tap_exacttarget/dao.py +++ b/tap_exacttarget/dao.py @@ -38,16 +38,25 @@ def generate_catalog(self): cls = self.__class__ mdata = metadata.new() - metadata.write(mdata, (), 'inclusion', 'available') - for prop in cls.SCHEMA['properties']: # pylint:disable=unsubscriptable-object - metadata.write(mdata, ('properties', prop), 'inclusion', 'available') + + # use 'get_standard_metadata' with primary key, replication key and replication method + mdata = metadata.get_standard_metadata(schema=self.SCHEMA, + key_properties=self.KEY_PROPERTIES, + valid_replication_keys=self.REPLICATION_KEYS if self.REPLICATION_KEYS else None, + replication_method=self.REPLICATION_METHOD) + + mdata_map = metadata.to_map(mdata) + + # make 'automatic' inclusion for replication keys + for replication_key in self.REPLICATION_KEYS: + mdata_map[('properties', replication_key)]['inclusion'] = 
'automatic' return [{ 'tap_stream_id': cls.TABLE, 'stream': cls.TABLE, 'key_properties': cls.KEY_PROPERTIES, 'schema': cls.SCHEMA, - 'metadata': metadata.to_list(mdata) + 'metadata': metadata.to_list(mdata_map) }] def filter_keys_and_parse(self, obj): @@ -88,6 +97,8 @@ def sync(self): SCHEMA = None TABLE = None KEY_PROPERTIES = None + REPLICATION_KEYS = [] + REPLICATION_METHOD = None def sync_data(self): # pylint: disable=no-self-use raise RuntimeError('sync_data is not implemented!') diff --git a/tap_exacttarget/endpoints/campaigns.py b/tap_exacttarget/endpoints/campaigns.py index 03ed37e..52a434b 100644 --- a/tap_exacttarget/endpoints/campaigns.py +++ b/tap_exacttarget/endpoints/campaigns.py @@ -36,6 +36,7 @@ class CampaignDataAccessObject(DataAccessObject): TABLE = 'campaign' KEY_PROPERTIES = ['id'] + REPLICATION_METHOD = 'FULL_TABLE' @exacttarget_error_handling def sync_data(self): diff --git a/tap_exacttarget/endpoints/content_areas.py b/tap_exacttarget/endpoints/content_areas.py index 1f5f621..994ec24 100644 --- a/tap_exacttarget/endpoints/content_areas.py +++ b/tap_exacttarget/endpoints/content_areas.py @@ -104,6 +104,8 @@ class ContentAreaDataAccessObject(DataAccessObject): TABLE = 'content_area' KEY_PROPERTIES = ['ID'] + REPLICATION_METHOD = 'INCREMENTAL' + REPLICATION_KEYS = ['ModifiedDate'] @exacttarget_error_handling def sync_data(self): diff --git a/tap_exacttarget/endpoints/data_extensions.py b/tap_exacttarget/endpoints/data_extensions.py index d87a9e0..a2aa248 100644 --- a/tap_exacttarget/endpoints/data_extensions.py +++ b/tap_exacttarget/endpoints/data_extensions.py @@ -80,17 +80,31 @@ def _get_extensions(self): } } }, - 'metadata': [{'breadcrumb': (), 'metadata': {'inclusion':'available'}}, - {'breadcrumb': ('properties', '_CustomObjectKey'), - 'metadata': {'inclusion':'available'}}, - {'breadcrumb': ('properties', 'CategoryID'), - 'metadata': {'inclusion':'available'}}] + 'metadata': [ + { + 'breadcrumb': (), + 'metadata': { + 
'inclusion':'available', + 'forced-replication-method': 'FULL_TABLE', + 'table-key-properties': ['_CustomObjectKey'], + 'valid-replication-keys': [] + } + }, + { + 'breadcrumb': ('properties', '_CustomObjectKey'), + 'metadata': {'inclusion':'automatic'} + }, + { + 'breadcrumb': ('properties', 'CategoryID'), + 'metadata': {'inclusion':'available'} + } + ] } return to_return @exacttarget_error_handling - def _get_fields(self, extensions): + def _get_fields(self, extensions): # pylint: disable=too-many-branches to_return = extensions.copy() result = request( @@ -98,17 +112,31 @@ def _get_fields(self, extensions): FuelSDK.ET_DataExtension_Column, self.auth_stub) + # iterate through all the fields and determine if it is primary key + # or replication key and update the catalog file accordingly: + # is_primary_key: + # update catalog file by appending that field in 'table-key-properties' + # is_replication_key: + # update value of 'forced-replication-method' as INCREMENTAL + # update catalog file by appending that field in 'valid-replication-keys' + # add 'AUTOMATIC' replication method for both primary and replication keys for field in result: + is_replication_key = False + is_primary_key = False extension_id = field.DataExtension.CustomerKey field = sudsobj_to_dict(field) field_name = field['Name'] if field.get('IsPrimaryKey'): + is_primary_key = True to_return = _merge_in( to_return, [extension_id, 'key_properties'], field_name) + if field_name in ['ModifiedDate', 'JoinDate']: + is_replication_key = True + field_schema = { 'type': [ 'null', @@ -122,13 +150,65 @@ def _get_fields(self, extensions): [extension_id, 'schema', 'properties', field_name], field_schema) + # add primary key in 'table-key-properties' + if is_primary_key: + for mdata in to_return[extension_id]['metadata']: + if not mdata.get('breadcrumb'): + mdata.get('metadata').get('table-key-properties').append(field_name) + + # add replication key in 'valid-replication-keys' + # and change 
'forced-replication-method' to INCREMENTAL + if is_replication_key: + for mdata in to_return[extension_id]['metadata']: + if not mdata.get('breadcrumb'): + mdata.get('metadata')['forced-replication-method'] = "INCREMENTAL" + mdata.get('metadata').get('valid-replication-keys').append(field_name) + # These fields are defaulted into the schema, do not add to metadata again. if field_name not in {'_CustomObjectKey', 'CategoryID'}: - to_return[extension_id]['metadata'].append({ - 'breadcrumb': ('properties', field_name), - 'metadata': {'inclusion': 'available'} - }) - + # if primary or replication key, then mark it as automatic + if is_primary_key or is_replication_key: + to_return[extension_id]['metadata'].append({ + 'breadcrumb': ('properties', field_name), + 'metadata': {'inclusion': 'automatic'} + }) + else: + to_return[extension_id]['metadata'].append({ + 'breadcrumb': ('properties', field_name), + 'metadata': {'inclusion': 'available'} + }) + + # the structure of 'to_return' is like: + # { + # 'de1': { + # 'tap_stream_id': 'data_extension.de1', + # 'stream': 'data_extension.de1', + # 'key_properties': ['_CustomObjectKey'], + # 'schema': { + # 'type': 'object', + # 'properties': {...} + # }, + # 'metadata': [...] + # }, + # 'de2': { + # 'tap_stream_id': 'data_extension.de2', + # 'stream': 'data_extension.de2', + # 'key_properties': ['_CustomObjectKey'], + # 'schema': { + # 'type': 'object', + # 'properties': {...} + # }, + # 'metadata': [...] 
+ # } + # } + + # loop through all the data extension catalog in 'to_return' + # and remove empty 'valid-replication-keys' present in metadata + for catalog in to_return.values(): + for mdata in catalog.get('metadata'): + if not mdata.get('breadcrumb'): + if not mdata.get('metadata').get('valid-replication-keys'): + del mdata.get('metadata')['valid-replication-keys'] return to_return def generate_catalog(self): diff --git a/tap_exacttarget/endpoints/emails.py b/tap_exacttarget/endpoints/emails.py index 19600d3..d469795 100644 --- a/tap_exacttarget/endpoints/emails.py +++ b/tap_exacttarget/endpoints/emails.py @@ -107,6 +107,8 @@ class EmailDataAccessObject(DataAccessObject): TABLE = 'email' KEY_PROPERTIES = ['ID'] + REPLICATION_METHOD = 'INCREMENTAL' + REPLICATION_KEYS = ['ModifiedDate'] def parse_object(self, obj): to_return = obj.copy() diff --git a/tap_exacttarget/endpoints/events.py b/tap_exacttarget/endpoints/events.py index c7044f0..b274b5c 100644 --- a/tap_exacttarget/endpoints/events.py +++ b/tap_exacttarget/endpoints/events.py @@ -45,6 +45,8 @@ class EventDataAccessObject(DataAccessObject): TABLE = 'event' KEY_PROPERTIES = ['SendID', 'EventType', 'SubscriberKey', 'EventDate'] + REPLICATION_METHOD = 'INCREMENTAL' + REPLICATION_KEYS = ['EventDate'] @exacttarget_error_handling def sync_data(self): diff --git a/tap_exacttarget/endpoints/folders.py b/tap_exacttarget/endpoints/folders.py index 2c9c0d6..9f7bd6a 100644 --- a/tap_exacttarget/endpoints/folders.py +++ b/tap_exacttarget/endpoints/folders.py @@ -52,6 +52,8 @@ class FolderDataAccessObject(DataAccessObject): TABLE = 'folder' KEY_PROPERTIES = ['ID'] + REPLICATION_METHOD = 'INCREMENTAL' + REPLICATION_KEYS = ['ModifiedDate'] def parse_object(self, obj): to_return = obj.copy() diff --git a/tap_exacttarget/endpoints/list_sends.py b/tap_exacttarget/endpoints/list_sends.py index 3dc62cb..d414bca 100644 --- a/tap_exacttarget/endpoints/list_sends.py +++ b/tap_exacttarget/endpoints/list_sends.py @@ -6,8 +6,6 @@ 
from tap_exacttarget.schemas import ID_FIELD, CUSTOM_PROPERTY_LIST, \ CREATED_DATE_FIELD, CUSTOMER_KEY_FIELD, OBJECT_ID_FIELD, \ MODIFIED_DATE_FIELD, with_properties -from tap_exacttarget.state import incorporate, save_state, \ - get_last_record_value_for_table LOGGER = singer.get_logger() @@ -94,6 +92,7 @@ class ListSendDataAccessObject(DataAccessObject): TABLE = 'list_send' KEY_PROPERTIES = ['ListID', 'SendID'] + REPLICATION_METHOD = 'FULL_TABLE' def parse_object(self, obj): to_return = obj.copy() @@ -107,29 +106,13 @@ def sync_data(self): table = self.__class__.TABLE selector = FuelSDK.ET_ListSend - search_filter = None - retrieve_all_since = get_last_record_value_for_table(self.state, table) - - if retrieve_all_since is not None: - search_filter = { - 'Property': 'ModifiedDate', - 'SimpleOperator': 'greaterThan', - 'Value': retrieve_all_since - } - + # making this endpoint as FULL_TABLE, as 'ModifiedDate' is not retrievable as discussed + # here: https://salesforce.stackexchange.com/questions/354332/not-getting-modifieddate-for-listsend-endpoint stream = request('ListSend', selector, - self.auth_stub, - search_filter) + self.auth_stub) for list_send in stream: list_send = self.filter_keys_and_parse(list_send) - self.state = incorporate(self.state, - table, - 'ModifiedDate', - list_send.get('ModifiedDate')) - singer.write_records(table, [list_send]) - - save_state(self.state) diff --git a/tap_exacttarget/endpoints/list_subscribers.py b/tap_exacttarget/endpoints/list_subscribers.py index 75aa4c4..a8126b6 100644 --- a/tap_exacttarget/endpoints/list_subscribers.py +++ b/tap_exacttarget/endpoints/list_subscribers.py @@ -55,6 +55,8 @@ class ListSubscriberDataAccessObject(DataAccessObject): TABLE = 'list_subscriber' KEY_PROPERTIES = ['SubscriberKey', 'ListID'] + REPLICATION_METHOD = 'INCREMENTAL' + REPLICATION_KEYS = ['ModifiedDate'] def __init__(self, config, state, auth_stub, catalog): super().__init__( diff --git a/tap_exacttarget/endpoints/lists.py 
b/tap_exacttarget/endpoints/lists.py index 263beb4..8c2de28 100644 --- a/tap_exacttarget/endpoints/lists.py +++ b/tap_exacttarget/endpoints/lists.py @@ -49,6 +49,8 @@ class ListDataAccessObject(DataAccessObject): TABLE = 'list' KEY_PROPERTIES = ['ID'] + REPLICATION_METHOD = 'INCREMENTAL' + REPLICATION_KEYS = ['ModifiedDate'] @exacttarget_error_handling def sync_data(self): diff --git a/tap_exacttarget/endpoints/sends.py b/tap_exacttarget/endpoints/sends.py index 9a900cf..2edc4a2 100644 --- a/tap_exacttarget/endpoints/sends.py +++ b/tap_exacttarget/endpoints/sends.py @@ -81,6 +81,8 @@ class SendDataAccessObject(DataAccessObject): TABLE = 'send' KEY_PROPERTIES = ['ID'] + REPLICATION_METHOD = 'INCREMENTAL' + REPLICATION_KEYS = ['ModifiedDate'] def parse_object(self, obj): to_return = obj.copy() diff --git a/tap_exacttarget/endpoints/subscribers.py b/tap_exacttarget/endpoints/subscribers.py index a0b54a0..e5b1eef 100644 --- a/tap_exacttarget/endpoints/subscribers.py +++ b/tap_exacttarget/endpoints/subscribers.py @@ -102,6 +102,8 @@ class SubscriberDataAccessObject(DataAccessObject): SCHEMA = SCHEMA TABLE = 'subscriber' KEY_PROPERTIES = ['ID'] + REPLICATION_METHOD = 'INCREMENTAL' + REPLICATION_KEYS = ['ModifiedDate'] def parse_object(self, obj): to_return = obj.copy() diff --git a/tests/base.py b/tests/base.py index c8e3f69..f190f5b 100644 --- a/tests/base.py +++ b/tests/base.py @@ -109,8 +109,7 @@ def expected_metadata(self): }, "list_send":{ self.PRIMARY_KEYS: {"ListID", "SendID"}, - self.REPLICATION_METHOD: self.INCREMENTAL, - self.REPLICATION_KEYS: {"ModifiedDate"}, + self.REPLICATION_METHOD: self.FULL_TABLE, }, "list_subscriber":{ self.PRIMARY_KEYS: {"SubscriberKey", "ListID"}, @@ -130,7 +129,13 @@ def expected_metadata(self): } def streams_to_select(self): - return set(self.expected_metadata().keys()) - {'event', 'list_send'} + # events: there are 5 events and the API call window is of 10 minutes + # so there will be a lot of API calls for every test + # 
list_subscriber: as the API window is of 1 day, the tests took + # 30 minutes to run 3 tests, the test run time will be increased + # when all the tests are combined + # subscriber: it is the child stream of 'list_subscriber' + return set(self.expected_metadata().keys()) - {'event', 'list_subscriber', 'subscriber'} def expected_replication_keys(self): return {table: properties.get(self.REPLICATION_KEYS, set()) From fc9fa00c263dd3165278965c6f83d433fe5342eb Mon Sep 17 00:00:00 2001 From: harshpatel4_crest Date: Wed, 13 Oct 2021 15:07:48 +0530 Subject: [PATCH 35/36] updated the code --- tap_exacttarget/endpoints/list_sends.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tap_exacttarget/endpoints/list_sends.py b/tap_exacttarget/endpoints/list_sends.py index 7c92ab4..faaf72b 100644 --- a/tap_exacttarget/endpoints/list_sends.py +++ b/tap_exacttarget/endpoints/list_sends.py @@ -31,17 +31,10 @@ def sync_data(self): stream = request('ListSend', selector, self.auth_stub) -<<<<<<< HEAD catalog_copy = copy.deepcopy(self.catalog) -======= ->>>>>>> ca2ca099106184e171a1f2dfe6c34cfa945e6bb2 for list_send in stream: list_send = self.filter_keys_and_parse(list_send) -<<<<<<< HEAD self.write_records_with_transform(list_send, catalog_copy, table) -======= - singer.write_records(table, [list_send]) ->>>>>>> ca2ca099106184e171a1f2dfe6c34cfa945e6bb2 From 0824e000320b47dc448332e2552594fe3f025752 Mon Sep 17 00:00:00 2001 From: harshpatel4_crest Date: Wed, 13 Oct 2021 15:20:14 +0530 Subject: [PATCH 36/36] resolved unittest case error --- tests/unittests/test_backoff.py | 64 ++++++++++++++++----------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/tests/unittests/test_backoff.py b/tests/unittests/test_backoff.py index bf9daeb..5d5a480 100644 --- a/tests/unittests/test_backoff.py +++ b/tests/unittests/test_backoff.py @@ -38,7 +38,7 @@ def test_connection_reset_error_occurred__content_area(self, mocked_get, mocked_ self.assertEquals(mocked_get.call_count, 5) 
@mock.patch("FuelSDK.rest.ET_GetSupport.get") - @mock.patch("singer.write_records") + @mock.patch("tap_exacttarget.dao.DataAccessObject.write_records_with_transform") def test_no_connection_reset_error_occurred__content_area(self, mocked_write_records, mocked_get, mocked_sleep): # mock 'get' and return the dummy data mocked_get.side_effect = [get_response(True, [{ @@ -51,7 +51,7 @@ def test_no_connection_reset_error_occurred__content_area(self, mocked_write_rec obj = content_areas.ContentAreaDataAccessObject({}, {}, None, {}) # call sync obj.sync_data() - # verify if 'singer.write_records' was called + # verify if 'tap_exacttarget.dao.DataAccessObject.write_records_with_transform' was called # once as there is only one record self.assertEquals(mocked_write_records.call_count, 1) @@ -70,7 +70,7 @@ def test_connection_reset_error_occurred__campaign(self, mocked_get_rest, mocked self.assertEquals(mocked_get_rest.call_count, 5) @mock.patch("FuelSDK.rest.ET_GetSupportRest.get") - @mock.patch("singer.write_records") + @mock.patch("tap_exacttarget.dao.DataAccessObject.write_records_with_transform") def test_no_connection_reset_error_occurred__campaign(self, mocked_write_records, mocked_get_rest, mocked_sleep): # mock 'get' and return the dummy data mocked_get_rest.side_effect = [get_response(True, [{ @@ -83,7 +83,7 @@ def test_no_connection_reset_error_occurred__campaign(self, mocked_write_records obj = campaigns.CampaignDataAccessObject({}, {}, None, {}) # call sync obj.sync_data() - # verify if 'singer.write_records' was called + # verify if 'tap_exacttarget.dao.DataAccessObject.write_records_with_transform' was called # once as there is only one record self.assertEquals(mocked_write_records.call_count, 1) @@ -177,7 +177,7 @@ def test_connection_reset_error_occurred__email(self, mocked_get, mocked_sleep): self.assertEquals(mocked_get.call_count, 5) @mock.patch("FuelSDK.rest.ET_GetSupport.get") - @mock.patch("singer.write_records") + 
@mock.patch("tap_exacttarget.dao.DataAccessObject.write_records_with_transform") def test_no_connection_reset_error_occurred__email(self, mocked_write_records, mocked_get, mocked_sleep): # mock 'get' and return the dummy data mocked_get.side_effect = [get_response(True, [{ @@ -190,7 +190,7 @@ def test_no_connection_reset_error_occurred__email(self, mocked_write_records, m obj = emails.EmailDataAccessObject({}, {}, None, {}) # call sync obj.sync_data() - # verify if 'singer.write_records' was called + # verify if 'tap_exacttarget.dao.DataAccessObject.write_records_with_transform' was called # once as there is only one record self.assertEquals(mocked_write_records.call_count, 1) @@ -223,7 +223,7 @@ def test_connection_reset_error_occurred__folder(self, mocked_get, mocked_sleep) self.assertEquals(mocked_get.call_count, 5) @mock.patch("FuelSDK.rest.ET_GetSupport.get") - @mock.patch("singer.write_records") + @mock.patch("tap_exacttarget.dao.DataAccessObject.write_records_with_transform") def test_no_connection_reset_error_occurred__folder(self, mocked_write_records, mocked_get, mocked_sleep): # mock 'get' and return the dummy data mocked_get.side_effect = [get_response(True, [{ @@ -236,7 +236,7 @@ def test_no_connection_reset_error_occurred__folder(self, mocked_write_records, obj = folders.FolderDataAccessObject({}, {}, None, {}) # call sync obj.sync_data() - # verify if 'singer.write_records' was called + # verify if 'tap_exacttarget.dao.DataAccessObject.write_records_with_transform' was called # once as there is only one record self.assertEquals(mocked_write_records.call_count, 1) @@ -255,7 +255,7 @@ def test_connection_reset_error_occurred__list_send(self, mocked_get, mocked_sle self.assertEquals(mocked_get.call_count, 5) @mock.patch("FuelSDK.rest.ET_GetSupport.get") - @mock.patch("singer.write_records") + @mock.patch("tap_exacttarget.dao.DataAccessObject.write_records_with_transform") def test_no_connection_reset_error_occurred__list_send(self, mocked_write_records, 
mocked_get, mocked_sleep): # mock 'get' and return the dummy data mocked_get.side_effect = [get_response(True, [{ @@ -268,7 +268,7 @@ def test_no_connection_reset_error_occurred__list_send(self, mocked_write_record obj = list_sends.ListSendDataAccessObject({}, {}, None, {}) # call sync obj.sync_data() - # verify if 'singer.write_records' was called + # verify if 'tap_exacttarget.dao.DataAccessObject.write_records_with_transform' was called # once as there is only one record self.assertEquals(mocked_write_records.call_count, 1) @@ -332,7 +332,7 @@ def test_connection_reset_error_occurred__list(self, mocked_get, mocked_sleep): self.assertEquals(mocked_get.call_count, 5) @mock.patch("FuelSDK.rest.ET_GetSupport.get") - @mock.patch("singer.write_records") + @mock.patch("tap_exacttarget.dao.DataAccessObject.write_records_with_transform") def test_no_connection_reset_error_occurred__list(self, mocked_write_records, mocked_get, mocked_sleep): # mock 'get' and return the dummy data mocked_get.side_effect = [get_response(True, [{ @@ -345,7 +345,7 @@ def test_no_connection_reset_error_occurred__list(self, mocked_write_records, mo obj = lists.ListDataAccessObject({}, {}, None, {}) # call sync obj.sync_data() - # verify if 'singer.write_records' was called + # verify if 'tap_exacttarget.dao.DataAccessObject.write_records_with_transform' was called # once as there is only one record self.assertEquals(mocked_write_records.call_count, 1) @@ -364,7 +364,7 @@ def test_connection_reset_error_occurred__sends(self, mocked_get, mocked_sleep): self.assertEquals(mocked_get.call_count, 5) @mock.patch("FuelSDK.rest.ET_GetSupport.get") - @mock.patch("singer.write_records") + @mock.patch("tap_exacttarget.dao.DataAccessObject.write_records_with_transform") def test_no_connection_reset_error_occurred__sends(self, mocked_write_records, mocked_get, mocked_sleep): # mock 'get' and return the dummy data mocked_get.side_effect = [get_response(True, [{ @@ -377,7 +377,7 @@ def 
test_no_connection_reset_error_occurred__sends(self, mocked_write_records, m obj = sends.SendDataAccessObject({}, {}, None, {}) # call sync obj.sync_data() - # verify if 'singer.write_records' was called + # verify if 'tap_exacttarget.dao.DataAccessObject.write_records_with_transform' was called # once as there is only one record self.assertEquals(mocked_write_records.call_count, 1) @@ -396,7 +396,7 @@ def test_connection_reset_error_occurred__subscriber(self, mocked_get, mocked_sl self.assertEquals(mocked_get.call_count, 5) @mock.patch("FuelSDK.rest.ET_GetSupport.get") - @mock.patch("singer.write_records") + @mock.patch("tap_exacttarget.dao.DataAccessObject.write_records_with_transform") def test_no_connection_reset_error_occurred__subscriber(self, mocked_write_records, mocked_get, mocked_sleep): # mock 'get' and return the dummy data mocked_get.side_effect = [get_response(True, [{ @@ -409,7 +409,7 @@ def test_no_connection_reset_error_occurred__subscriber(self, mocked_write_recor obj = subscribers.SubscriberDataAccessObject({}, {}, None, {}) # call function obj.pull_subscribers_batch(['sub1']) - # verify if 'singer.write_records' was called + # verify if 'tap_exacttarget.dao.DataAccessObject.write_records_with_transform' was called # once as there is only one record self.assertEquals(mocked_write_records.call_count, 1) @@ -434,7 +434,7 @@ def test_socket_timeout_error_occurred__content_area(self, mocked_get, mocked_sl self.assertEquals(mocked_get.call_count, 5) @mock.patch("FuelSDK.rest.ET_GetSupport.get") - @mock.patch("singer.write_records") + @mock.patch("tap_exacttarget.dao.DataAccessObject.write_records_with_transform") def test_no_socket_timeout_error_occurred__content_area(self, mocked_write_records, mocked_get, mocked_sleep): # mock 'get' and return the dummy data mocked_get.side_effect = [get_response(True, [{ @@ -447,7 +447,7 @@ def test_no_socket_timeout_error_occurred__content_area(self, mocked_write_recor obj = 
content_areas.ContentAreaDataAccessObject({}, {}, None, {}) # call sync obj.sync_data() - # verify if 'singer.write_records' was called + # verify if 'tap_exacttarget.dao.DataAccessObject.write_records_with_transform' was called # once as there is only one record self.assertEquals(mocked_write_records.call_count, 1) @@ -466,7 +466,7 @@ def test_socket_timeout_error_occurred__campaign(self, mocked_get_rest, mocked_s self.assertEquals(mocked_get_rest.call_count, 5) @mock.patch("FuelSDK.rest.ET_GetSupportRest.get") - @mock.patch("singer.write_records") + @mock.patch("tap_exacttarget.dao.DataAccessObject.write_records_with_transform") def test_no_socket_timeout_error_occurred__campaign(self, mocked_write_records, mocked_get_rest, mocked_sleep): # mock 'get' and return the dummy data mocked_get_rest.side_effect = [get_response(True, [{ @@ -479,7 +479,7 @@ def test_no_socket_timeout_error_occurred__campaign(self, mocked_write_records, obj = campaigns.CampaignDataAccessObject({}, {}, None, {}) # call sync obj.sync_data() - # verify if 'singer.write_records' was called + # verify if 'tap_exacttarget.dao.DataAccessObject.write_records_with_transform' was called # once as there is only one record self.assertEquals(mocked_write_records.call_count, 1) @@ -573,7 +573,7 @@ def test_socket_timeout_error_occurred__email(self, mocked_get, mocked_sleep): self.assertEquals(mocked_get.call_count, 5) @mock.patch("FuelSDK.rest.ET_GetSupport.get") - @mock.patch("singer.write_records") + @mock.patch("tap_exacttarget.dao.DataAccessObject.write_records_with_transform") def test_no_socket_timeout_error_occurred__email(self, mocked_write_records, mocked_get, mocked_sleep): # mock 'get' and return the dummy data mocked_get.side_effect = [get_response(True, [{ @@ -586,7 +586,7 @@ def test_no_socket_timeout_error_occurred__email(self, mocked_write_records, moc obj = emails.EmailDataAccessObject({}, {}, None, {}) # call sync obj.sync_data() - # verify if 'singer.write_records' was called + # 
verify if 'tap_exacttarget.dao.DataAccessObject.write_records_with_transform' was called # once as there is only one record self.assertEquals(mocked_write_records.call_count, 1) @@ -619,7 +619,7 @@ def test_socket_timeout_error_occurred__folder(self, mocked_get, mocked_sleep): self.assertEquals(mocked_get.call_count, 5) @mock.patch("FuelSDK.rest.ET_GetSupport.get") - @mock.patch("singer.write_records") + @mock.patch("tap_exacttarget.dao.DataAccessObject.write_records_with_transform") def test_no_socket_timeout_error_occurred__folder(self, mocked_write_records, mocked_get, mocked_sleep): # mock 'get' and return the dummy data mocked_get.side_effect = [get_response(True, [{ @@ -632,7 +632,7 @@ def test_no_socket_timeout_error_occurred__folder(self, mocked_write_records, mo obj = folders.FolderDataAccessObject({}, {}, None, {}) # call sync obj.sync_data() - # verify if 'singer.write_records' was called + # verify if 'tap_exacttarget.dao.DataAccessObject.write_records_with_transform' was called # once as there is only one record self.assertEquals(mocked_write_records.call_count, 1) @@ -651,7 +651,7 @@ def test_socket_timeout_error_occurred__list_send(self, mocked_get, mocked_sleep self.assertEquals(mocked_get.call_count, 5) @mock.patch("FuelSDK.rest.ET_GetSupport.get") - @mock.patch("singer.write_records") + @mock.patch("tap_exacttarget.dao.DataAccessObject.write_records_with_transform") def test_no_socket_timeout_error_occurred__list_send(self, mocked_write_records, mocked_get, mocked_sleep): # mock 'get' and return the dummy data mocked_get.side_effect = [get_response(True, [{ @@ -664,7 +664,7 @@ def test_no_socket_timeout_error_occurred__list_send(self, mocked_write_records, obj = list_sends.ListSendDataAccessObject({}, {}, None, {}) # call sync obj.sync_data() - # verify if 'singer.write_records' was called + # verify if 'tap_exacttarget.dao.DataAccessObject.write_records_with_transform' was called # once as there is only one record 
self.assertEquals(mocked_write_records.call_count, 1) @@ -728,7 +728,7 @@ def test_socket_timeout_error_occurred__list(self, mocked_get, mocked_sleep): self.assertEquals(mocked_get.call_count, 5) @mock.patch("FuelSDK.rest.ET_GetSupport.get") - @mock.patch("singer.write_records") + @mock.patch("tap_exacttarget.dao.DataAccessObject.write_records_with_transform") def test_no_socket_timeout_error_occurred__list(self, mocked_write_records, mocked_get, mocked_sleep): # mock 'get' and return the dummy data mocked_get.side_effect = [get_response(True, [{ @@ -741,7 +741,7 @@ def test_no_socket_timeout_error_occurred__list(self, mocked_write_records, mock obj = lists.ListDataAccessObject({}, {}, None, {}) # call sync obj.sync_data() - # verify if 'singer.write_records' was called + # verify if 'tap_exacttarget.dao.DataAccessObject.write_records_with_transform' was called # once as there is only one record self.assertEquals(mocked_write_records.call_count, 1) @@ -760,7 +760,7 @@ def test_socket_timeout_error_occurred__sends(self, mocked_get, mocked_sleep): self.assertEquals(mocked_get.call_count, 5) @mock.patch("FuelSDK.rest.ET_GetSupport.get") - @mock.patch("singer.write_records") + @mock.patch("tap_exacttarget.dao.DataAccessObject.write_records_with_transform") def test_no_socket_timeout_error_occurred__sends(self, mocked_write_records, mocked_get, mocked_sleep): # mock 'get' and return the dummy data mocked_get.side_effect = [get_response(True, [{ @@ -773,7 +773,7 @@ def test_no_socket_timeout_error_occurred__sends(self, mocked_write_records, moc obj = sends.SendDataAccessObject({}, {}, None, {}) # call sync obj.sync_data() - # verify if 'singer.write_records' was called + # verify if 'tap_exacttarget.dao.DataAccessObject.write_records_with_transform' was called # once as there is only one record self.assertEquals(mocked_write_records.call_count, 1) @@ -792,7 +792,7 @@ def test_socket_timeout_error_occurred__subscriber(self, mocked_get, mocked_slee 
self.assertEquals(mocked_get.call_count, 5) @mock.patch("FuelSDK.rest.ET_GetSupport.get") - @mock.patch("singer.write_records") + @mock.patch("tap_exacttarget.dao.DataAccessObject.write_records_with_transform") def test_no_socket_timeout_error_occurred__subscriber(self, mocked_write_records, mocked_get, mocked_sleep): # mock 'get' and return the dummy data mocked_get.side_effect = [get_response(True, [{ @@ -805,6 +805,6 @@ def test_no_socket_timeout_error_occurred__subscriber(self, mocked_write_records obj = subscribers.SubscriberDataAccessObject({}, {}, None, {}) # call function obj.pull_subscribers_batch(['sub1']) - # verify if 'singer.write_records' was called + # verify if 'tap_exacttarget.dao.DataAccessObject.write_records_with_transform' was called # once as there is only one record self.assertEquals(mocked_write_records.call_count, 1)