Skip to content

Commit

Permalink
addressed review comments
Browse files: browse the repository at this point in the history
  • Loading branch information
“rdeshmukh15” committed Nov 19, 2024
1 parent f3b7c2e commit eb45758
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 41 deletions.
4 changes: 2 additions & 2 deletions tap_s3_csv/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,8 @@ def main():
config = args.config

config['tables'] = validate_table_config(config)
now = datetime.now()
sync_start_time = singer_utils.strptime_with_tz(now.strftime("%Y-%m-%dT%H:%M:%SZ"))
now_str = datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")
sync_start_time = singer_utils.strptime_with_tz(now_str)

try:
for page in s3.list_files_in_bucket(config):
Expand Down
57 changes: 18 additions & 39 deletions tests/unittests/test_sync_stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,45 +2,28 @@
from unittest.mock import patch, MagicMock
from datetime import datetime
from tap_s3_csv import sync_stream
from parameterized import parameterized

class TestSyncStream(unittest.TestCase):

@parameterized.expand([
# Case when file is older than sync_start_time
("file_older_than_sync_start_time", datetime(2024, 8, 13, 12, 0, 0), datetime(2024, 8, 14, 12, 0, 0), '2024-08-13T12:00:00', 1),
# Case when file is newer than sync_start_time
("file_newer_than_sync_start_time", datetime(2024, 8, 15, 12, 0, 0), datetime(2024, 8, 14, 12, 0, 0), '2024-08-14T12:00:00', 1),
# Case when file is the same as sync_start_time
("file_same_as_sync_start_time", datetime(2024, 8, 14, 12, 0, 0), datetime(2024, 8, 14, 12, 0, 0), '2024-08-14T12:00:00', 1)
])
@patch('tap_s3_csv.s3.get_input_files_for_table')
@patch('tap_s3_csv.sync.sync_table_file')
@patch('tap_s3_csv.singer.get_bookmark')
@patch('tap_s3_csv.singer.write_bookmark')
@patch('tap_s3_csv.singer.write_state')
@patch('tap_s3_csv.LOGGER')
def test_sync_stream_with_files(self, mock_logger, mock_write_state, mock_write_bookmark, mock_get_bookmark, mock_sync_table_file, mock_get_input_files_for_table):
def test_sync_stream(self, name, file_last_modified, sync_start_time, expected_bookmark, expected_records_streamed, mock_logger, mock_write_state, mock_write_bookmark, mock_get_bookmark, mock_sync_table_file, mock_get_input_files_for_table):
"""
Tests the sync_stream function with various file modification times.
Depending on whether the last_modified date is earlier or later than sync_start_time,
the bookmark will either be updated to the file's last_modified or the sync_start_time.
Parameterized test for the sync_stream function with various file modification times.
"""
test_cases = [
# Case when file is older than sync_start_time
{
"file_last_modified": datetime(2024, 8, 13, 12, 0, 0),
"sync_start_time": datetime(2024, 8, 14, 12, 0, 0),
"expected_bookmark": '2024-08-13T12:00:00',
"expected_records_streamed": 1
},
# Case when file is newer than sync_start_time
{
"file_last_modified": datetime(2024, 8, 15, 12, 0, 0),
"sync_start_time": datetime(2024, 8, 14, 12, 0, 0),
"expected_bookmark": '2024-08-14T12:00:00',
"expected_records_streamed": 1
},
# Case when file is the same as sync_start_time
{
"file_last_modified": datetime(2024, 8, 14, 12, 0, 0),
"sync_start_time": datetime(2024, 8, 14, 12, 0, 0),
"expected_bookmark": '2024-08-14T12:00:00',
"expected_records_streamed": 1
}
]

mock_get_bookmark.return_value = '2024-01-01T00:00:00Z'
mock_sync_table_file.return_value = 1
mock_write_state.return_value = None
Expand All @@ -50,16 +33,12 @@ def test_sync_stream_with_files(self, mock_logger, mock_write_state, mock_write_
table_spec = {'table_name': 'test_table'}
stream = None

for case in test_cases:
with self.subTest(case=case):
mock_get_input_files_for_table.return_value = [{'key': 'file1.csv', 'last_modified': case["file_last_modified"]}]
mock_write_bookmark.return_value = case["expected_bookmark"]

records_streamed = sync_stream(config, state, table_spec, stream, case["sync_start_time"])
mock_get_input_files_for_table.return_value = [{'key': 'file1.csv', 'last_modified': file_last_modified}]
mock_write_bookmark.return_value = expected_bookmark

self.assertEqual(records_streamed, case["expected_records_streamed"])
mock_write_bookmark.assert_called_with(state, 'test_table', 'modified_since', case["expected_bookmark"])
records_streamed = sync_stream(config, state, table_spec, stream, sync_start_time)

# Ensure `write_state` is called exactly once for each test case
mock_write_state.assert_called_once()
mock_write_state.reset_mock() # Reset the mock call count for the next subtest
self.assertEqual(records_streamed, expected_records_streamed)
mock_write_bookmark.assert_called_with(state, 'test_table', 'modified_since', expected_bookmark)
mock_write_state.assert_called_once()
mock_write_state.reset_mock()

0 comments on commit eb45758

Please sign in to comment.