diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 5d8b5e0..bb56da2 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -1,14 +1,3 @@ -# For most projects, this workflow file will not need changing; you simply need -# to commit it to your repository. -# -# You may wish to alter this file to override the set of languages analyzed, -# or to provide custom queries or build logic. -# -# ******** NOTE ******** -# We have attempted to detect the languages in your repository. Please check -# the `language` matrix defined below to confirm you have the correct set of -# supported CodeQL languages. -# name: "CodeQL" on: @@ -16,17 +5,10 @@ on: branches: [ "master" ] pull_request: branches: [ "master" ] - schedule: - - cron: '31 7 * * 5' jobs: analyze: name: Analyze - # Runner size impacts CodeQL analysis time. To learn more, please see: - # - https://gh.io/recommended-hardware-resources-for-running-codeql - # - https://gh.io/supported-runners-and-hardware-resources - # - https://gh.io/using-larger-runners - # Consider using larger runners for possible analysis time improvements. runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }} timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }} permissions: @@ -37,11 +19,7 @@ jobs: strategy: fail-fast: false matrix: - language: [ ] - # CodeQL supports [ 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift' ] - # Use only 'java-kotlin' to analyze code written in Java, Kotlin or both - # Use only 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both - # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support + language: [python] steps: - name: Checkout repository @@ -52,29 +30,10 @@ jobs: uses: github/codeql-action/init@v3 with: languages: ${{ matrix.language }} - # If you wish to specify custom queries, you can do so here or in a config file. 
- # By default, queries listed here will override any specified in a config file. - # Prefix the list here with "+" to use these queries and those in the config file. - # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs - # queries: security-extended,security-and-quality - - - # Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift). - # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild uses: github/codeql-action/autobuild@v3 - # ℹ️ Command-line programs to run using the OS shell. - # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun - - # If the Autobuild fails above, remove it and uncomment the following three lines. - # modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance. 
- - # - run: | - # echo "Run, Build Application using script" - # ./location_of_script_within_repo/buildscript.sh - - name: Perform CodeQL Analysis uses: github/codeql-action/analyze@v3 with: diff --git a/.github/workflows/unit_test_execution.yml b/.github/workflows/unit_test_execution.yml new file mode 100644 index 0000000..85d8d6a --- /dev/null +++ b/.github/workflows/unit_test_execution.yml @@ -0,0 +1,33 @@ +name: UnitTestExecution + +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + + workflow_dispatch: + +jobs: + build: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [ '3.9', '3.10', '3.11' ] + name: Testing on python version - ${{ matrix.python-version }} + steps: + - uses: actions/checkout@v3 + + - name: Setup Python + uses: actions/setup-python@v5.0.0 + with: + python-version: ${{ matrix.python-version }} + architecture: x64 + + - name: Install dependencies + run: | + pip install -r requirements.txt + pip install -r test-requirements.txt + + - name: Run unit test cases + run: coverage run --source=src.pg_bulk_loader --module pytest --verbose && coverage report --show-missing diff --git a/.gitignore b/.gitignore index 68bc17f..4884d8e 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ __pycache__/ *.py[cod] *$py.class +.idea/ # C extensions *.so diff --git a/README.md b/README.md index d7754c6..ffa70a1 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,197 @@ # pandas-to-postgres -A utility package to do bulk insertion faster from pandas dataframe to postgres table. + +

Overview

+ +**pandas-to-postgres** is a utility package designed to facilitate faster bulk insertion from pandas DataFrame to a PostgreSQL table. + +

Purpose

+ +This utility leverages the power of PostgreSQL in combination with Python to efficiently handle the bulk insertion of large datasets. The key features that contribute to its speed include: + +1. Utilization of Postgres' copy command +2. Integration of Psycopg3's pipeline feature +3. Implementation of Python's coroutines +4. Harnessing the power of multiprocessing +5. Capability to drop indexes during insertion and recreate them in parallel + +

Usage

+ +The utility provides the following useful functions and classes: + +1. **batch_insert_to_postgres** +2. **batch_insert_to_postgres_with_multi_process** +3. **BatchInsert** + + +

batch_insert_to_postgres() function

+ +- `pg_conn_details`: Instance of the PgConnectionDetail class containing PostgreSQL server connection details. +- `table_name`: Name of the table for bulk insertion. +- `data_df`: Data in the form of a pandas DataFrame. +- `batch_size`: Number of records to insert and commit at a time. +- `min_conn_pool_size`, `max_conn_pool_size`: Determine the number of PostgreSQL connections in the connection pool. +- `drop_and_create_index`: Set to True if indexes need to be dropped during insert and re-created once insertion is complete. +- `use_multi_process_for_create_index`: Set to True if indexes need to be re-created in parallel; otherwise, they will be created sequentially. + +

batch_insert_to_postgres_with_multi_process() function

+ +- `pg_conn_details`: Instance of the PgConnectionDetail class containing PostgreSQL server connection details. +- `table_name`: Name of the table for bulk insertion. +- `data_generator`: Python generator containing DataFrames. +- `batch_size`: Number of records to insert and commit at a time. +- `min_conn_pool_size`, `max_conn_pool_size`: Determine the number of PostgreSQL connections in the connection pool. +- `drop_and_create_index`: Set to True if indexes need to be dropped during insert and re-created once insertion is complete. +- `no_of_processes`: Specify the number of cores for multiprocessing. + +

BatchInsert class

+This class serves as the core logic for the utility and is wrapped by the first two utility functions. Users may find it useful if additional logic needs to be developed around the functionality or if a custom sequential or parallel computation logic is required. + +Properties to create an instance of BatchInsert class: +- `batch_size`: Number of records to insert and commit at a time. +- `table_name`: Name of the table for bulk insertion. +- `pg_conn_details`: Instance of the PgConnectionDetail class containing PostgreSQL server connection details. +- `min_conn`, `max_conn`: Determine the number of PostgreSQL connections in the connection pool. + +

Developer Notes:

+ +- The `min_conn` or `min_conn_pool_size` can be either equal to or less than the result of `ceil(total_data_size / batch_size)`. +- The `max_conn` or `max_conn_pool_size` can be either equal to or greater than the result of `ceil(total_data_size / batch_size)`. +- The `no_of_processes` can be set to the number of available cores or left as None for the system to determine the optimal number based on resource availability. +- The ideal `batch_size`, as observed during testing, typically falls within the range of 100,000 to 250,000. However, this recommendation is contingent upon the characteristics of the data and table structure. +- The multiprocessing function execution must start in the `__main__` block. + +

Package installation:

+ `pip install pg-bulk-loader` + +

Examples:

+ +1. Loading entire dataset once and sending for bulk insert in batches: + +```python +import pandas as pd +import asyncio +from pg_bulk_loader import PgConnectionDetail, batch_insert_to_postgres + + +async def run(): + # Read data. Let's suppose the DataFrame below has 20M records + input_data_df = pd.DataFrame() + + # Create Postgres Connection Details object. This will help in creating and managing the database connections + pg_conn_details = PgConnectionDetail( + user="", + password="", + database="", + host="", + port="", + schema="" + ) + + # Data will be inserted and committed in batches of 250,000 + await batch_insert_to_postgres( + pg_conn_details=pg_conn_details, + table_name="", + data_df=input_data_df, + batch_size=250000, + min_conn_pool_size=20, + max_conn_pool_size=25, + use_multi_process_for_create_index=True, + drop_and_create_index=True + ) + + +if __name__ == '__main__': + asyncio.run(run()) +``` + +2. Loading dataset in chunks and sending for bulk insert in batches: + +```python +import pandas as pd +import asyncio +from pg_bulk_loader import PgConnectionDetail, FastLoadHack, BatchInsert + + +async def run(): + # Create Postgres Connection Details object. 
This will help in creating and managing the database connections + pg_conn_details = PgConnectionDetail( + user="", + password="", + database="", + host="", + port="", + schema="" + ) + batch_ = BatchInsert( + batch_size=250000, + table_name="", + pg_conn_details=pg_conn_details, + min_conn=20, + max_conn=25 + ) + + # If index needs to be dropped before insertion + fast_load_hack = FastLoadHack(pg_conn_details=pg_conn_details, table_name=table_name) + indexes: dict = fast_load_hack.get_indexes() + fast_load_hack.drop_indexes(list(indexes.keys())) + + try: + # Open and create the connections in the connection pool + await batch_.open_connection_pool() + + # Let's load only a chunk of 1M from the csv file of 20M + for input_df in pd.read_csv("file-name.csv", chunksize=1000000): + # This will partition the 1M data into 4 partitions of size 250000 each as the batch_size is 250000. + await batch_.execute(input_df) + finally: + # Close the connection pool + await batch_.close_connection_pool() + # Re-create indexes once insertion is done + fast_load_hack.create_indexes(list(indexes.values()), use_multi_process=True) # Set to True if indexes need to be created in parallel + + +if __name__ == '__main__': + asyncio.run(run()) + +``` + +3. Parallel insertion using multiprocessing: + +The code below uses 5 cores and processes 5M records in parallel, i.e. 1M records per core, inserting 250,000 records at a time. + +```python +import pandas as pd +import asyncio +from pg_bulk_loader import PgConnectionDetail, batch_insert_to_postgres_with_multi_process + + +async def run(): + # Create Postgres Connection Details object. 
This will help in creating and managing the database connections + pg_conn_details = PgConnectionDetail( + user="", + password="", + database="", + host="", + port="", + schema="" + ) + + df_generator = pd.read_csv("20M-file.csv", chunksize=1000000) + + # Data will be inserted and committed in batches of 250,000 + await batch_insert_to_postgres_with_multi_process( + pg_conn_details=pg_conn_details, + table_name="", + data_generator=df_generator, + batch_size=250000, + min_conn_pool_size=20, + max_conn_pool_size=25, + no_of_processes=5, + drop_and_create_index=True + ) + + +# The multiprocessing execution must start in the __main__ block. +if __name__ == '__main__': + asyncio.run(run()) +``` diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..9e31432 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,27 @@ +[project] +name = "pg_bulk_loader" +version = "1.0.0" +authors = [ + { name="Aditya Jaroli", email="adityajaroli@gmail.com" }, +] +description = "A utility package to do bulk insertion faster from pandas dataframe to postgres table." 
+readme = "README.md" +requires-python = ">=3.9" +classifiers = [ + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "License :: OSI Approved :: Apache Software License" +] + +dependencies = [ + "pandas", + "psycopg[binary]", + "asyncio", + "psycopg_pool", + "retry" +] + +[project.urls] +Homepage = "https://github.com/adityajaroli/pandas-to-postgres.git" +Issues = "https://github.com/adityajaroli/pandas-to-postgres/issues" \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..2d4f8cf --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +pandas +psycopg[binary] +asyncio +psycopg_pool +retry \ No newline at end of file diff --git a/src/pg_bulk_loader/__init__.py b/src/pg_bulk_loader/__init__.py new file mode 100644 index 0000000..a0d4b22 --- /dev/null +++ b/src/pg_bulk_loader/__init__.py @@ -0,0 +1,6 @@ +from .batch.batch_insert import BatchInsert +from .batch.batch_insert_wrapper import batch_insert_to_postgres +from .batch.batch_insert_wrapper import batch_insert_to_postgres_with_multi_process +from .batch.batch_insert_wrapper import run_batch_task +from .batch.fast_load_hack import FastLoadHack +from .batch.pg_connection_detail import PgConnectionDetail diff --git a/src/pg_bulk_loader/batch/__init__.py b/src/pg_bulk_loader/batch/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/pg_bulk_loader/batch/batch_insert.py b/src/pg_bulk_loader/batch/batch_insert.py new file mode 100644 index 0000000..70db041 --- /dev/null +++ b/src/pg_bulk_loader/batch/batch_insert.py @@ -0,0 +1,94 @@ +import io +import pandas as pd +import asyncio +from .pg_connection_detail import PgConnectionDetail +from ..utils.dataframe_utils import get_ranges +from ..utils.time_it_decorator import time_it +from retry import retry + + +class BatchInsert: + + def __init__( + self, + batch_size: int, + table_name: str, + pg_conn_details: 
PgConnectionDetail, + min_conn: int = 5, + max_conn: int = 10 + ): + """ + :param batch_size: Number of records to insert at a time + :param table_name: Name of the table + :param pg_conn_details: Instance of PgConnectionDetail class which contains postgres connection details + :param min_conn: Min PG connections created and saved in connection pool + :param max_conn: Max PG connections created and saved in connection pool + """ + self.batch_size = batch_size + self.pg_conn_details = pg_conn_details + self.table_name = table_name + self.min_conn = min_conn + self.max_conn = max_conn + self.data_df = None + self.pool = self.pg_conn_details.create_connection_pool(min_size=self.min_conn, max_size=self.max_conn) + + @retry(Exception, tries=3, delay=2, backoff=1) + async def open_connection_pool(self): + await self.pool.open(wait=True) + + @retry(Exception, tries=3, delay=2, backoff=1) + async def close_connection_pool(self): + await self.pool.close() + + @time_it + async def execute(self, data_df: pd.DataFrame, col_names: list = None): + """ + :param data_df: Data to be inserted + :param col_names: column(s) to be considered for insert from the data_df + :return: Boolean - indicating whether the insertion was successful or not + """ + try: + partition_ranges = get_ranges(data_df.shape[0], self.batch_size) + print(f"Created {len(partition_ranges)} partitions!") + + if not partition_ranges: + print("warning: No data found to be inserted!") + return + + if col_names: + data_df = data_df[col_names] + + col_names = ",".join(col_names if col_names else data_df.columns) + + # Sharing the data among all processes + self.data_df = data_df + await self.handle_csv_bulk_insert(partition_ranges, col_names) + except Exception as e: + raise e + finally: + self.data_df = None + + async def handle_csv_bulk_insert(self, partition_ranges, col_names): + tasks = [] + # At a time only self.min_conn async threads are allowed to execute + semaphore = asyncio.Semaphore(self.min_conn) + for 
range_ in partition_ranges: + tasks.append( + self.bulk_load( + range_, f"{self.pg_conn_details.schema}.{self.table_name}", col_names, self.pool, semaphore + ) + ) + await asyncio.gather(*tasks) + + @retry(Exception, tries=3, delay=2, backoff=1) + async def bulk_load(self, range_, table_name: str, col_names: list[str], pool, semaphore): + async with semaphore: + copy_query = f"""COPY {table_name} ({col_names}) FROM STDIN WITH (FORMAT CSV, DELIMITER ',')""" + async with pool.connection(timeout=60) as pg_session: + async with pg_session.cursor() as acur: + async with acur.copy(copy_query) as copy: + with io.StringIO() as io_buffer: + data_df = self.data_df[range_[0]: range_[1]] + data_df.to_csv(io_buffer, header=False, index=False) + io_buffer.seek(0) + await copy.write(io_buffer.read()) diff --git a/src/pg_bulk_loader/batch/batch_insert_wrapper.py b/src/pg_bulk_loader/batch/batch_insert_wrapper.py new file mode 100644 index 0000000..04f1ccc --- /dev/null +++ b/src/pg_bulk_loader/batch/batch_insert_wrapper.py @@ -0,0 +1,145 @@ +from .pg_connection_detail import PgConnectionDetail +from .fast_load_hack import FastLoadHack +from .batch_insert import BatchInsert +import pandas as pd +from ..utils.time_it_decorator import time_it +import asyncio +from concurrent.futures import ProcessPoolExecutor +import math + + +def __optimize_connection_pool_size(min_conn, total_data_size, batch_size): + """ + In case the min connection is given way higher than required, the below logic optimizes the number. + The total number of insert tasks running in coroutines at a time are = (total_data_size / batch_size). + So we need (total_data_size / batch_size) number of minimum connection in the connection pool open and ready. 
+ """ + return min(min_conn, math.ceil(total_data_size/batch_size)) + + +def run_batch_task(data_df, batch_size, pg_conn_details, table_name, min_conn, max_conn): # pragma: no cover + """ + Helper method to achieve multiprocess execution with ProcessPoolExecutor class. + This method can be executed per process. + """ + asyncio.run(run(data_df, batch_size, pg_conn_details, table_name, min_conn, max_conn)) + + +async def run(data_df, batch_size, pg_conn_details, table_name, min_conn, max_conn): + min_conn = __optimize_connection_pool_size(min_conn, data_df.shape[0], batch_size) + + batch_ = BatchInsert( + batch_size=batch_size, + pg_conn_details=pg_conn_details, + table_name=table_name, + min_conn=min_conn, + max_conn=max_conn + ) + try: + await batch_.open_connection_pool() + await batch_.execute(data_df) + finally: + await batch_.close_connection_pool() + + +@time_it +async def batch_insert_to_postgres( + pg_conn_details: PgConnectionDetail, + table_name: str, + data_df: pd.DataFrame, + batch_size: int, + min_conn_pool_size: int = 5, + max_conn_pool_size: int = 10, + use_multi_process_for_create_index: bool = True, + drop_and_create_index: bool = True +): + """ + :param pg_conn_details: Instance of PgConnectionDetail class which contains postgres connection details + :param table_name: Name of the table + :param data_df: Data to be inserted + :param batch_size: Number of records to insert at a time + :param min_conn_pool_size: Min PG connections created and saved in connection pool + :param max_conn_pool_size: Max PG connections created and saved in connection pool + :param use_multi_process_for_create_index: This being True, makes the index(es) creation in parallel + :param drop_and_create_index: This being True, drops the indexes from the table, inserts data and crates them back + Note: Only non-pk indexes are dropped and re-created. 
+ :return: + """ + if not isinstance(data_df, pd.DataFrame) or data_df.empty: + return + + fast_load_hack = FastLoadHack(pg_conn_details=pg_conn_details, table_name=table_name) + indexes = {} + if drop_and_create_index: + indexes: dict = fast_load_hack.get_indexes() + print(f'Indexes to be dropped and re-created: {indexes.keys()}') + fast_load_hack.drop_indexes(list(indexes.keys())) + + try: + await run(data_df, batch_size, pg_conn_details, table_name, min_conn_pool_size, max_conn_pool_size) + except Exception as e: + raise e + finally: + if drop_and_create_index: + fast_load_hack.create_indexes(list(indexes.values()), use_multi_process_for_create_index) + + +@time_it +async def batch_insert_to_postgres_with_multi_process( + pg_conn_details: PgConnectionDetail, + table_name: str, + data_generator, + batch_size: int, + min_conn_pool_size: int = 5, + max_conn_pool_size: int = 10, + no_of_processes: int = 1, + drop_and_create_index: bool = True +): + """ + This wrapper function is useful when you have a data generator on Dataframes + The data_generator is iterated over a loop and every df is given to a separate process. + + :param pg_conn_details: Instance of PgConnectionDetail class which contains postgres connection details + :param table_name: Name of the table + :param data_generator: generator to provide dataset per process + :param batch_size: Number of records to insert at a time + :param min_conn_pool_size: Min PG connections created and saved in connection pool + :param max_conn_pool_size: Max PG connections created and saved in connection pool + :param no_of_processes: int = 1 + :param drop_and_create_index: This being True, drops the indexes from the table, inserts data and crates them back + Note: Only non-pk indexes are dropped and re-created. 
+ :return: + """ + if not data_generator: + raise Exception("Invalid data input!") + + fast_load_hack = FastLoadHack(pg_conn_details=pg_conn_details, table_name=table_name) + indexes = {} + if drop_and_create_index: + indexes = fast_load_hack.get_indexes() + print(f'Indexes to be dropped and re-created: {indexes.keys()}') + fast_load_hack.drop_indexes(list(indexes.keys())) + + try: + loop = asyncio.get_running_loop() + with ProcessPoolExecutor(max_workers=no_of_processes) as executor: + tasks = [] + for df in data_generator: + tasks.append( + loop.run_in_executor( + executor, + run_batch_task, + df, + batch_size, + pg_conn_details, + table_name, + min_conn_pool_size, + max_conn_pool_size + ) + ) + await asyncio.gather(*tasks) + except Exception as e: + raise e + finally: + if drop_and_create_index: + fast_load_hack.create_indexes(list(indexes.values()), use_multi_process=True) diff --git a/src/pg_bulk_loader/batch/fast_load_hack.py b/src/pg_bulk_loader/batch/fast_load_hack.py new file mode 100644 index 0000000..1c56dec --- /dev/null +++ b/src/pg_bulk_loader/batch/fast_load_hack.py @@ -0,0 +1,83 @@ +from .pg_connection_detail import PgConnectionDetail +from concurrent.futures import ProcessPoolExecutor +from ..utils.time_it_decorator import time_it + + +class FastLoadHack: + + def __init__(self, pg_conn_details: PgConnectionDetail, table_name: str): + self.pg_conn_details = pg_conn_details + self.schema = self.pg_conn_details.schema + self.table_name = table_name + + @time_it + def set_table_unlogged(self): # pragma: no cover + pg_session = self.pg_conn_details.get_psycopg_connection() + try: + with pg_session.cursor() as cursor: + query = f"Alter table {self.schema}.{self.table_name} SET UNLOGGED;" + cursor.execute(query) + pg_session.commit() + finally: + pg_session.close() + + @time_it + def set_table_logged(self): # pragma: no cover + pg_session = self.pg_conn_details.get_psycopg_connection() + try: + with pg_session.cursor() as cursor: + query = f"Alter table 
{self.schema}.{self.table_name} SET LOGGED;" + cursor.execute(query) + pg_session.commit() + finally: + pg_session.close() + + @time_it + def drop_indexes(self, index_names: list[str]): + if not index_names: + return + + pg_session = self.pg_conn_details.get_psycopg_connection() + try: + with pg_session.cursor() as cursor: + query = f"DROP INDEX IF EXISTS {','.join(index_names)};" + cursor.execute(query) + pg_session.commit() + finally: + pg_session.close() + + def create_index(self, index_query: str): + pg_session = self.pg_conn_details.get_psycopg_connection() + try: + with pg_session.cursor() as cursor: + cursor.execute(index_query) + pg_session.commit() + finally: + pg_session.close() + + @time_it + def create_indexes(self, index_queries: list[str], use_multi_process=False): + if use_multi_process: + with ProcessPoolExecutor() as executor: + for index_query in index_queries: + executor.submit(self.create_index, index_query) + else: + for index_query in index_queries: + self.create_index(index_query) + + def get_indexes(self): + pg_session = self.pg_conn_details.get_psycopg_connection() + try: + with pg_session.cursor() as cursor: + query = f""" + select indexname, indexdef from pg_indexes where + tablename='{self.table_name}' and indexdef like 'CREATE INDEX %' + """ + results = cursor.execute(query) + indexes = {} + for result in results: + # Adding schema in front of index name is needed to find and drop the index + indexes[f"{self.schema}.{result[0]}"] = result[1] + return indexes + finally: + pg_session.close() diff --git a/src/pg_bulk_loader/batch/pg_connection_detail.py b/src/pg_bulk_loader/batch/pg_connection_detail.py new file mode 100644 index 0000000..2f080c1 --- /dev/null +++ b/src/pg_bulk_loader/batch/pg_connection_detail.py @@ -0,0 +1,32 @@ +import psycopg +from psycopg_pool import AsyncConnectionPool +from ..utils.constants import SSL_MODE + + +class PgConnectionDetail: + def __init__( + self, user: str, password: str, database: str, schema: str, 
host: str = "localhost", port: int = 5432 + ): + self.user = user + self.password = password + self.db = database + self.host = host + self.port = port + self.schema = schema + + def create_connection_pool(self, min_size=5, max_size=10): + if not min_size or not max_size: + raise Exception("min and max connection pool size cannot be null or zero!") + + conn_str = f"host={self.host} user={self.user} password={self.password} dbname={self.db} port={self.port} sslmode={SSL_MODE}" + return AsyncConnectionPool(conninfo=conn_str, min_size=min_size, max_size=max_size, open=False) + + def get_psycopg_connection(self): + return psycopg.connect( + host=self.host, + port=self.port, + dbname=self.db, + user=self.user, + password=self.password, + sslmode=SSL_MODE + ) diff --git a/src/pg_bulk_loader/utils/__init__.py b/src/pg_bulk_loader/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/pg_bulk_loader/utils/constants.py b/src/pg_bulk_loader/utils/constants.py new file mode 100644 index 0000000..55093ad --- /dev/null +++ b/src/pg_bulk_loader/utils/constants.py @@ -0,0 +1,2 @@ + +SSL_MODE = "prefer" diff --git a/src/pg_bulk_loader/utils/dataframe_utils.py b/src/pg_bulk_loader/utils/dataframe_utils.py new file mode 100644 index 0000000..13b6605 --- /dev/null +++ b/src/pg_bulk_loader/utils/dataframe_utils.py @@ -0,0 +1,34 @@ +import pandas as pd + + +def is_empty(df: pd.DataFrame): + if df is None: + return True + + if not isinstance(df, pd.DataFrame): + raise Exception("Invalid parameter! 
Data type should be pandas DataFrame") + return df.empty + + +def partition_df(df: pd.DataFrame, partition_size: int): + if not is_empty(df): + if not partition_size: + raise Exception("Invalid partition size.") + + df_size = df.shape[0] + if partition_size > df_size: + partition_size = df_size + + return [df[i:i + partition_size] for i in range(0, df_size, partition_size)] + + +def get_ranges(data_size: int, batch_size: int): + ranges = [] + if isinstance(data_size, int) and isinstance(batch_size, int) and data_size > 0 and batch_size > 0: + start = 0 + end = min(data_size, batch_size) + while start < data_size: + ranges.append((start, end)) + start = end + end = min(data_size, batch_size+end) + return ranges diff --git a/src/pg_bulk_loader/utils/time_it_decorator.py b/src/pg_bulk_loader/utils/time_it_decorator.py new file mode 100644 index 0000000..1996d0b --- /dev/null +++ b/src/pg_bulk_loader/utils/time_it_decorator.py @@ -0,0 +1,25 @@ +import asyncio +import functools +import time +from contextlib import contextmanager + + +def time_it(func): + + @contextmanager + def wrapping_logic(): + start_time = time.time() + yield + print(f'Function {func.__name__} executed in {(time.time() - start_time):.4f}s') + + @functools.wraps(func) + def wrapper(*args, **kwargs): + if not asyncio.iscoroutinefunction(func): + with wrapping_logic(): + return func(*args, **kwargs) + else: + async def async_func(): + with wrapping_logic(): + return await func(*args, **kwargs) + return async_func() + return wrapper diff --git a/test-requirements.txt b/test-requirements.txt new file mode 100644 index 0000000..fdbc304 --- /dev/null +++ b/test-requirements.txt @@ -0,0 +1,3 @@ +pytest +testing.postgresql +pytest-cov \ No newline at end of file diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/unit/aopd-1k.csv 
b/tests/unit/aopd-1k.csv new file mode 100644 index 0000000..20c1a00 --- /dev/null +++ b/tests/unit/aopd-1k.csv @@ -0,0 +1,1001 @@ +p_code,s_code,_from,upto,mean,ss +109150723,739,2022-05-13,2022-05-14,0.0,0.0 +109150723,739,2022-05-12,2022-05-13,0.0,0.0 +109150723,739,2022-05-11,2022-05-12,0.0,0.0 +110268016,739,2022-05-05,2022-05-06,0.0082453763346883,0.47 +109150723,739,2022-09-12,2022-09-19,0.0978812016546726,0.21 +109150723,739,2022-09-19,2022-09-26,0.0979216635460034,0.21 +109150723,739,2022-09-26,2022-10-03,0.0994180503766983,0.22 +109150723,739,2022-10-03,2022-10-10,0.0984541843645274,0.22 +109150766,739,2022-06-06,2022-06-13,0.0832208071762429,0.2 +109150766,739,2022-05-30,2022-06-06,0.0914846558209531,0.21 +109150766,739,2022-05-23,2022-05-30,0.0932753320711836,0.21 +109150766,739,2022-05-18,2022-05-23,0.0489516025343807,0.18 +109150766,739,2022-05-17,2022-05-18,0.0,0.0 +109150766,739,2022-05-16,2022-05-17,0.0,0.0 +109150766,739,2022-05-15,2022-05-16,0.0,0.0 +109150766,739,2022-05-14,2022-05-15,0.0,0.0 +109150766,739,2022-05-13,2022-05-14,0.0,0.0 +109150766,739,2022-05-12,2022-05-13,0.0,0.0 +109150766,739,2022-05-11,2022-05-12,0.0,0.0 +109150766,739,2022-05-10,2022-05-11,0.0,0.0 +109150766,739,2022-06-20,2022-06-27,0.082907002302746,0.2 +109150766,739,2022-05-09,2022-05-10,0.0,0.0 +109150766,739,2022-05-07,2022-05-08,0.0,0.0 +109150766,739,2022-05-06,2022-05-07,0.0,0.0 +109150766,739,2022-05-05,2022-05-06,0.0,0.0 +109150766,739,2022-05-04,2022-05-05,16.0,10.25 +109150723,739,2022-11-28,2022-11-29,0.0153780167456716,0.23 +109150723,739,2022-11-21,2022-11-28,0.1016436667414382,0.22 +109150723,739,2022-11-14,2022-11-21,0.1002560807392001,0.22 +109150723,739,2022-11-07,2022-11-14,0.1000105660641565,0.22 +109150723,739,2022-10-31,2022-11-07,0.1002542023779824,0.22 +109150723,739,2022-10-24,2022-10-31,0.0993918041931465,0.22 +109150723,739,2022-10-17,2022-10-24,0.0996524725342169,0.22 +109150723,739,2022-10-10,2022-10-17,0.0988076773937791,0.22 
+109150766,739,2022-05-08,2022-05-09,0.0,0.0 +109675829,739,2022-08-01,2022-08-08,0.09961079119239,0.22 +109155031,739,2022-10-17,2022-10-24,0.0844521189574152,0.2 +109155031,739,2022-10-31,2022-11-07,0.0844602733850479,0.2 +109589269,739,2022-10-17,2022-10-24,0.0846250071190297,0.2 +109589269,739,2022-10-10,2022-10-17,0.0843005670467391,0.2 +109589269,739,2022-10-03,2022-10-10,0.0844715965213254,0.2 +109589269,739,2022-09-26,2022-10-03,0.0854427951853722,0.2 +109589269,739,2022-09-19,2022-09-26,0.0845987130887806,0.2 +109589269,739,2022-09-12,2022-09-19,0.0849001915194094,0.2 +109589269,739,2022-09-05,2022-09-12,0.0859538302756846,0.2 +109589269,739,2022-08-29,2022-09-05,0.0856689328793436,0.2 +109589269,739,2022-08-22,2022-08-29,0.0841114720096811,0.2 +109589269,739,2022-08-15,2022-08-22,0.0849197034258395,0.2 +109589269,739,2022-08-08,2022-08-15,0.0849159327335655,0.2 +109589269,739,2022-08-01,2022-08-08,0.0851634622085839,0.2 +109589269,739,2022-07-25,2022-08-01,0.084855246823281,0.2 +109589269,739,2022-07-18,2022-07-25,0.0851541478186845,0.2 +109589269,739,2022-07-11,2022-07-18,0.084344775066711,0.2 +109589269,739,2022-07-04,2022-07-11,0.0831982772797346,0.2 +109589269,739,2022-06-27,2022-07-04,0.0833700085058807,0.2 +109589269,739,2022-06-20,2022-06-27,0.0836058153071494,0.2 +109589269,739,2022-06-13,2022-06-20,0.081362164599905,0.2 +109589269,739,2022-06-06,2022-06-13,0.083369270083703,0.2 +109589269,739,2022-05-30,2022-06-06,0.0917130832220066,0.21 +109589269,739,2022-05-23,2022-05-30,0.0914965536330191,0.21 +109589269,739,2022-05-18,2022-05-23,0.0482658544678572,0.18 +109589269,739,2022-05-17,2022-05-18,0.0,0.0 +109589269,739,2022-05-16,2022-05-17,0.0,0.0 +109589269,739,2022-10-24,2022-10-31,0.0838667562929913,0.2 +109589269,739,2022-10-31,2022-11-07,0.0837847992079332,0.2 +109589269,739,2022-11-07,2022-11-14,0.0831224721623584,0.2 +109589269,739,2022-11-14,2022-11-21,0.0829029905144125,0.2 +109675829,739,2022-07-18,2022-07-25,0.1001523316372186,0.22 
+109675829,739,2022-07-11,2022-07-18,0.0990969648119062,0.22 +109675829,739,2022-07-04,2022-07-11,0.0975623258855193,0.21 +109675829,739,2022-06-27,2022-07-04,0.0975572842871769,0.21 +109675829,739,2022-06-20,2022-06-27,0.0974552547558216,0.21 +109675829,739,2022-06-13,2022-06-20,0.0943866496361455,0.21 +109675829,739,2022-06-06,2022-06-13,0.0962238443526624,0.21 +109675829,739,2022-05-30,2022-06-06,0.1038794486087643,0.22 +109675829,739,2022-05-23,2022-05-30,0.1035010503686434,0.22 +109675829,739,2022-05-18,2022-05-23,0.0550199110117693,0.19 +109675829,739,2022-05-17,2022-05-18,0.0,0.0 +109675829,739,2022-05-16,2022-05-17,0.0,0.0 +109589269,739,2022-05-15,2022-05-16,0.0,0.0 +109675829,739,2022-05-15,2022-05-16,0.0,0.0 +109675829,739,2022-05-13,2022-05-14,0.0,0.0 +109675829,739,2022-05-12,2022-05-13,0.0,0.0 +109675829,739,2022-05-11,2022-05-12,0.0,0.0 +109675829,739,2022-05-10,2022-05-11,0.0,0.0 +109675829,739,2022-05-09,2022-05-10,0.0,0.0 +109675829,739,2022-05-08,2022-05-09,0.0,0.0 +109675829,739,2022-05-07,2022-05-08,0.0,0.0 +109675829,739,2022-05-06,2022-05-07,0.0,0.0 +109675829,739,2022-05-05,2022-05-06,0.0,0.0 +109675829,739,2022-05-04,2022-05-05,6.0,3.84 +109589269,739,2022-11-28,2022-11-29,0.0125226398231461,0.2 +109589269,739,2022-11-21,2022-11-28,0.0834363886388018,0.2 +109675829,739,2022-05-14,2022-05-15,0.0,0.0 +109155031,739,2022-10-24,2022-10-31,0.0841359675396233,0.2 +109589269,739,2022-05-14,2022-05-15,0.0,0.0 +109589269,739,2022-05-12,2022-05-13,0.0,0.0 +109589234,739,2022-06-27,2022-07-04,0.0974994044518097,0.21 +109589234,739,2022-06-20,2022-06-27,0.097746436134814,0.21 +109589234,739,2022-06-13,2022-06-20,0.0958331376403611,0.21 +109589234,739,2022-06-06,2022-06-13,0.0974191894191227,0.21 +109589234,739,2022-05-30,2022-06-06,0.1050753719614819,0.22 +109589234,739,2022-05-23,2022-05-30,0.1048266658117137,0.22 +109589234,739,2022-05-18,2022-05-23,0.0623407636345918,0.25 +109589234,739,2022-05-17,2022-05-18,0.0126811663872699,0.5 
+109589234,739,2022-05-16,2022-05-17,0.0,0.0 +109589234,739,2022-05-15,2022-05-16,0.0125521247232309,0.5 +109589234,739,2022-05-14,2022-05-15,0.0,0.0 +109589234,739,2022-05-13,2022-05-14,0.0061335615618323,0.35 +109589234,739,2022-05-12,2022-05-13,0.0184585677148911,0.6 +109589234,739,2022-05-11,2022-05-12,0.006105302457545,0.35 +109589234,739,2022-05-10,2022-05-11,0.0185838307188273,0.6 +109589234,739,2022-05-09,2022-05-10,0.0063895907991293,0.35 +109589234,739,2022-05-08,2022-05-09,0.0254301767273854,0.71 +109589234,739,2022-05-07,2022-05-08,0.0,0.0 +109589234,739,2022-05-06,2022-05-07,0.0063331264382251,0.35 +109589234,739,2022-05-05,2022-05-06,0.0063581981816192,0.35 +109589234,739,2022-05-04,2022-05-05,6.0,3.84 +109155031,739,2022-11-28,2022-11-29,0.0126545421080663,0.2 +109155031,739,2022-11-21,2022-11-28,0.0848973679821938,0.2 +109155031,739,2022-11-14,2022-11-21,0.0841585349990055,0.2 +109155031,739,2022-11-07,2022-11-14,0.0841271069366484,0.2 +109589234,739,2022-07-04,2022-07-11,0.097214794717729,0.21 +109589234,739,2022-07-11,2022-07-18,0.0985644776374101,0.22 +109589234,739,2022-07-18,2022-07-25,0.0994999340036884,0.22 +109589234,739,2022-07-25,2022-08-01,0.0990446821087971,0.22 +109589269,739,2022-05-11,2022-05-12,0.0,0.0 +109589269,739,2022-05-10,2022-05-11,0.0,0.0 +109589269,739,2022-05-09,2022-05-10,0.0,0.0 +109589269,739,2022-05-08,2022-05-09,0.0,0.0 +109589269,739,2022-05-07,2022-05-08,0.0,0.0 +109589269,739,2022-05-06,2022-05-07,0.0,0.0 +109589269,739,2022-05-05,2022-05-06,0.0,0.0 +109589269,739,2022-05-04,2022-05-05,24.0,15.38 +109589234,739,2022-11-28,2022-11-29,0.01481117925141,0.22 +109589234,739,2022-11-21,2022-11-28,0.1006175657967105,0.22 +109589234,739,2022-11-14,2022-11-21,0.0995113085955381,0.22 +109589234,739,2022-11-07,2022-11-14,0.0996564836241304,0.22 +109589269,739,2022-05-13,2022-05-14,0.0,0.0 +109589234,739,2022-10-31,2022-11-07,0.1002299031242728,0.22 +109589234,739,2022-10-17,2022-10-24,0.1001109345816075,0.22 
+109589234,739,2022-10-10,2022-10-17,0.0994364008074626,0.22 +109589234,739,2022-10-03,2022-10-10,0.0992104132892564,0.22 +109589234,739,2022-09-26,2022-10-03,0.1002826859476044,0.22 +109589234,739,2022-09-19,2022-09-26,0.0987874733982607,0.22 +109589234,739,2022-09-12,2022-09-19,0.0986997067229822,0.22 +109589234,739,2022-09-05,2022-09-12,0.0998490400379523,0.22 +109589234,739,2022-08-29,2022-09-05,0.0996849157381802,0.22 +109589234,739,2022-08-22,2022-08-29,0.0976282729534432,0.21 +109589234,739,2022-08-15,2022-08-22,0.098370891646482,0.22 +109589234,739,2022-08-08,2022-08-15,0.0985164324520155,0.22 +109589234,739,2022-08-01,2022-08-08,0.0988995265215635,0.22 +109589234,739,2022-10-24,2022-10-31,0.0996903333580121,0.22 +110268016,739,2022-05-06,2022-05-07,0.0084249448788398,0.47 +111502610,739,2022-05-09,2022-05-10,0.0,0.0 +110268016,739,2022-05-08,2022-05-09,0.0,0.0 +111536140,739,2022-09-19,2022-09-26,0.0156842798460274,0.09 +111536140,739,2022-09-12,2022-09-19,0.0155156560940667,0.09 +111536140,739,2022-09-05,2022-09-12,0.0157209873432293,0.09 +111536140,739,2022-08-29,2022-09-05,0.0158969917101785,0.09 +111536140,739,2022-08-22,2022-08-29,0.0157827695365995,0.09 +111536140,739,2022-08-15,2022-08-22,0.0157625485444441,0.09 +111536140,739,2022-08-08,2022-08-15,0.0159852813230827,0.09 +111536140,739,2022-08-01,2022-08-08,0.0161642609164118,0.09 +111536140,739,2022-07-25,2022-08-01,0.016328401165083,0.09 +111536140,739,2022-07-18,2022-07-25,0.0162041431758552,0.09 +111536140,739,2022-07-11,2022-07-18,0.016258523100987,0.09 +111536140,739,2022-07-04,2022-07-11,0.0161987981991842,0.09 +111536140,739,2022-06-27,2022-07-04,0.0163215219508856,0.09 +111536140,739,2022-06-20,2022-06-27,0.0163770980434344,0.09 +111536140,739,2022-06-13,2022-06-20,0.0156305627735131,0.09 +111536140,739,2022-06-06,2022-06-13,0.0164337454036251,0.09 +111536140,739,2022-05-30,2022-06-06,0.0168352932916422,0.09 +111536140,739,2022-05-23,2022-05-30,0.0158091317402392,0.09 
+111536140,739,2022-05-18,2022-05-23,0.0094381572726267,0.08 +111536140,739,2022-05-17,2022-05-18,0.0,0.0 +111536140,739,2022-05-16,2022-05-17,0.0,0.0 +111536140,739,2022-05-15,2022-05-16,0.0,0.0 +111536140,739,2022-05-13,2022-05-14,0.0,0.0 +111536140,739,2022-05-12,2022-05-13,0.0,0.0 +111536140,739,2022-05-11,2022-05-12,0.0,0.0 +111536140,739,2022-09-26,2022-10-03,0.0160677097737789,0.09 +111536140,739,2022-10-03,2022-10-10,0.0160232232883572,0.09 +111536140,739,2022-10-10,2022-10-17,0.0160919990157708,0.09 +111536140,739,2022-10-17,2022-10-24,0.0163452158449217,0.09 +110289079,739,2022-05-12,2022-05-13,0.0,0.0 +111536191,739,2022-07-04,2022-07-11,0.0166924218647182,0.09 +111536191,739,2022-06-27,2022-07-04,0.0166890122927725,0.09 +111536191,739,2022-06-20,2022-06-27,0.0166130644501965,0.09 +111536191,739,2022-06-13,2022-06-20,0.0159846442816487,0.09 +111536191,739,2022-06-06,2022-06-13,0.01647167922421,0.09 +111536191,739,2022-05-30,2022-06-06,0.0167164986126059,0.09 +111536191,739,2022-05-23,2022-05-30,0.0156705170350496,0.09 +111536191,739,2022-05-18,2022-05-23,0.00946403147299,0.08 +111536191,739,2022-05-17,2022-05-18,0.0,0.0 +111536191,739,2022-05-16,2022-05-17,0.006333736604819,0.35 +111536191,739,2022-05-15,2022-05-16,0.0124146667723972,0.49 +111536191,739,2022-05-13,2022-05-14,0.0,0.0 +111536140,739,2022-05-10,2022-05-11,0.0,0.0 +111536191,739,2022-05-12,2022-05-13,0.0123059323578189,0.49 +111536191,739,2022-05-10,2022-05-11,0.0060481086724197,0.35 +111536191,739,2022-05-09,2022-05-10,0.0065370818737215,0.36 +111536191,739,2022-05-08,2022-05-09,0.0123879460882125,0.49 +111536191,739,2022-05-06,2022-05-07,0.0,0.0 +111536191,739,2022-05-05,2022-05-06,0.0,0.0 +111536191,739,2022-05-04,2022-05-05,0.0,0.0 +111536140,739,2022-11-28,2022-11-29,0.0024928790517151,0.09 +111536140,739,2022-11-21,2022-11-28,0.0171406961744651,0.09 +111536140,739,2022-11-14,2022-11-21,0.0167495094938203,0.09 +111536140,739,2022-11-07,2022-11-14,0.016774044604972,0.09 
+111536140,739,2022-10-31,2022-11-07,0.0166998178465291,0.09 +111536140,739,2022-10-24,2022-10-31,0.0170125904260203,0.09 +111536191,739,2022-05-11,2022-05-12,0.0,0.0 +111536191,739,2022-07-11,2022-07-18,0.0168829971225932,0.09 +111536140,739,2022-05-09,2022-05-10,0.0,0.0 +111536140,739,2022-05-06,2022-05-07,0.0,0.0 +111502610,739,2022-05-18,2022-05-23,0.0135686564536009,0.09 +111502610,739,2022-05-17,2022-05-18,0.0,0.0 +111502610,739,2022-05-16,2022-05-17,0.0,0.0 +111502610,739,2022-05-15,2022-05-16,0.0,0.0 +111502610,739,2022-05-13,2022-05-14,0.0,0.0 +111502610,739,2022-05-12,2022-05-13,0.0,0.0 +111502610,739,2022-05-11,2022-05-12,0.0,0.0 +111502610,739,2022-05-10,2022-05-11,0.0,0.0 +111502610,739,2022-05-08,2022-05-09,0.0,0.0 +111502610,739,2022-05-06,2022-05-07,0.0,0.0 +111502610,739,2022-05-05,2022-05-06,0.0,0.0 +111502610,739,2022-05-04,2022-05-05,0.0,0.0 +111009930,739,2022-11-28,2022-11-29,0.0132533906726166,0.21 +111009930,739,2022-11-21,2022-11-28,0.0897767633432522,0.21 +111009930,739,2022-11-14,2022-11-21,0.0889660846441984,0.2 +111009930,739,2022-11-07,2022-11-14,0.0889716584933921,0.2 +111009930,739,2022-10-31,2022-11-07,0.0893941131653264,0.21 +111009930,739,2022-10-24,2022-10-31,0.089220373542048,0.2 +111009930,739,2022-10-17,2022-10-24,0.0898433647817,0.21 +111009930,739,2022-10-10,2022-10-17,0.0893804191146045,0.21 +111009930,739,2022-10-03,2022-10-10,0.0893603379372507,0.21 +111009930,739,2022-09-26,2022-10-03,0.090198609046638,0.21 +111009930,739,2022-09-19,2022-09-26,0.0891662868671119,0.2 +111009930,739,2022-09-12,2022-09-19,0.0893547526793554,0.21 +111009930,739,2022-09-05,2022-09-12,0.090381403802894,0.21 +111502610,739,2022-05-23,2022-05-30,0.0295641981844956,0.12 +111502610,739,2022-05-30,2022-06-06,0.0269688569518647,0.11 +111502610,739,2022-06-06,2022-06-13,0.0243480293029357,0.11 +111502610,739,2022-06-13,2022-06-20,0.0233754171484649,0.1 +111536140,739,2022-05-05,2022-05-06,0.0,0.0 +111536140,739,2022-05-04,2022-05-05,0.0,0.0 
+111502610,739,2022-11-28,2022-11-29,0.0037754056975245,0.11 +111502610,739,2022-11-21,2022-11-28,0.02545342000667,0.11 +111502610,739,2022-11-14,2022-11-21,0.0250848738942295,0.11 +111502610,739,2022-11-07,2022-11-14,0.0254965212661772,0.11 +111502610,739,2022-10-31,2022-11-07,0.0252293503144755,0.11 +111502610,739,2022-10-24,2022-10-31,0.0257901484146714,0.11 +111502610,739,2022-10-17,2022-10-24,0.0256695265416055,0.11 +111502610,739,2022-10-10,2022-10-17,0.0253414378967136,0.11 +111502610,739,2022-10-03,2022-10-10,0.0252624880522489,0.11 +111502610,739,2022-09-26,2022-10-03,0.0253287703962996,0.11 +111536140,739,2022-05-08,2022-05-09,0.0,0.0 +111502610,739,2022-09-19,2022-09-26,0.0250339955091476,0.11 +111502610,739,2022-09-05,2022-09-12,0.0250628622015938,0.11 +111502610,739,2022-08-29,2022-09-05,0.0249254339141771,0.11 +111502610,739,2022-08-22,2022-08-29,0.0246386846993118,0.11 +111502610,739,2022-08-15,2022-08-22,0.0244375594193115,0.11 +111502610,739,2022-08-08,2022-08-15,0.0244136783294379,0.11 +111502610,739,2022-08-01,2022-08-08,0.0242815412348136,0.11 +111502610,739,2022-07-25,2022-08-01,0.0242067648796364,0.11 +111502610,739,2022-07-18,2022-07-25,0.0239915903657674,0.11 +111502610,739,2022-07-11,2022-07-18,0.0238956415560096,0.11 +111502610,739,2022-07-04,2022-07-11,0.0237326703025028,0.11 +111502610,739,2022-06-27,2022-07-04,0.0237209424376487,0.11 +111502610,739,2022-06-20,2022-06-27,0.0240458481712024,0.11 +111502610,739,2022-09-12,2022-09-19,0.0247613521059975,0.11 +111009930,739,2022-08-29,2022-09-05,0.0899830118287354,0.21 +111536191,739,2022-07-18,2022-07-25,0.0169458166928961,0.09 +111536191,739,2022-08-01,2022-08-08,0.0170946201542392,0.09 +111785592,739,2022-07-18,2022-07-25,0.0960307719865813,0.21 +111785592,739,2022-07-11,2022-07-18,0.0953207767084241,0.21 +111785592,739,2022-07-04,2022-07-11,0.0942573420898988,0.21 +111785592,739,2022-06-27,2022-07-04,0.0946943045817314,0.21 +111785592,739,2022-06-20,2022-06-27,0.0953218171809912,0.21 
+111785592,739,2022-06-13,2022-06-20,0.0932994596882265,0.21 +111785592,739,2022-06-06,2022-06-13,0.0956083452079284,0.21 +111785592,739,2022-05-30,2022-06-06,0.1043532444937624,0.22 +111785592,739,2022-05-23,2022-05-30,0.1055482039522894,0.22 +111785592,739,2022-05-18,2022-05-23,0.0565788382080914,0.19 +111785592,739,2022-05-17,2022-05-18,0.0,0.0 +111785592,739,2022-05-16,2022-05-17,0.0,0.0 +111785592,739,2022-05-15,2022-05-16,0.0,0.0 +111785592,739,2022-05-14,2022-05-15,0.0,0.0 +111785592,739,2022-05-13,2022-05-14,0.0,0.0 +111785592,739,2022-05-12,2022-05-13,0.0,0.0 +111785592,739,2022-05-11,2022-05-12,0.0,0.0 +111785592,739,2022-05-10,2022-05-11,0.0,0.0 +111785592,739,2022-05-09,2022-05-10,0.0,0.0 +111785592,739,2022-05-08,2022-05-09,0.0,0.0 +111785592,739,2022-05-07,2022-05-08,0.0,0.0 +111785592,739,2022-05-06,2022-05-07,0.0,0.0 +111785592,739,2022-05-05,2022-05-06,0.0,0.0 +111785592,739,2022-05-04,2022-05-05,24.0,15.38 +111547586,739,2022-11-28,2022-11-29,0.0130801668856292,0.21 +111785592,739,2022-07-25,2022-08-01,0.0956966977789998,0.21 +111785592,739,2022-08-01,2022-08-08,0.095628856094554,0.21 +111785592,739,2022-08-08,2022-08-15,0.0952591101583093,0.21 +111785592,739,2022-08-15,2022-08-22,0.0951009591761976,0.21 +109150723,739,2022-05-10,2022-05-11,0.0,0.0 +111999654,739,2022-05-14,2022-05-16,0.0010491333826643,0.04 +111999654,739,2022-05-13,2022-05-14,0.0,0.0 +111999654,739,2022-05-12,2022-05-13,0.0,0.0 +111999654,739,2022-05-11,2022-05-12,0.0,0.0 +111999654,739,2022-05-10,2022-05-11,0.0,0.0 +111999654,739,2022-05-09,2022-05-10,0.0,0.0 +111999654,739,2022-05-07,2022-05-08,0.0,0.0 +111999654,739,2022-05-06,2022-05-07,0.0,0.0 +111999654,739,2022-05-05,2022-05-06,0.0,0.0 +111999654,739,2022-05-04,2022-05-05,0.0,0.0 +111785592,739,2022-11-28,2022-11-29,0.0145125775597989,0.22 +111547586,739,2022-11-21,2022-11-28,0.0875210228841751,0.2 +111785592,739,2022-11-21,2022-11-28,0.0962243915526196,0.21 +111785592,739,2022-11-07,2022-11-14,0.0954434075132012,0.21 
+111785592,739,2022-10-31,2022-11-07,0.09607977266144,0.21 +111785592,739,2022-10-24,2022-10-31,0.0956817366378381,0.21 +111785592,739,2022-10-17,2022-10-24,0.0963063214868307,0.21 +111785592,739,2022-10-10,2022-10-17,0.0957244431329891,0.21 +111785592,739,2022-10-03,2022-10-10,0.0955824819821864,0.21 +111785592,739,2022-09-26,2022-10-03,0.0966134455045685,0.21 +111785592,739,2022-09-19,2022-09-26,0.0953411895055323,0.21 +111785592,739,2022-09-12,2022-09-19,0.0952997149862349,0.21 +111785592,739,2022-09-05,2022-09-12,0.0963932016594335,0.21 +111785592,739,2022-08-29,2022-09-05,0.0961450963513925,0.21 +111785592,739,2022-08-22,2022-08-29,0.0943036465169862,0.21 +111785592,739,2022-11-14,2022-11-21,0.0952233079113066,0.21 +111536191,739,2022-07-25,2022-08-01,0.0171808692393824,0.09 +111547586,739,2022-11-14,2022-11-21,0.0866678413003683,0.2 +111547586,739,2022-10-31,2022-11-07,0.0864398425910621,0.2 +111547586,739,2022-05-11,2022-05-12,0.0252094056623999,0.81 +111547586,739,2022-05-10,2022-05-11,0.0089639518826327,0.49 +111547586,739,2022-05-09,2022-05-10,0.02632808357294,0.83 +111547586,739,2022-05-08,2022-05-09,0.0081522732765177,0.46 +111547586,739,2022-05-07,2022-05-08,0.0170885455474278,0.67 +111547586,739,2022-05-06,2022-05-07,0.0170706947939249,0.67 +111547586,739,2022-05-05,2022-05-06,0.016751077156683,0.66 +111547586,739,2022-05-04,2022-05-05,0.0,0.0 +111536191,739,2022-11-28,2022-11-29,0.0024855061201378,0.09 +111536191,739,2022-11-21,2022-11-28,0.0166791449300944,0.09 +111536191,739,2022-11-14,2022-11-21,0.0163297313265502,0.09 +111536191,739,2022-11-07,2022-11-14,0.016420497209765,0.09 +111536191,739,2022-10-31,2022-11-07,0.0164596157846972,0.09 +111536191,739,2022-10-24,2022-10-31,0.0168790522729977,0.09 +111536191,739,2022-10-17,2022-10-24,0.0163680594414472,0.09 +111536191,739,2022-10-10,2022-10-17,0.0162704137619584,0.09 +111536191,739,2022-10-03,2022-10-10,0.01635962666478,0.09 +111536191,739,2022-09-26,2022-10-03,0.0165627858368679,0.09 
+111536191,739,2022-09-19,2022-09-26,0.0163127720588818,0.09 +111536191,739,2022-09-12,2022-09-19,0.0162632534047588,0.09 +111536191,739,2022-09-05,2022-09-12,0.0165834726067259,0.09 +111536191,739,2022-08-29,2022-09-05,0.0168446659808978,0.09 +111536191,739,2022-08-22,2022-08-29,0.0167606549803167,0.09 +111536191,739,2022-08-15,2022-08-22,0.0167479751398786,0.09 +111536191,739,2022-08-08,2022-08-15,0.0169585653347894,0.09 +111547586,739,2022-05-12,2022-05-13,0.0088857576975165,0.48 +111547586,739,2022-05-13,2022-05-14,16.08152344527531,10.41 +111547586,739,2022-05-14,2022-05-15,0.0091708469100852,0.49 +111547586,739,2022-05-15,2022-05-16,0.0,0.0 +111547586,739,2022-10-24,2022-10-31,0.0857262898935005,0.2 +111547586,739,2022-10-17,2022-10-24,0.0857499378034845,0.2 +111547586,739,2022-10-10,2022-10-17,0.0846008488442748,0.2 +111547586,739,2022-10-03,2022-10-10,0.083818997722119,0.2 +111547586,739,2022-09-26,2022-10-03,0.0838389500277116,0.2 +111547586,739,2022-09-19,2022-09-26,0.0821787598542869,0.2 +111547586,739,2022-09-12,2022-09-19,0.08174076420255,0.2 +111547586,739,2022-09-05,2022-09-12,0.082119247294031,0.2 +111547586,739,2022-08-29,2022-09-05,0.0813240549759939,0.2 +111547586,739,2022-08-22,2022-08-29,0.0795165194431319,0.19 +111547586,739,2022-08-15,2022-08-22,0.0801344270585104,0.19 +111547586,739,2022-08-08,2022-08-15,0.0801195475505665,0.19 +111547586,739,2022-11-07,2022-11-14,0.0864445411134511,0.2 +111547586,739,2022-08-01,2022-08-08,0.0804602483985945,0.19 +111547586,739,2022-07-18,2022-07-25,0.0810309443622827,0.2 +111547586,739,2022-07-11,2022-07-18,0.0808458901010453,0.2 +111547586,739,2022-07-04,2022-07-11,0.0803333383519202,0.19 +111547586,739,2022-06-27,2022-07-04,0.0810382672352715,0.2 +111547586,739,2022-06-20,2022-06-27,0.0818534037097509,0.2 +111547586,739,2022-06-13,2022-06-20,0.080797869858898,0.19 +111547586,739,2022-06-06,2022-06-13,0.0827273372127684,0.2 +111547586,739,2022-05-30,2022-06-06,0.0910916870264447,0.21 
+111547586,739,2022-05-23,2022-05-30,0.0919728469169981,0.21 +111547586,739,2022-05-18,2022-05-23,0.0892697426295186,0.5 +111547586,739,2022-05-17,2022-05-18,64.00856771760935,41.03 +111547586,739,2022-05-16,2022-05-17,0.0420407051135329,1.05 +111547586,739,2022-07-25,2022-08-01,0.0804131058976054,0.19 +110268016,739,2022-05-07,2022-05-08,0.008569859672133,0.47 +111009930,739,2022-08-22,2022-08-29,0.0881934948265552,0.2 +111009930,739,2022-08-08,2022-08-15,0.0888655370799824,0.2 +110305171,739,2022-05-08,2022-05-09,0.0,0.0 +110305171,739,2022-05-07,2022-05-08,0.0,0.0 +110305171,739,2022-05-06,2022-05-07,0.0,0.0 +110305171,739,2022-05-05,2022-05-06,0.0,0.0 +110305171,739,2022-05-04,2022-05-05,0.0,0.0 +110289079,739,2022-11-28,2022-11-29,0.0038511757738888,0.11 +110289079,739,2022-11-21,2022-11-28,0.0263730062870308,0.11 +110289079,739,2022-11-14,2022-11-21,0.0258616969222202,0.11 +110289079,739,2022-11-07,2022-11-14,0.0261578903300687,0.11 +110289079,739,2022-10-31,2022-11-07,0.0257797332014888,0.11 +110289079,739,2022-10-24,2022-10-31,0.0262931636534631,0.11 +110289079,739,2022-10-17,2022-10-24,0.02607882686425,0.11 +110289079,739,2022-10-10,2022-10-17,0.0257543593179434,0.11 +110289079,739,2022-10-03,2022-10-10,0.025726439198479,0.11 +110289079,739,2022-09-26,2022-10-03,0.0258947889087721,0.11 +110289079,739,2022-09-19,2022-09-26,0.0257438763510435,0.11 +110289079,739,2022-09-12,2022-09-19,0.0256576196989044,0.11 +110289079,739,2022-09-05,2022-09-12,0.02620756754186,0.11 +110289079,739,2022-08-29,2022-09-05,0.0263383314013481,0.11 +110289079,739,2022-08-22,2022-08-29,0.026333054411225,0.11 +110289079,739,2022-08-15,2022-08-22,0.0264115764293819,0.11 +110289079,739,2022-08-08,2022-08-15,0.0266757555073127,0.11 +110289079,739,2022-08-01,2022-08-08,0.0267938085598871,0.11 +110289079,739,2022-07-25,2022-08-01,0.0269442602293565,0.11 +110289079,739,2022-07-18,2022-07-25,0.026889426051639,0.11 +110305171,739,2022-05-09,2022-05-10,0.0,0.0 
+110305171,739,2022-05-10,2022-05-11,0.0,0.0 +110305171,739,2022-05-11,2022-05-12,0.0,0.0 +110305171,739,2022-05-12,2022-05-13,0.0,0.0 +110305171,739,2022-10-03,2022-10-10,0.1004337870981544,0.22 +110305171,739,2022-09-26,2022-10-03,0.1014251877786591,0.22 +110305171,739,2022-09-19,2022-09-26,0.09980856673792,0.22 +110305171,739,2022-09-12,2022-09-19,0.0996236879145726,0.22 +110305171,739,2022-09-05,2022-09-12,0.1006606509909033,0.22 +110305171,739,2022-08-29,2022-09-05,0.1003841110505163,0.22 +110305171,739,2022-08-22,2022-08-29,0.0981635018251836,0.21 +110305171,739,2022-08-15,2022-08-22,0.0987561446381732,0.22 +110305171,739,2022-08-08,2022-08-15,0.0987386805936694,0.22 +110305171,739,2022-08-01,2022-08-08,0.0989755452610552,0.22 +110305171,739,2022-07-25,2022-08-01,0.0990039751632139,0.22 +110305171,739,2022-07-18,2022-07-25,0.0993274996289983,0.22 +110289079,739,2022-07-11,2022-07-18,0.0268849428975954,0.11 +110305171,739,2022-07-11,2022-07-18,0.0983466032193973,0.22 +110305171,739,2022-06-27,2022-07-04,0.0972549534635617,0.21 +110305171,739,2022-06-20,2022-06-27,0.0975179313681115,0.21 +110305171,739,2022-06-13,2022-06-20,0.0955372367925323,0.21 +110305171,739,2022-06-06,2022-06-13,0.0974004992747245,0.21 +110305171,739,2022-05-30,2022-06-06,0.1053881751784698,0.22 +110305171,739,2022-05-23,2022-05-30,0.1057811241779228,0.22 +110305171,739,2022-05-18,2022-05-23,0.0568186226690339,0.19 +110305171,739,2022-05-17,2022-05-18,0.0,0.0 +110305171,739,2022-05-16,2022-05-17,0.0,0.0 +110305171,739,2022-05-15,2022-05-16,0.0,0.0 +110305171,739,2022-05-14,2022-05-15,0.0,0.0 +110305171,739,2022-05-13,2022-05-14,0.0,0.0 +110305171,739,2022-07-04,2022-07-11,0.0969709511846304,0.21 +110305171,739,2022-10-10,2022-10-17,0.1007343413075432,0.22 +110289079,739,2022-07-04,2022-07-11,0.0267053869320079,0.11 +110289079,739,2022-06-20,2022-06-27,0.0269677202449842,0.11 +110268016,739,2022-08-29,2022-09-05,0.1023401721613481,0.22 
+110268016,739,2022-08-22,2022-08-29,0.1006203828146681,0.22 +110268016,739,2022-08-15,2022-08-22,0.1016283947974443,0.22 +110268016,739,2022-08-08,2022-08-15,0.1019599633291363,0.22 +110268016,739,2022-08-01,2022-08-08,0.1024977444903925,0.22 +110268016,739,2022-07-25,2022-08-01,0.1026758037041872,0.22 +110268016,739,2022-07-18,2022-07-25,0.1030800748849287,0.22 +110268016,739,2022-07-11,2022-07-18,0.1019864070694893,0.22 +110268016,739,2022-07-04,2022-07-11,0.1004043701104819,0.22 +110268016,739,2022-06-27,2022-07-04,0.1004612166434525,0.22 +110268016,739,2022-06-20,2022-06-27,0.1004279666812155,0.22 +110268016,739,2022-06-13,2022-06-20,0.0975434018910594,0.21 +110268016,739,2022-06-06,2022-06-13,0.0994420507494677,0.22 +110268016,739,2022-05-30,2022-06-06,0.1068594309257631,0.22 +110268016,739,2022-05-23,2022-05-30,0.1067442618093146,0.22 +110268016,739,2022-05-18,2022-05-23,0.0658804499120977,0.29 +110268016,739,2022-05-17,2022-05-18,0.0082192376026632,0.46 +110268016,739,2022-05-16,2022-05-17,0.0172935517326671,0.67 +110268016,739,2022-05-15,2022-05-16,0.0,0.0 +110268016,739,2022-05-14,2022-05-15,0.0085068029291512,0.47 +110268016,739,2022-05-13,2022-05-14,0.0083656493306651,0.47 +110268016,739,2022-05-12,2022-05-13,0.0084480505587238,0.47 +110268016,739,2022-05-11,2022-05-12,0.00884103739455,0.48 +110268016,739,2022-05-10,2022-05-11,0.0082636258800095,0.47 +110268016,739,2022-05-09,2022-05-10,0.0175615080845181,0.68 +110268016,739,2022-09-05,2022-09-12,0.1020171951968222,0.22 +110268016,739,2022-09-12,2022-09-19,0.1003746242495253,0.22 +110268016,739,2022-09-19,2022-09-26,0.0999828798230737,0.22 +110268016,739,2022-09-26,2022-10-03,0.1010238999733701,0.22 +110289079,739,2022-06-13,2022-06-20,0.0260306974809138,0.11 +110289079,739,2022-06-06,2022-06-13,0.0269273211722564,0.11 +110289079,739,2022-05-30,2022-06-06,0.0293913681098477,0.12 +110289079,739,2022-05-23,2022-05-30,0.0317709832403452,0.12 +110289079,739,2022-05-18,2022-05-23,0.0145361629229862,0.1 
+110289079,739,2022-05-17,2022-05-18,0.0,0.0 +110289079,739,2022-05-16,2022-05-17,0.0,0.0 +110289079,739,2022-05-15,2022-05-16,0.0,0.0 +110289079,739,2022-05-13,2022-05-14,0.0,0.0 +110289079,739,2022-05-11,2022-05-12,0.0,0.0 +110289079,739,2022-05-10,2022-05-11,0.0,0.0 +110289079,739,2022-06-27,2022-07-04,0.0266972036333754,0.11 +110289079,739,2022-05-09,2022-05-10,0.0,0.0 +110289079,739,2022-05-06,2022-05-07,0.0,0.0 +110289079,739,2022-05-05,2022-05-06,0.0,0.0 +110289079,739,2022-05-04,2022-05-05,0.0,0.0 +110268016,739,2022-11-28,2022-11-29,0.0149898917879909,0.22 +110268016,739,2022-11-21,2022-11-28,0.1002636256162077,0.22 +110268016,739,2022-11-14,2022-11-21,0.098916413844563,0.22 +110268016,739,2022-11-07,2022-11-14,0.0988273902330547,0.22 +110268016,739,2022-10-31,2022-11-07,0.0994292796822264,0.22 +110268016,739,2022-10-24,2022-10-31,0.0989964039763435,0.22 +110268016,739,2022-10-17,2022-10-24,0.0996624095132574,0.22 +110268016,739,2022-10-10,2022-10-17,0.0993380630388856,0.22 +110268016,739,2022-10-03,2022-10-10,0.0995210289256647,0.22 +110289079,739,2022-05-08,2022-05-09,0.0,0.0 +111009930,739,2022-08-15,2022-08-22,0.088954035192728,0.2 +110305171,739,2022-10-17,2022-10-24,0.1014461138984188,0.22 +110305171,739,2022-10-31,2022-11-07,0.101736381649971,0.22 +111009817,739,2022-10-31,2022-11-07,0.0328219773946329,0.12 +111009817,739,2022-10-24,2022-10-31,0.0331158117623999,0.12 +111009817,739,2022-10-17,2022-10-24,0.0330403671832755,0.12 +111009817,739,2022-10-10,2022-10-17,0.0328262663679197,0.12 +111009817,739,2022-10-03,2022-10-10,0.0328047103248536,0.12 +111009817,739,2022-09-26,2022-10-03,0.033082649577409,0.12 +111009817,739,2022-09-19,2022-09-26,0.032735553337261,0.12 +111009817,739,2022-09-12,2022-09-19,0.0326417639153078,0.12 +111009817,739,2022-09-05,2022-09-12,0.033191449008882,0.12 +111009817,739,2022-08-29,2022-09-05,0.0331380368443205,0.12 +111009817,739,2022-08-22,2022-08-29,0.0329416305758059,0.12 
+111009817,739,2022-08-15,2022-08-22,0.0330235995352268,0.12 +111009817,739,2022-08-08,2022-08-15,0.0333018787205219,0.13 +111009817,739,2022-08-01,2022-08-08,0.0333230718970298,0.13 +111009817,739,2022-07-25,2022-08-01,0.0334672615863382,0.13 +111009817,739,2022-07-18,2022-07-25,0.0334166878601536,0.13 +111009817,739,2022-07-11,2022-07-18,0.033440058119595,0.13 +111009817,739,2022-07-04,2022-07-11,0.0331749938195571,0.12 +111009817,739,2022-06-27,2022-07-04,0.0331248850561678,0.12 +111009817,739,2022-06-20,2022-06-27,0.0333320942008656,0.13 +111009817,739,2022-06-13,2022-06-20,0.0323566234203776,0.12 +111009817,739,2022-06-06,2022-06-13,0.0334416133235138,0.13 +111009817,739,2022-05-30,2022-06-06,0.0356829921776073,0.13 +111009817,739,2022-05-23,2022-05-30,0.0386533740721933,0.13 +111009817,739,2022-05-18,2022-05-23,0.0198061081336104,0.11 +111009817,739,2022-11-07,2022-11-14,0.0331462821923196,0.12 +111009817,739,2022-11-14,2022-11-21,0.0327622856711968,0.12 +111009817,739,2022-11-21,2022-11-28,0.0333191397367045,0.13 +111009817,739,2022-11-28,2022-11-29,0.0048721926286816,0.13 +111009930,739,2022-08-01,2022-08-08,0.0890235833358019,0.2 +111009930,739,2022-07-25,2022-08-01,0.0886015522992238,0.2 +111009930,739,2022-07-18,2022-07-25,0.0887547570746392,0.2 +111009930,739,2022-07-11,2022-07-18,0.0879795515211299,0.2 +111009930,739,2022-07-04,2022-07-11,0.0867363364668563,0.2 +111009930,739,2022-06-27,2022-07-04,0.0868691456271334,0.2 +111009930,739,2022-06-20,2022-06-27,0.0871158655499273,0.2 +111009930,739,2022-06-13,2022-06-20,0.0853256703445407,0.2 +111009930,739,2022-06-06,2022-06-13,0.0870048083177959,0.2 +111009930,739,2022-05-30,2022-06-06,0.0953151967500349,0.21 +111009930,739,2022-05-23,2022-05-30,0.0964612411849376,0.21 +111009930,739,2022-05-18,2022-05-23,0.0516766113411266,0.18 +111009817,739,2022-05-17,2022-05-18,0.0,0.0 +111009930,739,2022-05-17,2022-05-18,0.0,0.0 +111009930,739,2022-05-15,2022-05-16,0.0,0.0 +111009930,739,2022-05-14,2022-05-15,0.0,0.0 
+111009930,739,2022-05-13,2022-05-14,0.0,0.0 +111009930,739,2022-05-12,2022-05-13,0.0,0.0 +111009930,739,2022-05-11,2022-05-12,0.0,0.0 +111009930,739,2022-05-10,2022-05-11,0.0,0.0 +111009930,739,2022-05-09,2022-05-10,0.0,0.0 +111009930,739,2022-05-08,2022-05-09,0.0,0.0 +111009930,739,2022-05-07,2022-05-08,0.0,0.0 +111009930,739,2022-05-06,2022-05-07,0.0,0.0 +111009930,739,2022-05-05,2022-05-06,0.0,0.0 +111009930,739,2022-05-04,2022-05-05,0.0,0.0 +111009930,739,2022-05-16,2022-05-17,0.0,0.0 +110305171,739,2022-10-24,2022-10-31,0.1010722721694037,0.22 +111009817,739,2022-05-16,2022-05-17,0.0,0.0 +111009817,739,2022-05-14,2022-05-15,0.0,0.0 +110742021,739,2022-07-11,2022-07-18,0.0164417366031557,0.09 +110742021,739,2022-07-04,2022-07-11,0.0162829699693247,0.09 +110742021,739,2022-06-27,2022-07-04,0.0163257956737652,0.09 +110742021,739,2022-06-20,2022-06-27,0.0163176661589964,0.09 +110742021,739,2022-06-13,2022-06-20,0.0155948517912523,0.09 +110742021,739,2022-06-06,2022-06-13,0.0163224361036496,0.09 +110742021,739,2022-05-30,2022-06-06,0.0167115394564345,0.09 +110742021,739,2022-05-23,2022-05-30,0.0157292127983583,0.09 +110742021,739,2022-05-18,2022-05-23,0.0094948059561763,0.08 +110742021,739,2022-05-17,2022-05-18,0.0,0.0 +110742021,739,2022-05-16,2022-05-17,0.0,0.0 +110742021,739,2022-05-15,2022-05-16,0.0,0.0 +110742021,739,2022-05-13,2022-05-14,0.0,0.0 +110742021,739,2022-05-12,2022-05-13,32.0,20.5 +110742021,739,2022-05-11,2022-05-12,0.0,0.0 +110742021,739,2022-05-10,2022-05-11,0.0,0.0 +110742021,739,2022-05-09,2022-05-10,0.0,0.0 +110742021,739,2022-05-08,2022-05-09,0.0,0.0 +110742021,739,2022-05-06,2022-05-07,0.0,0.0 +110742021,739,2022-05-05,2022-05-06,0.0,0.0 +110742021,739,2022-05-04,2022-05-05,0.0,0.0 +110305171,739,2022-11-28,2022-11-29,0.0152440480887889,0.22 +110305171,739,2022-11-21,2022-11-28,0.1025894422782585,0.22 +110305171,739,2022-11-14,2022-11-21,0.1013380159856751,0.22 +110305171,739,2022-11-07,2022-11-14,0.10126182960812,0.22 
+110742021,739,2022-07-18,2022-07-25,0.0164928971789777,0.09 +110742021,739,2022-07-25,2022-08-01,0.0167213381500914,0.09 +110742021,739,2022-08-01,2022-08-08,0.0166619517840445,0.09 +110742021,739,2022-08-08,2022-08-15,0.0165735214250162,0.09 +111009817,739,2022-05-13,2022-05-14,0.0,0.0 +111009817,739,2022-05-12,2022-05-13,0.0,0.0 +111009817,739,2022-05-11,2022-05-12,0.0,0.0 +111009817,739,2022-05-10,2022-05-11,0.0,0.0 +111009817,739,2022-05-09,2022-05-10,0.0,0.0 +111009817,739,2022-05-08,2022-05-09,0.0,0.0 +111009817,739,2022-05-07,2022-05-08,0.0,0.0 +111009817,739,2022-05-06,2022-05-07,0.0,0.0 +111009817,739,2022-05-05,2022-05-06,0.0,0.0 +111009817,739,2022-05-04,2022-05-05,0.0,0.0 +110742021,739,2022-11-28,2022-11-29,0.0025184931000694,0.09 +110742021,739,2022-11-21,2022-11-28,0.0172417795984074,0.09 +111009817,739,2022-05-15,2022-05-16,0.0,0.0 +110742021,739,2022-11-14,2022-11-21,0.0169051418779417,0.09 +110742021,739,2022-10-31,2022-11-07,0.0170285666827112,0.09 +110742021,739,2022-10-24,2022-10-31,0.0174311851151287,0.09 +110742021,739,2022-10-17,2022-10-24,0.0168579686433076,0.09 +110742021,739,2022-10-10,2022-10-17,0.0166946559911593,0.09 +110742021,739,2022-10-03,2022-10-10,0.0167057358194142,0.09 +110742021,739,2022-09-26,2022-10-03,0.0168217966565862,0.09 +110742021,739,2022-09-19,2022-09-26,0.0164684206247329,0.09 +110742021,739,2022-09-12,2022-09-19,0.0163143021054565,0.09 +110742021,739,2022-09-05,2022-09-12,0.0165335000492632,0.09 +110742021,739,2022-08-29,2022-09-05,0.0166957508772611,0.09 +110742021,739,2022-08-22,2022-08-29,0.0165183792123571,0.09 +110742021,739,2022-08-15,2022-08-22,0.016429838258773,0.09 +110742021,739,2022-11-07,2022-11-14,0.0170054098125547,0.09 +109150723,739,2022-05-09,2022-05-10,0.0,0.0 +100396479,739,2022-05-14,2022-05-15,0.0,0.0 +109150723,739,2022-05-07,2022-05-08,0.0,0.0 +100396575,739,2022-05-12,2022-05-13,0.0,0.0 +100396575,739,2022-05-11,2022-05-12,0.0,0.0 +100396575,739,2022-05-10,2022-05-11,0.0,0.0 
+100396575,739,2022-05-09,2022-05-10,0.0,0.0 +100396575,739,2022-05-08,2022-05-09,0.0,0.0 +100396575,739,2022-05-07,2022-05-08,0.0,0.0 +100396575,739,2022-05-06,2022-05-07,0.0,0.0 +100396575,739,2022-05-05,2022-05-06,0.0,0.0 +100396575,739,2022-05-04,2022-05-05,0.0,0.0 +100396559,739,2022-11-28,2022-11-29,0.0150377167155966,0.22 +100396559,739,2022-11-21,2022-11-28,0.1014169577974826,0.22 +100396559,739,2022-11-14,2022-11-21,0.1000049628783017,0.22 +100396559,739,2022-11-07,2022-11-14,0.0999166385736316,0.22 +100396575,739,2022-05-13,2022-05-14,0.0,0.0 +100396559,739,2022-10-31,2022-11-07,0.1004046867601573,0.22 +100396559,739,2022-10-17,2022-10-24,0.0997895280597731,0.22 +100396559,739,2022-10-10,2022-10-17,0.0989600504981353,0.22 +100396559,739,2022-10-03,2022-10-10,0.0987165153492242,0.22 +100396559,739,2022-09-26,2022-10-03,0.099844554788433,0.22 +100396559,739,2022-09-19,2022-09-26,0.0984458571765571,0.22 +100396559,739,2022-09-12,2022-09-19,0.0985079759266227,0.22 +100396559,739,2022-09-05,2022-09-12,0.0997954725753516,0.22 +100396559,739,2022-08-29,2022-09-05,0.0998424831777811,0.22 +100396559,739,2022-08-22,2022-08-29,0.0979871553136035,0.21 +100396559,739,2022-08-15,2022-08-22,0.0988232556264847,0.22 +100396559,739,2022-08-08,2022-08-15,0.0990393668180331,0.22 +100396559,739,2022-08-01,2022-08-08,0.0994760850444436,0.22 +100396559,739,2022-07-25,2022-08-01,0.0996214685728773,0.22 +100396559,739,2022-10-24,2022-10-31,0.099687076290138,0.22 +100396575,739,2022-05-14,2022-05-15,0.0,0.0 +100396575,739,2022-05-15,2022-05-16,0.0,0.0 +100396575,739,2022-05-16,2022-05-17,0.0,0.0 +100396575,739,2022-11-14,2022-11-21,0.032351333880797,0.12 +100396575,739,2022-11-07,2022-11-14,0.0328615379985421,0.12 +100396575,739,2022-10-31,2022-11-07,0.0326048902934417,0.12 +100396575,739,2022-10-24,2022-10-31,0.0329217406688258,0.12 +100396575,739,2022-10-17,2022-10-24,0.0328522697091102,0.12 +100396575,739,2022-10-10,2022-10-17,0.0325868718791753,0.12 
+100396575,739,2022-10-03,2022-10-10,0.0325280204415321,0.12 +100396575,739,2022-09-26,2022-10-03,0.0327591834357008,0.12 +100396575,739,2022-09-19,2022-09-26,0.0323797323508188,0.12 +100396575,739,2022-09-12,2022-09-19,0.0322359092533588,0.12 +100396575,739,2022-09-05,2022-09-12,0.0327117553679272,0.12 +100396575,739,2022-08-29,2022-09-05,0.0325768459588289,0.12 +100396575,739,2022-08-22,2022-08-29,0.0323462262749671,0.12 +100396575,739,2022-08-15,2022-08-22,0.0323730774689465,0.12 +100396575,739,2022-08-08,2022-08-15,0.0325976298190653,0.12 +100396575,739,2022-08-01,2022-08-08,0.0325632176827639,0.12 +100396575,739,2022-07-25,2022-08-01,0.0326679735444486,0.12 +100396575,739,2022-07-18,2022-07-25,0.0325921132462099,0.12 +100396575,739,2022-07-11,2022-07-18,0.032586935092695,0.12 +100396575,739,2022-07-04,2022-07-11,0.0323317738948389,0.12 +100396575,739,2022-06-27,2022-07-04,0.0322705414146184,0.12 +100396575,739,2022-06-20,2022-06-27,0.0324772532330539,0.12 +100396575,739,2022-06-13,2022-06-20,0.0313029824441192,0.12 +100396575,739,2022-06-06,2022-06-13,0.0325527976340057,0.12 +100396575,739,2022-05-30,2022-06-06,0.0349769092964922,0.13 +100396575,739,2022-05-23,2022-05-30,0.037384561472062,0.13 +100396575,739,2022-05-19,2022-05-23,0.0166853918195714,0.12 +100396575,739,2022-05-18,2022-05-19,0.0,0.0 +100396575,739,2022-05-17,2022-05-18,0.0,0.0 +100396559,739,2022-07-18,2022-07-25,0.0998147014761343,0.22 +100396575,739,2022-11-21,2022-11-28,0.0327920868294313,0.12 +100396559,739,2022-07-11,2022-07-18,0.0988115166546776,0.22 +100396559,739,2022-06-27,2022-07-04,0.097444278653711,0.21 +100396541,739,2022-08-29,2022-09-05,0.1002258728258311,0.22 +100396541,739,2022-08-22,2022-08-29,0.098661393742077,0.22 +100396541,739,2022-08-15,2022-08-22,0.0998448466416448,0.22 +100396541,739,2022-08-08,2022-08-15,0.1003837578464299,0.22 +100396541,739,2022-08-01,2022-08-08,0.1011442118324339,0.22 +100396541,739,2022-07-25,2022-08-01,0.1016077452804893,0.22 
+100396541,739,2022-07-18,2022-07-25,0.1021418129093945,0.22 +100396541,739,2022-07-11,2022-07-18,0.1013958879048004,0.22 +100396541,739,2022-07-04,2022-07-11,0.100152090890333,0.22 +100396541,739,2022-06-27,2022-07-04,0.1003985403804107,0.22 +100396541,739,2022-06-20,2022-06-27,0.1005186910041631,0.22 +100396541,739,2022-06-13,2022-06-20,0.0973866820744911,0.21 +100396541,739,2022-06-06,2022-06-13,0.0996305781873773,0.22 +100396541,739,2022-09-05,2022-09-12,0.0999118917388841,0.22 +100396541,739,2022-05-30,2022-06-06,0.1070928642775352,0.22 +100396541,739,2022-05-18,2022-05-23,0.0571438158718763,0.19 +100396541,739,2022-05-17,2022-05-18,48.0,30.77 +100396541,739,2022-05-16,2022-05-17,0.0,0.0 +100396541,739,2022-05-15,2022-05-16,0.0,0.0 +100396541,739,2022-05-14,2022-05-15,0.0,0.0 +100396541,739,2022-05-13,2022-05-14,24.0,15.38 +100396541,739,2022-05-12,2022-05-13,24.0,15.38 +100396541,739,2022-05-11,2022-05-12,12.0,7.69 +100396541,739,2022-05-10,2022-05-11,0.0,0.0 +100396541,739,2022-05-09,2022-05-10,0.0,0.0 +100396541,739,2022-05-08,2022-05-09,0.0,0.0 +100396541,739,2022-05-07,2022-05-08,0.0,0.0 +100396541,739,2022-05-06,2022-05-07,0.0,0.0 +100396541,739,2022-05-23,2022-05-30,0.1069858173913349,0.22 +100396541,739,2022-09-12,2022-09-19,0.098363310098648,0.22 +100396541,739,2022-09-19,2022-09-26,0.0980614746222272,0.21 +100396541,739,2022-09-26,2022-10-03,0.0992307560518384,0.22 +100396559,739,2022-06-20,2022-06-27,0.0973923045675329,0.21 +100396559,739,2022-06-13,2022-06-20,0.0947113239884674,0.21 +100396559,739,2022-06-06,2022-06-13,0.0964940801281015,0.21 +100396559,739,2022-05-30,2022-06-06,0.1037873407916849,0.22 +100396559,739,2022-05-23,2022-05-30,0.1040002482227804,0.22 +100396559,739,2022-05-18,2022-05-23,0.0558872070664618,0.19 +100396559,739,2022-05-17,2022-05-18,71.84659887811866,46.1 +100396559,739,2022-05-16,2022-05-17,0.0131120960064223,0.72 +100396559,739,2022-05-15,2022-05-16,0.0134101677374292,0.73 
+100396559,739,2022-05-14,2022-05-15,0.0125461541134543,0.7 +100396559,739,2022-05-13,2022-05-14,12.012239432647922,7.73 +100396559,739,2022-05-12,2022-05-13,12.012674006292904,7.73 +100396559,739,2022-05-11,2022-05-12,12.012914657028077,7.73 +100396559,739,2022-05-10,2022-05-11,0.0128467004843102,0.71 +100396559,739,2022-05-09,2022-05-10,0.0134870900272017,0.73 +100396559,739,2022-05-08,2022-05-09,0.0133336617778057,0.72 +100396559,739,2022-05-07,2022-05-08,0.0124587675359575,0.7 +100396559,739,2022-05-06,2022-05-07,0.0121497366995781,0.69 +100396559,739,2022-05-05,2022-05-06,0.0122286515302949,0.69 +100396559,739,2022-05-04,2022-05-05,12.0,7.69 +100396541,739,2022-11-28,2022-11-29,0.0146726202219724,0.22 +100396541,739,2022-11-21,2022-11-28,0.099516631802544,0.22 +100396541,739,2022-11-14,2022-11-21,0.0982649803627282,0.22 +100396541,739,2022-11-07,2022-11-14,0.0983151699183508,0.22 +100396541,739,2022-10-31,2022-11-07,0.0989150642417371,0.22 +100396541,739,2022-10-24,2022-10-31,0.0983413899084553,0.22 +100396541,739,2022-10-17,2022-10-24,0.0986764867557212,0.22 +100396541,739,2022-10-10,2022-10-17,0.098005119129084,0.21 +100396541,739,2022-10-03,2022-10-10,0.0979207707569003,0.21 +100396559,739,2022-07-04,2022-07-11,0.0973775658058002,0.21 +100396541,739,2022-05-05,2022-05-06,0.0,0.0 +100396575,739,2022-11-28,2022-11-29,0.0049168086843565,0.13 +100396591,739,2022-05-05,2022-05-06,0.0,0.0 +107532234,739,2022-05-12,2022-05-13,0.0,0.0 +107532234,739,2022-05-11,2022-05-12,0.0,0.0 +107532234,739,2022-05-10,2022-05-11,0.0,0.0 +107532234,739,2022-05-09,2022-05-10,0.0,0.0 +107532234,739,2022-05-08,2022-05-09,0.0,0.0 +107532234,739,2022-05-07,2022-05-08,0.0,0.0 +107532234,739,2022-05-06,2022-05-07,0.0,0.0 +107532234,739,2022-05-05,2022-05-06,0.0,0.0 +107532234,739,2022-05-04,2022-05-05,0.0,0.0 +100396671,739,2022-11-28,2022-11-29,0.0123546592658385,0.2 +100396671,739,2022-11-21,2022-11-28,0.0839361227117478,0.2 +100396671,739,2022-11-14,2022-11-21,0.0831684038275852,0.2 
+100396671,739,2022-11-07,2022-11-14,0.0832294233841821,0.2 +107532234,739,2022-05-13,2022-05-14,0.0,0.0 +100396671,739,2022-10-31,2022-11-07,0.0836028802441433,0.2 +100396671,739,2022-10-17,2022-10-24,0.0836353567428886,0.2 +100396671,739,2022-10-10,2022-10-17,0.0829348119441419,0.2 +100396671,739,2022-10-03,2022-10-10,0.0827640197239816,0.2 +100396671,739,2022-09-26,2022-10-03,0.0834443448111414,0.2 +100396671,739,2022-09-19,2022-09-26,0.0824222897645086,0.2 +100396671,739,2022-09-12,2022-09-19,0.0825729088392108,0.2 +100396671,739,2022-09-05,2022-09-12,0.0834911958081647,0.2 +100396671,739,2022-08-29,2022-09-05,0.0831434761639684,0.2 +100396671,739,2022-08-22,2022-08-29,0.0816286872141063,0.2 +100396671,739,2022-08-15,2022-08-22,0.0824020917061716,0.2 +100396671,739,2022-08-08,2022-08-15,0.0823961170390248,0.2 +100396671,739,2022-08-01,2022-08-08,0.0826093488140031,0.2 +100396671,739,2022-07-25,2022-08-01,0.0822713692905381,0.2 +100396671,739,2022-10-24,2022-10-31,0.0832907013827934,0.2 +107532234,739,2022-05-14,2022-05-15,0.0,0.0 +107532234,739,2022-05-15,2022-05-16,0.0,0.0 +107532234,739,2022-05-16,2022-05-17,0.0,0.0 +107532234,739,2022-11-21,2022-11-28,0.1021623632404953,0.22 +107532234,739,2022-11-14,2022-11-21,0.1009086669655516,0.22 +107532234,739,2022-11-07,2022-11-14,0.100819751038216,0.22 +107532234,739,2022-10-31,2022-11-07,0.1013216734863817,0.22 +107532234,739,2022-10-24,2022-10-31,0.1007011493202298,0.22 +107532234,739,2022-10-17,2022-10-24,0.1010550787905231,0.22 +107532234,739,2022-10-10,2022-10-17,0.1003367023076862,0.22 +107532234,739,2022-10-03,2022-10-10,0.1001440429827198,0.22 +107532234,739,2022-09-26,2022-10-03,0.1013161522569134,0.22 +107532234,739,2022-09-19,2022-09-26,0.0999390808865428,0.22 +107532234,739,2022-09-12,2022-09-19,0.1000603840220719,0.22 +107532234,739,2022-09-05,2022-09-12,0.1014644419774413,0.22 +107532234,739,2022-08-29,2022-09-05,0.1016227431828156,0.22 +107532234,739,2022-08-22,2022-08-29,0.0998797315405681,0.22 
+107532234,739,2022-08-15,2022-08-22,0.1009495435282588,0.22 +107532234,739,2022-08-08,2022-08-15,0.1014271668391302,0.22 +107532234,739,2022-08-01,2022-08-08,0.1021661170525476,0.22 +107532234,739,2022-07-25,2022-08-01,0.1026366620790213,0.22 +107532234,739,2022-07-18,2022-07-25,0.103282744763419,0.22 +107532234,739,2022-07-11,2022-07-18,0.1025614012032747,0.22 +107532234,739,2022-07-04,2022-07-11,0.1013415589695796,0.22 +107532234,739,2022-06-27,2022-07-04,0.1017041931627317,0.22 +107532234,739,2022-06-20,2022-06-27,0.1019496077205029,0.22 +107532234,739,2022-06-13,2022-06-20,0.0995213111282323,0.22 +107532234,739,2022-06-06,2022-06-13,0.1012865767950571,0.22 +107532234,739,2022-05-30,2022-06-06,0.1088787386801454,0.23 +107532234,739,2022-05-23,2022-05-30,0.1083609645170395,0.23 +107532234,739,2022-05-18,2022-05-23,0.0575384380794971,0.19 +107532234,739,2022-05-17,2022-05-18,0.0,0.0 +100396671,739,2022-07-18,2022-07-25,0.0824157755123451,0.2 +100396591,739,2022-05-04,2022-05-05,0.0,0.0 +100396671,739,2022-07-11,2022-07-18,0.0816441107308492,0.2 +100396671,739,2022-06-27,2022-07-04,0.0805028555914759,0.19 +100396591,739,2022-08-29,2022-09-05,0.100669740117155,0.22 +100396591,739,2022-08-22,2022-08-29,0.0991422650404274,0.22 +100396591,739,2022-08-15,2022-08-22,0.1002865260234102,0.22 +100396591,739,2022-08-08,2022-08-15,0.1007848003646358,0.22 +100396591,739,2022-08-01,2022-08-08,0.1014435065444558,0.22 +100396591,739,2022-07-25,2022-08-01,0.1017520787427201,0.22 +100396591,739,2022-07-18,2022-07-25,0.1020252234302461,0.22 +100396591,739,2022-07-11,2022-07-18,0.100984911667183,0.22 +100396591,739,2022-07-04,2022-07-11,0.0994452591985464,0.22 +100396591,739,2022-06-27,2022-07-04,0.0993501316988839,0.22 +100396591,739,2022-06-20,2022-06-27,0.0991181142161626,0.22 +100396591,739,2022-06-13,2022-06-20,0.0957458028751853,0.21 +100396591,739,2022-06-06,2022-06-13,0.0976231679779964,0.21 +100396591,739,2022-09-05,2022-09-12,0.1002643187530338,0.22 
+100396591,739,2022-05-30,2022-06-06,0.1050022440446454,0.22 +100396591,739,2022-05-18,2022-05-23,0.0800794679328618,0.48 +100396591,739,2022-05-17,2022-05-18,48.0,30.77 +100396591,739,2022-05-16,2022-05-17,0.0123264107550875,0.7 +100396591,739,2022-05-15,2022-05-16,0.0122585477166601,0.69 +100396591,739,2022-05-14,2022-05-15,0.0120636379655825,0.69 +100396591,739,2022-05-13,2022-05-14,12.024350165402328,7.77 +100396591,739,2022-05-12,2022-05-13,48.0,30.77 +100396591,739,2022-05-11,2022-05-12,12.085575227890072,7.96 +100396591,739,2022-05-10,2022-05-11,0.0,0.0 +100396591,739,2022-05-09,2022-05-10,0.0,0.0 +100396591,739,2022-05-08,2022-05-09,0.0,0.0 +100396591,739,2022-05-07,2022-05-08,0.0,0.0 +100396591,739,2022-05-06,2022-05-07,0.0,0.0 +100396591,739,2022-05-23,2022-05-30,0.1048437199654075,0.22 +100396591,739,2022-09-12,2022-09-19,0.0986334085464477,0.22 +100396591,739,2022-09-19,2022-09-26,0.098288022680208,0.22 +100396591,739,2022-09-26,2022-10-03,0.0994372575078159,0.22 +100396671,739,2022-06-20,2022-06-27,0.0805552756645932,0.19 +100396671,739,2022-06-13,2022-06-20,0.0782682215014341,0.19 +100396671,739,2022-06-06,2022-06-13,0.0799262602569281,0.19 +100396671,739,2022-05-30,2022-06-06,0.0873177574185245,0.2 +100396671,739,2022-05-23,2022-05-30,0.0882416874410345,0.2 +100396671,739,2022-05-18,2022-05-23,0.059118423716144,0.36 +100396671,739,2022-05-17,2022-05-18,0.0120006500848619,0.69 +100396671,739,2022-05-16,2022-05-17,0.0252971575383305,1.0 +100396671,739,2022-05-15,2022-05-16,0.0,0.0 +100396671,739,2022-05-14,2022-05-15,0.0123763642697962,0.7 +100396671,739,2022-05-13,2022-05-14,0.0123198565014051,0.7 +100396671,739,2022-05-12,2022-05-13,0.0121018424499359,0.69 +100396671,739,2022-05-11,2022-05-12,0.0127580902258195,0.71 +100396671,739,2022-05-10,2022-05-11,0.0120612062341529,0.69 +100396671,739,2022-05-09,2022-05-10,0.0256877413500999,1.01 +100396671,739,2022-05-08,2022-05-09,0.0,0.0 +100396671,739,2022-05-07,2022-05-08,0.0124552402825695,0.7 
+100396671,739,2022-05-06,2022-05-07,0.0,0.0 +100396671,739,2022-05-05,2022-05-06,0.0,0.0 +100396671,739,2022-05-04,2022-05-05,60.0,38.44 +100396591,739,2022-11-28,2022-11-29,0.0152235297719016,0.22 +100396591,739,2022-11-21,2022-11-28,0.102077312534675,0.22 +100396591,739,2022-11-14,2022-11-21,0.100248422822915,0.22 +100396591,739,2022-11-07,2022-11-14,0.0998387259896844,0.22 +100396591,739,2022-10-31,2022-11-07,0.100035123177804,0.22 +100396591,739,2022-10-24,2022-10-31,0.0990915974834933,0.22 +100396591,739,2022-10-17,2022-10-24,0.0991405858658254,0.22 +100396591,739,2022-10-10,2022-10-17,0.0983069805661216,0.22 +100396591,739,2022-10-03,2022-10-10,0.0981517834588885,0.21 +100396671,739,2022-07-04,2022-07-11,0.0805030304472893,0.19 +107532234,739,2022-11-28,2022-11-29,0.0152279078029096,0.22 +100396541,739,2022-05-04,2022-05-05,12.0,7.69 +100396532,739,2022-11-21,2022-11-28,0.0257535090204328,0.11 +100396487,739,2022-05-11,2022-05-12,0.0,0.0 +100396487,739,2022-05-10,2022-05-11,0.012440671186805,0.7 +100396487,739,2022-05-09,2022-05-10,0.0129663043405265,0.71 +100396487,739,2022-05-08,2022-05-09,0.0123890701062219,0.7 +100396487,739,2022-05-07,2022-05-08,0.0121564196376411,0.69 +100396487,739,2022-05-06,2022-05-07,0.0,0.0 +100396487,739,2022-05-05,2022-05-06,0.0,0.0 +100396487,739,2022-05-04,2022-05-05,0.0,0.0 +100396479,739,2022-11-28,2022-11-29,0.0160979810170829,0.23 +100396479,739,2022-11-21,2022-11-28,0.1063689773436635,0.22 +100396479,739,2022-11-14,2022-11-21,0.1048017611028626,0.22 +100396479,739,2022-11-07,2022-11-14,0.1044900777051225,0.22 +100396479,739,2022-10-31,2022-11-07,0.1048475529532879,0.22 +100396487,739,2022-05-12,2022-05-13,0.0249514421460688,0.99 +100396479,739,2022-10-24,2022-10-31,0.1040980984689667,0.22 +100396479,739,2022-10-10,2022-10-17,0.103831437183544,0.22 +100396479,739,2022-10-03,2022-10-10,0.1037008635466918,0.22 +100396479,739,2022-09-26,2022-10-03,0.1049512976314872,0.22 
+100396479,739,2022-09-19,2022-09-26,0.1035510025685653,0.22 +100396479,739,2022-09-12,2022-09-19,0.1036691220942884,0.22 +100396479,739,2022-09-05,2022-09-12,0.1050996154081076,0.22 +100396479,739,2022-08-29,2022-09-05,0.1052025601966306,0.22 +100396479,739,2022-08-22,2022-08-29,0.103293061023578,0.22 +100396479,739,2022-08-15,2022-08-22,0.1042049245443195,0.22 +100396479,739,2022-08-08,2022-08-15,0.1044885483570396,0.22 +100396479,739,2022-08-01,2022-08-08,0.1050404207780957,0.22 +100396479,739,2022-07-25,2022-08-01,0.1052910145372152,0.22 +100396479,739,2022-07-18,2022-07-25,0.1058271499350667,0.22 +100396479,739,2022-10-17,2022-10-24,0.1045127415563911,0.22 +100396487,739,2022-05-13,2022-05-14,0.0,0.0 +100396487,739,2022-05-14,2022-05-15,0.0120727801718985,0.69 +100396487,739,2022-05-15,2022-05-16,0.0122334371756362,0.69 +100396487,739,2022-11-14,2022-11-21,0.1042204619152471,0.22 +100396487,739,2022-11-07,2022-11-14,0.1045287036104127,0.22 +100396487,739,2022-10-31,2022-11-07,0.1055661692516878,0.22 +100396487,739,2022-10-24,2022-10-31,0.1054518695455044,0.22 +100396487,739,2022-10-17,2022-10-24,0.1064227047609165,0.22 +100396487,739,2022-10-10,2022-10-17,0.1062754801241681,0.22 +100396487,739,2022-10-03,2022-10-10,0.1065493781352415,0.22 +100396487,739,2022-09-26,2022-10-03,0.108153547742404,0.23 +100396487,739,2022-09-19,2022-09-26,0.106891693547368,0.22 +100396487,739,2022-09-12,2022-09-19,0.1070729322964325,0.22 +100396487,739,2022-09-05,2022-09-12,0.1084498827112838,0.23 +100396487,739,2022-08-29,2022-09-05,0.1083589137997478,0.23 +100396487,739,2022-08-22,2022-08-29,0.1060248876456171,0.22 +100396487,739,2022-08-15,2022-08-22,0.1066071182722225,0.22 +100396487,739,2022-08-08,2022-08-15,0.1064241623971611,0.22 +100396487,739,2022-08-01,2022-08-08,0.1064824861241504,0.22 +100396487,739,2022-07-25,2022-08-01,0.1062249904498457,0.22 +100396487,739,2022-07-18,2022-07-25,0.106247499003075,0.22 +100396487,739,2022-07-11,2022-07-18,0.1048091462580487,0.22 
+100396487,739,2022-07-04,2022-07-11,0.1029402557760477,0.22 +100396487,739,2022-06-27,2022-07-04,0.1029121337924151,0.22 +100396487,739,2022-06-20,2022-06-27,0.102843655273228,0.22 +100396487,739,2022-06-13,2022-06-20,0.1003225965137803,0.22 +100396487,739,2022-06-06,2022-06-13,0.1021792289581489,0.22 +100396487,739,2022-05-30,2022-06-06,0.1101713633033115,0.23 +100396487,739,2022-05-23,2022-05-30,0.1106371881822012,0.23 +100396487,739,2022-05-18,2022-05-23,0.0594816686544962,0.2 +100396487,739,2022-05-17,2022-05-18,0.0120146830871061,0.69 +100396487,739,2022-05-16,2022-05-17,0.0124947863701887,0.7 +100396479,739,2022-07-11,2022-07-18,0.1048225617269054,0.22 +100396487,739,2022-11-21,2022-11-28,0.1051516975276172,0.22 +100396479,739,2022-07-04,2022-07-11,0.103350123623386,0.22 +100396479,739,2022-06-20,2022-06-27,0.103835356654508,0.22 +100396461,739,2022-08-15,2022-08-22,0.1032234829617664,0.22 +100396461,739,2022-08-08,2022-08-15,0.1035141822649166,0.22 +100396461,739,2022-08-01,2022-08-08,0.1040873340098187,0.22 +100396461,739,2022-07-25,2022-08-01,0.1043383840005844,0.22 +100396461,739,2022-07-18,2022-07-25,0.104860172374174,0.22 +100396461,739,2022-07-11,2022-07-18,0.1038694831077009,0.22 +100396461,739,2022-07-04,2022-07-11,0.1023878687992692,0.22 +100396461,739,2022-06-27,2022-07-04,0.1026471928926184,0.22 +100396461,739,2022-06-20,2022-06-27,0.1028324728830769,0.22 +100396461,739,2022-06-13,2022-06-20,0.0990003746437232,0.22 +100396461,739,2022-06-06,2022-06-13,0.1019996702322237,0.22 +100396461,739,2022-05-30,2022-06-06,0.1105854812984791,0.23 +100396461,739,2022-05-23,2022-05-30,0.1108166203299999,0.23 +100396461,739,2022-08-22,2022-08-29,0.1022980733541771,0.22 +100396461,739,2022-05-18,2022-05-23,0.0585720855887766,0.2 +100396461,739,2022-05-16,2022-05-17,0.0,0.0 +100396461,739,2022-05-15,2022-05-16,0.0,0.0 +100396461,739,2022-05-14,2022-05-15,0.0,0.0 +100396461,739,2022-05-13,2022-05-14,0.0,0.0 +100396461,739,2022-05-12,2022-05-13,0.0,0.0 
+100396461,739,2022-05-11,2022-05-12,0.0,0.0 +100396461,739,2022-05-10,2022-05-11,0.0,0.0 +100396461,739,2022-05-09,2022-05-10,0.0,0.0 +100396461,739,2022-05-08,2022-05-09,0.0,0.0 +100396461,739,2022-05-07,2022-05-08,0.0,0.0 +100396461,739,2022-05-06,2022-05-07,0.0,0.0 +100396461,739,2022-05-05,2022-05-06,0.0,0.0 +100396461,739,2022-05-04,2022-05-05,12.0,7.69 +100396461,739,2022-05-17,2022-05-18,0.0,0.0 +100396461,739,2022-08-29,2022-09-05,0.1041600308381021,0.22 +100396461,739,2022-09-05,2022-09-12,0.1040244790492579,0.22 +100396461,739,2022-09-12,2022-09-19,0.1025763668585568,0.22 +100396479,739,2022-06-13,2022-06-20,0.1005348976639572,0.22 +100396479,739,2022-06-06,2022-06-13,0.1032082717205956,0.22 +100396479,739,2022-05-30,2022-06-06,0.1116176289994296,0.23 +100396479,739,2022-05-23,2022-05-30,0.1117993329775647,0.23 +100396479,739,2022-05-18,2022-05-23,0.0591807348528745,0.2 +109150723,739,2022-05-08,2022-05-09,0.0,0.0 +100396479,739,2022-05-16,2022-05-17,0.0,0.0 +100396479,739,2022-05-15,2022-05-16,0.0,0.0 +100396479,739,2022-05-13,2022-05-14,0.0,0.0 +100396479,739,2022-05-12,2022-05-13,0.0,0.0 +100396479,739,2022-05-11,2022-05-12,0.0,0.0 +100396479,739,2022-05-10,2022-05-11,0.0,0.0 +100396479,739,2022-05-09,2022-05-10,0.0,0.0 +100396479,739,2022-05-08,2022-05-09,0.0,0.0 +100396479,739,2022-05-07,2022-05-08,0.0,0.0 diff --git a/tests/unit/pg_helper.py b/tests/unit/pg_helper.py new file mode 100644 index 0000000..472af67 --- /dev/null +++ b/tests/unit/pg_helper.py @@ -0,0 +1,91 @@ +import psycopg +import pandas as pd +from src.pg_bulk_loader.batch.pg_connection_detail import PgConnectionDetail + + +def init_db(postgresql): + args = postgresql.dsn() + conn = psycopg.connect(host=args['host'], port=args['port'], dbname='postgres', user=args['user'], password='') + cursor = conn.cursor() + create_table_query = """CREATE TABLE public.aop_dummy ( + p_code text NOT NULL, + s_code text NOT NULL, + _from date NOT NULL, + upto date NOT NULL, + mean numeric NOT NULL, + 
ss numeric NOT NULL DEFAULT 0.0, + CONSTRAINT aggregated_order_projections_dummy_pk PRIMARY KEY (p_code, s_code, _from) + );""" + cursor.execute(create_table_query) + + cursor.close() + conn.commit() + conn.close() + + +def create_indexes(pg_connection: PgConnectionDetail): + pg_conn = pg_connection.get_psycopg_connection() + try: + create_index1 = "CREATE INDEX aop_dummy_batch_scope_index ON public.aop_dummy USING btree (upto);" + create_index2 = "CREATE INDEX p_s_aopd_index ON public.aop_dummy USING btree (p_code, s_code);" + + curser = pg_conn.cursor() + curser.execute(create_index1) + curser.execute(create_index2) + curser.close() + pg_conn.commit() + finally: + pg_conn.close() + + +def drop_indexes(pg_connection: PgConnectionDetail): + pg_conn = pg_connection.get_psycopg_connection() + try: + query = "DROP INDEX aop_dummy_batch_scope_index, p_s_aopd_index" + + curser = pg_conn.cursor() + curser.execute(query) + curser.close() + pg_conn.commit() + finally: + pg_conn.close() + + +def fetch_result(postgresql, query): + args = postgresql.dsn() + conn = psycopg.connect(host=args['host'], port=args['port'], dbname='postgres', user=args['user'], password='') + cursor = conn.cursor() + cursor.execute(query) + result = cursor.fetchall() + cursor.close() + return pd.DataFrame(result, columns=["p_code", "s_code", "_from", "upto", "mean", "ss"]) + + +def fetch_rows_count_and_assert(pg_conn_details: PgConnectionDetail, table_name: str, expected): + pg_conn = pg_conn_details.get_psycopg_connection() + try: + curser = pg_conn.cursor() + result = curser.execute(f"select count(1) from {table_name}").fetchone() + curser.close() + pg_conn.commit() + return result[0] + finally: + pg_conn.close() + + +def truncate_table_and_assert(pg_conn_details: PgConnectionDetail, table_name: str): + pg_conn = pg_conn_details.get_psycopg_connection() + try: + curser = pg_conn.cursor() + curser.execute(f"truncate table {table_name};") + curser.close() + pg_conn.commit() + + # Validate from DB + 
fetch_rows_count_and_assert(pg_conn_details, table_name, expected=0) + finally: + pg_conn.close() + + +def assert_data_count(result_count, expected_count): + assert result_count == expected_count, f"Expected: {expected_count}, Actual: {result_count}" diff --git a/tests/unit/test_batch_insert.py b/tests/unit/test_batch_insert.py new file mode 100644 index 0000000..94e3c2b --- /dev/null +++ b/tests/unit/test_batch_insert.py @@ -0,0 +1,233 @@ +import unittest +import pytest +import psycopg +import testing.postgresql +import pandas as pd +from src.pg_bulk_loader.batch.batch_insert import BatchInsert +from src.pg_bulk_loader.batch.pg_connection_detail import PgConnectionDetail + + +def init_db(postgresql): + args = postgresql.dsn() + conn = psycopg.connect(host=args['host'], port=args['port'], dbname='postgres', user=args['user'], password='') + cursor = conn.cursor() + create_table_query = """CREATE TABLE public.test_batch ( + test_id int4 NOT NULL, + test_name varchar NOT NULL, + CONSTRAINT test_batch_pk PRIMARY KEY (test_id) + ); + """ + cursor.execute(create_table_query) + cursor.close() + conn.commit() + conn.close() + + +def fetch_result(postgresql, query): + args = postgresql.dsn() + conn = psycopg.connect(host=args['host'], port=args['port'], dbname='postgres', user=args['user'], password='') + cursor = conn.cursor() + cursor.execute(query) + result = cursor.fetchall() + cursor.close() + return pd.DataFrame(result, columns=["test_id", "test_name"]) + + +def assert_data_count(data, expected_count): + assert len(data) == expected_count, f"Expected: {expected_count}, Actual: {(len(data))}" + + +class TestBatchInsert(unittest.IsolatedAsyncioTestCase): + + postgres_ = None + + @classmethod + def setUpClass(cls): + Postgresql = testing.postgresql.PostgresqlFactory(cache_initialized_db=True, on_initialized=init_db) + cls.postgres_ = Postgresql() + params = cls.postgres_.dsn() + params['password'] = "" + params['schema'] = "public" + params['database'] = "postgres" + 
cls.pg_connection = PgConnectionDetail(**params) + + @classmethod + def tearDownClass(cls) -> None: + cls.postgres_.stop() + + async def test_batch_insert_when_data_is_empty_df(self): + """ + The process doesn't fail. It just doesn't insert anything + """ + input_df = pd.DataFrame({ + 'test_id': [], + 'test_name': [], + }) + + batch_ = BatchInsert( + batch_size=1, table_name="test_batch", pg_conn_details=self.pg_connection, min_conn=1, max_conn=1 + ) + await batch_.open_connection_pool() + await batch_.execute(input_df) + await batch_.close_connection_pool() + + # After process over the data should be wiped off from the batch_ object + assert batch_.data_df is None + + async def test_batch_insert_when_invalid_pg_connection_details_object_passed(self): + with pytest.raises(Exception) as e: + batch_ = BatchInsert( + batch_size=1, table_name="test_batch", pg_conn_details=None, min_conn=1, max_conn=1 + ) + assert batch_ is None + assert e is not None + + async def test_batch_insert_when_min_conn_passed_as_0(self): + with pytest.raises(Exception) as e: + batch_ = BatchInsert( + batch_size=2, table_name="test_batch", pg_conn_details=self.pg_connection, min_conn=0, max_conn=1 + ) + assert batch_ is None + + assert str(e.value) == "min and max connection pool size cannot be null or zero!" + + async def test_batch_insert_when_min_conn_passed_as_null(self): + with pytest.raises(Exception) as e: + batch_ = BatchInsert( + batch_size=2, table_name="test_batch", pg_conn_details=self.pg_connection, min_conn=None, max_conn=1 + ) + assert batch_ is None + + assert str(e.value) == "min and max connection pool size cannot be null or zero!" + + async def test_batch_insert_when_max_conn_passed_as_0(self): + with pytest.raises(Exception) as e: + batch_ = BatchInsert( + batch_size=2, table_name="test_batch", pg_conn_details=self.pg_connection, min_conn=1, max_conn=0 + ) + assert batch_ is None + + assert str(e.value) == "min and max connection pool size cannot be null or zero!" 
+ + async def test_batch_insert_when_max_conn_passed_as_null(self): + with pytest.raises(Exception) as e: + batch_ = BatchInsert( + batch_size=2, table_name="test_batch", pg_conn_details=self.pg_connection, min_conn=1, max_conn=None + ) + assert batch_ is None + assert str(e.value) == "min and max connection pool size cannot be null or zero!" + + async def test_batch_insert_when_both_min_and_max_conn_passed_as_null(self): + with pytest.raises(Exception) as e: + batch_ = BatchInsert( + batch_size=2, table_name="test_batch", pg_conn_details=self.pg_connection, min_conn=None, max_conn=None + ) + assert batch_ is None + assert str(e.value) == "min and max connection pool size cannot be null or zero!" + + async def test_batch_insert_when_both_min_and_max_conn_passed_as_0(self): + with pytest.raises(Exception) as e: + batch_ = BatchInsert( + batch_size=2, table_name="test_batch", pg_conn_details=self.pg_connection, min_conn=0, max_conn=0 + ) + assert batch_ is None + assert str(e.value) == "min and max connection pool size cannot be null or zero!" 
+ + async def test_batch_insert_when_both_min_conn_is_greater_than_max_conn(self): + with pytest.raises(Exception) as e: + batch_ = BatchInsert( + batch_size=2, table_name="test_batch", pg_conn_details=self.pg_connection, min_conn=3, max_conn=1 + ) + assert batch_ is None + assert str(e.value) == "max_size must be greater or equal than min_size" + + async def test_batch_insert(self): + input_df = pd.DataFrame({ + 'test_id': [1, 2, 3], + 'test_name': ["aditya", "adam", "lalu"], + }) + + batch_ = BatchInsert( + batch_size=2, table_name="test_batch", pg_conn_details=self.pg_connection, min_conn=1, max_conn=1 + ) + await batch_.open_connection_pool() + await batch_.execute(input_df) + await batch_.close_connection_pool() + + # After process over the data should be wiped off from the batch_ object + assert batch_.data_df is None + + # Validate from DB + data = fetch_result(self.postgres_, "select * from test_batch where test_id in (1, 2, 3)") + assert_data_count(data, 3) + + async def test_batch_insert_when_columns_to_be_inserted_are_passed(self): + input_df = pd.DataFrame({ + 'test_id': [7, 8, 9], + 'test_name': ["aditya", "adam", "lalu"], + 'test_class': ['h1', 'h2', 'h3'] + }) + + batch_ = BatchInsert( + batch_size=2, table_name="test_batch", pg_conn_details=self.pg_connection, min_conn=1, max_conn=1 + ) + await batch_.open_connection_pool() + await batch_.execute(input_df, col_names=["test_id", "test_name"]) + await batch_.close_connection_pool() + + # Validate from DB + data = fetch_result(self.postgres_, "select * from test_batch where test_id in (7, 8, 9)") + assert_data_count(data, 3) + + async def test_batch_insert_where_pk_constraint_violates(self): + input_df = pd.DataFrame({ + 'test_id': [4, 5, 6], + 'test_name': ["aditya", "adam", "lalu"], + }) + + batch_ = BatchInsert( + batch_size=2, table_name="test_batch", pg_conn_details=self.pg_connection, min_conn=1, max_conn=1 + ) + await batch_.open_connection_pool() + + await batch_.execute(input_df) + + # 
Inserting the same data again + input_df = pd.DataFrame({ + 'test_id': [4, 5, 6], + 'test_name': ["aditya", "adam", "lalu"], + }) + + with pytest.raises(Exception) as e: + await batch_.execute(input_df) + + await batch_.close_connection_pool() + assert 'duplicate key value violates unique constraint "test_batch_pk"' in str(e) + + # Validate from DB + data = fetch_result(self.postgres_, "select * from test_batch where test_id in (4, 5, 6)") + assert_data_count(data, 3) + + async def test_batch_insert_when_pk_constraint_fails_in_last_batch(self): + """ + First three batches will get inserted and only last batch will get rolled back + """ + input_df = pd.DataFrame({ + 'test_id': [10, 11, 12, 10], + 'test_name': ["Hari", "adam", "lalu", "Hari"], + }) + + batch_ = BatchInsert( + batch_size=1, table_name="test_batch", pg_conn_details=self.pg_connection, min_conn=1, max_conn=1 + ) + await batch_.open_connection_pool() + + with pytest.raises(Exception) as e: + await batch_.execute(input_df) + + await batch_.close_connection_pool() + assert 'duplicate key value violates unique constraint "test_batch_pk"' in str(e) + + # Validate from DB + data = fetch_result(self.postgres_, "select * from test_batch where test_id in (10, 11, 12)") + assert_data_count(data, 3) diff --git a/tests/unit/test_batch_insert_multi_process_wrapper.py b/tests/unit/test_batch_insert_multi_process_wrapper.py new file mode 100644 index 0000000..6a3b6d3 --- /dev/null +++ b/tests/unit/test_batch_insert_multi_process_wrapper.py @@ -0,0 +1,231 @@ +import unittest +import pytest +import testing.postgresql +import pandas as pd +from src.pg_bulk_loader.batch.batch_insert_wrapper import batch_insert_to_postgres_with_multi_process +from src.pg_bulk_loader.batch.pg_connection_detail import PgConnectionDetail +from .pg_helper import init_db, fetch_rows_count_and_assert, truncate_table_and_assert, create_indexes, drop_indexes + + +class TestBatchInsertMultiProcessWrapper(unittest.IsolatedAsyncioTestCase): + + 
postgres_ = None + + @classmethod + def setUpClass(cls): + Postgresql = testing.postgresql.PostgresqlFactory(cache_initialized_db=True, on_initialized=init_db) + cls.postgres_ = Postgresql() + params = cls.postgres_.dsn() + params['password'] = "" + params['schema'] = "public" + params['database'] = "postgres" + cls.pg_connection = PgConnectionDetail(**params) + + @classmethod + def tearDownClass(cls) -> None: + cls.postgres_.stop() + + async def test_batch_insert_ms_when_generated_data_is_empty(self): + df_data_generator = [] + with pytest.raises(Exception) as e: + await batch_insert_to_postgres_with_multi_process( + pg_conn_details=self.pg_connection, + table_name="aop_dummy", + data_generator=df_data_generator, + batch_size=100, + min_conn_pool_size=3, + max_conn_pool_size=5, + no_of_processes=2, + drop_and_create_index=False + ) + + assert str(e.value) == "Invalid data input!" + + # Validate from DB + fetch_rows_count_and_assert(self.pg_connection, "aop_dummy", expected=0) + + async def test_batch_insert_ms_when_generated_data_is_null(self): + df_data_generator = None + with pytest.raises(Exception) as e: + await batch_insert_to_postgres_with_multi_process( + pg_conn_details=self.pg_connection, + table_name="aop_dummy", + data_generator=df_data_generator, + batch_size=100, + min_conn_pool_size=3, + max_conn_pool_size=5, + no_of_processes=2, + drop_and_create_index=False + ) + + assert str(e.value) == "Invalid data input!" + + # Validate from DB + fetch_rows_count_and_assert(self.pg_connection, "aop_dummy", expected=0) + + async def test_batch_insert_ms_when_no_of_processes_are_null(self): + """ + no_of_processes can be null. The ProcessPoolExecutor can determine the ideal value based on the number + of cores available on the machine where test case is being executed. 
+ """ + df_data_generator = pd.read_csv("tests/unit/aopd-1k.csv", chunksize=300) + await batch_insert_to_postgres_with_multi_process( + pg_conn_details=self.pg_connection, + table_name="aop_dummy", + data_generator=df_data_generator, + batch_size=100, + min_conn_pool_size=3, + max_conn_pool_size=5, + no_of_processes=None, + drop_and_create_index=True + ) + + # Validate from DB + fetch_rows_count_and_assert(self.pg_connection, "aop_dummy", expected=1000) + + # Truncate table and assert + truncate_table_and_assert(self.pg_connection, "aop_dummy") + + async def test_batch_insert_ms_when_no_of_processes_is_zero(self): + """ + no_of_processes can't be 0. + """ + df_data_generator = pd.read_csv("tests/unit/aopd-1k.csv", chunksize=300) + with pytest.raises(Exception) as e: + await batch_insert_to_postgres_with_multi_process( + pg_conn_details=self.pg_connection, + table_name="aop_dummy", + data_generator=df_data_generator, + batch_size=100, + min_conn_pool_size=3, + max_conn_pool_size=5, + no_of_processes=0, + drop_and_create_index=True + ) + + assert str(e.value) == "max_workers must be greater than 0" + + # Validate from DB + fetch_rows_count_and_assert(self.pg_connection, "aop_dummy", expected=0) + + async def test_batch_insert_ms_when_table_does_not_have_indexes_and_drop_and_create_index_is_true(self): + """ + The total records are 1000 + The chunksize is 300. That means the generator will generate 4 dfs. First three will have data size of 300 and + last one will have 100 records. + + Now the batch size is given as 100. That means in each generated df, the data will be inserted in the batch of + 100. + + Here every generated df will be processed in a separate process. So it depends on how many processes + (no of cores) available to process in parallel. In the test cases, we are giving 2. 
+ """ + df_data_generator = pd.read_csv("tests/unit/aopd-1k.csv", chunksize=300) + await batch_insert_to_postgres_with_multi_process( + pg_conn_details=self.pg_connection, + table_name="aop_dummy", + data_generator=df_data_generator, + batch_size=100, + min_conn_pool_size=3, + max_conn_pool_size=5, + no_of_processes=2, + drop_and_create_index=True + ) + + # Validate from DB + fetch_rows_count_and_assert(self.pg_connection, "aop_dummy", expected=1000) + + # Truncate table and assert + truncate_table_and_assert(self.pg_connection, "aop_dummy") + + async def test_batch_insert_ms_when_table_does_not_have_indexes_and_drop_and_create_index_is_false(self): + """ + The total records are 1000 + The chunksize is 300. That means the generator will generate 4 dfs. First three will have data size of 300 and + last one will have 100 records. + + Now the batch size is given as 100. That means in each generated df, the data will be inserted in the batch of + 100. + + Here every generated df will be processed in a separate process. So it depends on how many processes + (no of cores) available to process in parallel. In the test cases, we are giving 2. + """ + df_data_generator = pd.read_csv("tests/unit/aopd-1k.csv", chunksize=300) + await batch_insert_to_postgres_with_multi_process( + pg_conn_details=self.pg_connection, + table_name="aop_dummy", + data_generator=df_data_generator, + batch_size=100, + min_conn_pool_size=3, + max_conn_pool_size=5, + no_of_processes=2, + drop_and_create_index=False + ) + + # Validate from DB + fetch_rows_count_and_assert(self.pg_connection, "aop_dummy", expected=1000) + + # Truncate table and assert + truncate_table_and_assert(self.pg_connection, "aop_dummy") + + async def test_batch_insert_ms_when_table_have_indexes_and_drop_and_create_index_is_false(self): + """ + The total records are 1000 + The chunksize is 300. That means the generator will generate 4 dfs. First three will have data size of 300 and + last one will have 100 records. 
+ + Now the batch size is given as 100. That means in each generated df, the data will be inserted in the batch of + 100. + + Here every generated df will be processed in a separate process. So it depends on how many processes + (no of cores) available to process in parallel. In the test cases, we are giving 2. + """ + df_data_generator = pd.read_csv("tests/unit/aopd-1k.csv", chunksize=300) + create_indexes(self.pg_connection) + await batch_insert_to_postgres_with_multi_process( + pg_conn_details=self.pg_connection, + table_name="aop_dummy", + data_generator=df_data_generator, + batch_size=100, + min_conn_pool_size=3, + max_conn_pool_size=5, + no_of_processes=2, + drop_and_create_index=False + ) + drop_indexes(self.pg_connection) + # Validate from DB + fetch_rows_count_and_assert(self.pg_connection, "aop_dummy", expected=1000) + + # Truncate table and assert + truncate_table_and_assert(self.pg_connection, "aop_dummy") + + async def test_batch_insert_ms_when_table_have_indexes_and_drop_and_create_index_is_true(self): + """ + The total records are 1000 + The chunksize is 300. That means the generator will generate 4 dfs. First three will have data size of 300 and + last one will have 100 records. + + Now the batch size is given as 100. That means in each generated df, the data will be inserted in the batch of + 100. + + Here every generated df will be processed in a separate process. So it depends on how many processes + (no of cores) available to process in parallel. In the test cases, we are giving 2. 
+ """ + df_data_generator = pd.read_csv("tests/unit/aopd-1k.csv", chunksize=300) + create_indexes(self.pg_connection) + await batch_insert_to_postgres_with_multi_process( + pg_conn_details=self.pg_connection, + table_name="aop_dummy", + data_generator=df_data_generator, + batch_size=100, + min_conn_pool_size=3, + max_conn_pool_size=5, + no_of_processes=2, + drop_and_create_index=True + ) + drop_indexes(self.pg_connection) + # Validate from DB + fetch_rows_count_and_assert(self.pg_connection, "aop_dummy", expected=1000) + + # Truncate table and assert + truncate_table_and_assert(self.pg_connection, "aop_dummy") diff --git a/tests/unit/test_batch_insert_wrapper.py b/tests/unit/test_batch_insert_wrapper.py new file mode 100644 index 0000000..92746db --- /dev/null +++ b/tests/unit/test_batch_insert_wrapper.py @@ -0,0 +1,215 @@ +import unittest +from unittest.mock import patch + +import pytest +import testing.postgresql +import pandas as pd + +from src.pg_bulk_loader.batch.batch_insert_wrapper import batch_insert_to_postgres +from src.pg_bulk_loader.batch.pg_connection_detail import PgConnectionDetail +from .pg_helper import init_db, fetch_rows_count_and_assert, truncate_table_and_assert, create_indexes, drop_indexes + + +class TestBatchInsertWrapper(unittest.IsolatedAsyncioTestCase): + + postgres_ = None + + async def test_batch_insert_when_data_is_null(self): + input_df = None + + await batch_insert_to_postgres( + pg_conn_details=self.pg_connection, + table_name="aop_dummy", + data_df=input_df, + batch_size=200, + min_conn_pool_size=5, + max_conn_pool_size=7, + use_multi_process_for_create_index=False, + drop_and_create_index=False + ) + # Validate from DB + fetch_rows_count_and_assert(self.pg_connection, "aop_dummy", expected=0) + + async def test_batch_insert_when_data_is_empty_dataframe(self): + input_df = pd.DataFrame() + + await batch_insert_to_postgres( + pg_conn_details=self.pg_connection, + table_name="aop_dummy", + data_df=input_df, + batch_size=200, + 
min_conn_pool_size=5, + max_conn_pool_size=7, + use_multi_process_for_create_index=False, + drop_and_create_index=False + ) + # Validate from DB + fetch_rows_count_and_assert(self.pg_connection, "aop_dummy", expected=0) + + @classmethod + def setUpClass(cls): + Postgresql = testing.postgresql.PostgresqlFactory(cache_initialized_db=True, on_initialized=init_db) + cls.postgres_ = Postgresql() + params = cls.postgres_.dsn() + params['password'] = "" + params['schema'] = "public" + params['database'] = "postgres" + cls.pg_connection = PgConnectionDetail(**params) + + @classmethod + def tearDownClass(cls) -> None: + cls.postgres_.stop() + + @patch("src.pg_bulk_loader.batch.batch_insert_wrapper.run") + async def test_batch_insert_when_exception_is_thrown(self, mock_run): + mock_run.side_effect = Exception("Custom Exception!") + + input_df = pd.read_csv("tests/unit/aopd-1k.csv") + + with pytest.raises(Exception) as e: + await batch_insert_to_postgres( + pg_conn_details=self.pg_connection, + table_name="aop_dummy", + data_df=input_df, + batch_size=200, + min_conn_pool_size=5, + max_conn_pool_size=7, + use_multi_process_for_create_index=True, + drop_and_create_index=True + ) + + assert str(e.value) == "Custom Exception!" 
+ + # Validate from DB + fetch_rows_count_and_assert(self.pg_connection, "aop_dummy", expected=0) + + async def test_batch_insert_when_table_does_not_have_indexes_and_drop_and_create_index_is_true(self): + input_df = pd.read_csv("tests/unit/aopd-1k.csv") + + await batch_insert_to_postgres( + pg_conn_details=self.pg_connection, + table_name="aop_dummy", + data_df=input_df, + batch_size=200, + min_conn_pool_size=5, + max_conn_pool_size=7, + use_multi_process_for_create_index=True, + drop_and_create_index=True + ) + + # Validate from DB + fetch_rows_count_and_assert(self.pg_connection, "aop_dummy", expected=1000) + + # Truncate table and assert + truncate_table_and_assert(self.pg_connection, "aop_dummy") + + async def test_batch_insert_when_table_does_not_have_indexes_and_drop_and_create_index_is_false(self): + input_df = pd.read_csv("tests/unit/aopd-1k.csv") + + await batch_insert_to_postgres( + pg_conn_details=self.pg_connection, + table_name="aop_dummy", + data_df=input_df, + batch_size=200, + min_conn_pool_size=5, + max_conn_pool_size=7, + use_multi_process_for_create_index=True, + drop_and_create_index=False + ) + + # Validate from DB + fetch_rows_count_and_assert(self.pg_connection, "aop_dummy", expected=1000) + + # Truncate table and assert + truncate_table_and_assert(self.pg_connection, "aop_dummy") + + async def test_batch_insert_when_table_have_indexes_and_drop_and_create_index_is_true(self): + input_df = pd.read_csv("tests/unit/aopd-1k.csv") + create_indexes(self.pg_connection) + + await batch_insert_to_postgres( + pg_conn_details=self.pg_connection, + table_name="aop_dummy", + data_df=input_df, + batch_size=200, + min_conn_pool_size=5, + max_conn_pool_size=7, + use_multi_process_for_create_index=True, + drop_and_create_index=True + ) + drop_indexes(self.pg_connection) + # Validate from DB + fetch_rows_count_and_assert(self.pg_connection, "aop_dummy", expected=1000) + + # Truncate table and assert + truncate_table_and_assert(self.pg_connection, "aop_dummy") 
+ + async def test_batch_insert_when_table_have_indexes_and_drop_and_create_index_is_false(self): + input_df = pd.read_csv("tests/unit/aopd-1k.csv") + create_indexes(self.pg_connection) + + await batch_insert_to_postgres( + pg_conn_details=self.pg_connection, + table_name="aop_dummy", + data_df=input_df, + batch_size=200, + min_conn_pool_size=5, + max_conn_pool_size=7, + use_multi_process_for_create_index=True, + drop_and_create_index=False + ) + drop_indexes(self.pg_connection) + # Validate from DB + fetch_rows_count_and_assert(self.pg_connection, "aop_dummy", expected=1000) + + # Truncate table and assert + truncate_table_and_assert(self.pg_connection, "aop_dummy") + + async def test_batch_insert_when_table_have_indexes_and_drop_and_create_index_happens_sequentially(self): + input_df = pd.read_csv("tests/unit/aopd-1k.csv") + create_indexes(self.pg_connection) + + await batch_insert_to_postgres( + pg_conn_details=self.pg_connection, + table_name="aop_dummy", + data_df=input_df, + batch_size=200, + min_conn_pool_size=5, + max_conn_pool_size=7, + use_multi_process_for_create_index=False, + drop_and_create_index=True + ) + drop_indexes(self.pg_connection) + # Validate from DB + fetch_rows_count_and_assert(self.pg_connection, "aop_dummy", expected=1000) + + # Truncate table and assert + truncate_table_and_assert(self.pg_connection, "aop_dummy") + + async def test_batch_insert_when_conn_pool_has_less_connection_than_total_batches(self): + """ + Total data size is: 1000 + batch size is: 200 + So total number of batches will be 5. + We are keeping max 3 connections in the pool. 
+ + :expect - The code to pass + """ + input_df = pd.read_csv("tests/unit/aopd-1k.csv") + + await batch_insert_to_postgres( + pg_conn_details=self.pg_connection, + table_name="aop_dummy", + data_df=input_df, + batch_size=200, + min_conn_pool_size=2, + max_conn_pool_size=3, + use_multi_process_for_create_index=True, + drop_and_create_index=True + ) + + # Validate from DB + fetch_rows_count_and_assert(self.pg_connection, "aop_dummy", expected=1000) + + # Truncate table and assert + truncate_table_and_assert(self.pg_connection, "aop_dummy") diff --git a/tests/unit/test_dataframe_utils.py b/tests/unit/test_dataframe_utils.py new file mode 100644 index 0000000..1f7f12b --- /dev/null +++ b/tests/unit/test_dataframe_utils.py @@ -0,0 +1,128 @@ +import unittest +import pytest +import pandas as pd +from src.pg_bulk_loader.utils.dataframe_utils import partition_df, get_ranges + + +class TestDataFrameUtils(unittest.TestCase): + + def test_partition_df_with_invalid_partition_size(self): + with pytest.raises(Exception) as e: + input_df = pd.DataFrame({ + 'test': [1, 2, 3] + }) + partition_df(df=input_df, partition_size=0) + assert str(e.value) == "Invalid partition size." + + def test_partition_df_with_invalid_input_df(self): + with pytest.raises(Exception) as e: + input_df = "invalid" + partition_df(df=input_df, partition_size=0) + assert str(e.value) == "Invalid parameter! 
Data type should be pandas DataFrame" + + def test_partition_df_when_input_df_is_none(self): + input_df = None + result = partition_df(df=input_df, partition_size=0) + assert result is None + + def test_partition_df_when_input_df_is_empty(self): + input_df = pd.DataFrame() + result = partition_df(df=input_df, partition_size=0) + assert result is None + + def test_partition_df_when_partition_size_is_more_than_df_length(self): + input_df = pd.DataFrame({ + 'test': [1, 2, 3] + }) + result = partition_df(df=input_df, partition_size=5) + assert len(result) == 1 + assert result[0].shape == (3, 1) + + def test_partition_df_when_partition_size_is_equal_to_df_length(self): + input_df = pd.DataFrame({ + 'test': [1, 2, 3] + }) + result = partition_df(df=input_df, partition_size=3) + assert len(result) == 1 + assert result[0].shape == (3, 1) + + def test_partition_df_when_partition_size_is_less_than_df_length(self): + input_df = pd.DataFrame({ + 'test': [1, 2, 3] + }) + result = partition_df(df=input_df, partition_size=2) + assert len(result) == 2 + assert result[0].shape == (2, 1) + assert result[1].shape == (1, 1) + + def test_get_ranges_when_data_size_is_zero(self): + data_size = 0 + batch_size = 100 + ranges = get_ranges(data_size, batch_size) + assert ranges == [] + + def test_get_ranges_when_data_size_is_null(self): + data_size = None + batch_size = 100 + ranges = get_ranges(data_size, batch_size) + assert ranges == [] + + def test_get_ranges_when_data_size_is_negative(self): + data_size = -1 + batch_size = 100 + ranges = get_ranges(data_size, batch_size) + assert ranges == [] + + def test_get_ranges_when_data_size_is_string_value(self): + data_size = "hello" + batch_size = 100 + ranges = get_ranges(data_size, batch_size) + assert ranges == [] + + def test_get_ranges_when_batch_size_is_zero(self): + data_size = 100 + batch_size = 0 + ranges = get_ranges(data_size, batch_size) + assert ranges == [] + + def test_get_ranges_when_batch_size_is_null(self): + data_size = 100 + 
batch_size = None + ranges = get_ranges(data_size, batch_size) + assert ranges == [] + + def test_get_ranges_when_batch_size_is_negative(self): + data_size = 100 + batch_size = -1 + ranges = get_ranges(data_size, batch_size) + assert ranges == [] + + def test_get_ranges_when_batch_size_is_string_value(self): + data_size = 100 + batch_size = "hell0" + ranges = get_ranges(data_size, batch_size) + assert ranges == [] + + def test_get_ranges_when_batch_size_is_greater_than_data_size(self): + data_size = 100 + batch_size = 150 + ranges = get_ranges(data_size, batch_size) + assert ranges == [(0, 100)] + + def test_get_ranges_when_batch_size_is_equal_to_data_size(self): + data_size = 99 + batch_size = 99 + ranges = get_ranges(data_size, batch_size) + assert ranges == [(0, 99)] + + def test_get_ranges_when_batch_size_is_less_than_data_size(self): + data_size = 101 + batch_size = 50 + ranges = get_ranges(data_size, batch_size) + assert ranges == [(0, 50), (50, 100), (100, 101)] + + def test_get_ranges(self): + data_size = 59 + batch_size = 9 + ranges = get_ranges(data_size, batch_size) + assert ranges == [(0, 9), (9, 18), (18, 27), (27, 36), (36, 45), (45, 54), (54, 59)]