From 225de1f1700a011e225fc42b830b6e82cd9792a2 Mon Sep 17 00:00:00 2001 From: AndreasG Date: Tue, 19 Jan 2021 14:02:35 +0200 Subject: [PATCH] Add additional flag to ignore tables in data comparison --- pgdatadiff/main.py | 6 ++++-- pgdatadiff/pgdatadiff.py | 8 ++++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/pgdatadiff/main.py b/pgdatadiff/main.py index db41464..eff46a2 100644 --- a/pgdatadiff/main.py +++ b/pgdatadiff/main.py @@ -1,6 +1,6 @@ """ Usage: - pgdatadiff --firstdb= --seconddb= [--only-data|--only-sequences] [--count-only] [--chunk-size=] + pgdatadiff --firstdb= --seconddb= [--only-data|--only-sequences] [--count-only] [--chunk-size=] [--exclude-tables=] pgdatadiff --version Options: @@ -10,6 +10,7 @@ --seconddb=postgres://postgres:password@localhost/seconddb The connection string of the second DB --only-data Only compare data, exclude sequences --only-sequences Only compare seqences, exclude data + --exclude-tables="" Exclude tables from data comparison Must be a comma separated string [default: empty string] --count-only Do a quick test based on counts alone --chunk-size=10000 The chunk size when comparing data [default: 10000] """ @@ -33,7 +34,8 @@ def main(): differ = DBDiff(first_db_connection_string, second_db_connection_string, chunk_size=arguments['--chunk-size'], - count_only=arguments['--count-only']) + count_only=arguments['--count-only'], + exclude_tables=arguments['--exclude-tables']) if not arguments['--only-sequences']: if differ.diff_all_table_data(): diff --git a/pgdatadiff/pgdatadiff.py b/pgdatadiff/pgdatadiff.py index 1bb9be1..cfaff37 100644 --- a/pgdatadiff/pgdatadiff.py +++ b/pgdatadiff/pgdatadiff.py @@ -1,6 +1,6 @@ import warnings -from fabulous.color import bold, green, red +from fabulous.color import bold, green, red, yellow from halo import Halo from sqlalchemy import exc as sa_exc from sqlalchemy.engine import create_engine @@ -19,7 +19,7 @@ def make_session(connection_string): class DBDiff(object): - def __init__(self, firstdb, seconddb, chunk_size=10000, count_only=False): + def __init__(self, firstdb, seconddb, chunk_size=10000, count_only=False, exclude_tables=""): firstsession, firstengine = make_session(firstdb) secondsession, secondengine = make_session(seconddb) self.firstsession = firstsession @@ -32,6 +32,7 @@ def __init__(self, firstdb, seconddb, chunk_size=10000, count_only=False): self.secondinspector = inspect(secondengine) self.chunk_size = int(chunk_size) self.count_only = count_only + self.exclude_tables = exclude_tables.split(',') def diff_table_data(self, tablename): try: @@ -142,6 +143,9 @@ def diff_all_table_data(self): tables = sorted( self.firstinspector.get_table_names(schema="public")) for table in tables: + if table in self.exclude_tables: + print(bold(yellow(f"Ignoring table {table}"))) + continue with Halo( text=f"Analysing table {table}. " f"[{tables.index(table) + 1}/{len(tables)}]",