dmarkey · andrikoz · Jan 19, 2021 · Feb 9, 2021 · pavlospt · Feb 9, 2021
diff --git a/pgdatadiff/main.py b/pgdatadiff/main.py
@@ -1,15 +1,17 @@
 """
 Usage:
-  pgdatadiff --firstdb=<firstconnectionstring> --seconddb=<secondconnectionstring> [--only-data|--only-sequences] [--count-only] [--chunk-size=<size>]
+  pgdatadiff --firstdb=<firstconnectionstring> --seconddb=<secondconnectionstring> [--schema=<schema>] [--only-data|--only-sequences] [--count-only] [--chunk-size=<size>] [--exclude-tables=<table1,table2>]
   pgdatadiff --version
 
 Options:
   -h --help          Show this screen.
   --version          Show version.
   --firstdb=postgres://postgres:password@localhost/firstdb        The connection string of the first DB
   --seconddb=postgres://postgres:password@localhost/seconddb         The connection string of the second DB
+  --schema="public"         The schema of tables in comparison
   --only-data        Only compare data, exclude sequences
   --only-sequences   Only compare seqences, exclude data
+  --exclude-tables=""   Comma-separated list of tables to exclude from data comparison [default: ]
   --count-only       Do a quick test based on counts alone
   --chunk-size=10000       The chunk size when comparing data [default: 10000]
 """
@@ -33,7 +35,9 @@ def main():
 
     differ = DBDiff(first_db_connection_string, second_db_connection_string,
                     chunk_size=arguments['--chunk-size'],
-                    count_only=arguments['--count-only'])
+                    count_only=arguments['--count-only'],
+                    exclude_tables=arguments['--exclude-tables'],
+                    schema=arguments['--schema'])
 
     if not arguments['--only-sequences']:
         if differ.diff_all_table_data():

diff --git a/pgdatadiff/pgdatadiff.py b/pgdatadiff/pgdatadiff.py
@@ -1,6 +1,6 @@
 import warnings
 
-from fabulous.color import bold, green, red
+from fabulous.color import bold, green, red, yellow
 from halo import Halo
 from sqlalchemy import exc as sa_exc
 from sqlalchemy.engine import create_engine
@@ -19,7 +19,7 @@ def make_session(connection_string):
 
 class DBDiff(object):
 
-    def __init__(self, firstdb, seconddb, chunk_size=10000, count_only=False):
+    def __init__(self, firstdb, seconddb, schema, chunk_size=10000, count_only=False, exclude_tables=""):
         firstsession, firstengine = make_session(firstdb)
         secondsession, secondengine = make_session(seconddb)
         self.firstsession = firstsession
@@ -32,6 +32,8 @@ def __init__(self, firstdb, seconddb, chunk_size=10000, count_only=False):
         self.secondinspector = inspect(secondengine)
         self.chunk_size = int(chunk_size)
         self.count_only = count_only
+        self.exclude_tables = exclude_tables.split(',')
+        self.schema = schema or 'public'
 
     def diff_table_data(self, tablename):
         try:
@@ -61,7 +63,7 @@ def diff_table_data(self, tablename):
         SELECT md5(array_agg(md5((t.*)::varchar))::varchar)
         FROM (
                 SELECT *
-                FROM {tablename}
+                FROM {self.schema}.{tablename}
                 ORDER BY {pk} limit :row_limit offset :row_offset
             ) AS t;
                         """
@@ -90,7 +92,7 @@ def get_all_sequences(self):
                 self.firstsession.execute(GET_SEQUENCES_SQL).fetchall()]
 
     def diff_sequence(self, seq_name):
-        GET_SEQUENCES_VALUE_SQL = f"SELECT last_value FROM {seq_name};"
+        GET_SEQUENCES_VALUE_SQL = f"SELECT last_value FROM {self.schema}.{seq_name};"
 
         try:
             firstvalue = \
@@ -140,8 +142,11 @@ def diff_all_table_data(self):
         with warnings.catch_warnings():
             warnings.simplefilter("ignore", category=sa_exc.SAWarning)
             tables = sorted(
-                self.firstinspector.get_table_names(schema="public"))
+                self.firstinspector.get_table_names(schema=self.schema))
             for table in tables:
+                if table in self.exclude_tables:
+                    print(bold(yellow(f"Ignoring table {table}")))
+                    continue
                 with Halo(
                         text=f"Analysing table {table}. "
                              f"[{tables.index(table) + 1}/{len(tables)}]",