Skip to content
This repository has been archived by the owner on Jun 27, 2024. It is now read-only.

Feature/add schema selection #7

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions pgdatadiff/main.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
"""
Usage:
pgdatadiff --firstdb=<firstconnectionstring> --seconddb=<secondconnectionstring> [--only-data|--only-sequences] [--count-only] [--chunk-size=<size>]
pgdatadiff --firstdb=<firstconnectionstring> --seconddb=<secondconnectionstring> [--schema=<schema>] [--only-data|--only-sequences] [--count-only] [--chunk-size=<size>] [--exclude-tables=<table1,table2>]
pgdatadiff --version

Options:
-h --help Show this screen.
--version Show version.
--firstdb=postgres://postgres:password@localhost/firstdb The connection string of the first DB
--seconddb=postgres://postgres:password@localhost/seconddb The connection string of the second DB
--schema="public" The schema of tables in comparison
--only-data Only compare data, exclude sequences
--only-sequences Only compare sequences, exclude data
--exclude-tables="" Exclude tables from data comparison. Must be a comma-separated string [default: empty string]
--count-only Do a quick test based on counts alone
--chunk-size=10000 The chunk size when comparing data [default: 10000]
"""
Expand All @@ -33,7 +35,9 @@ def main():

differ = DBDiff(first_db_connection_string, second_db_connection_string,
chunk_size=arguments['--chunk-size'],
count_only=arguments['--count-only'])
count_only=arguments['--count-only'],
exclude_tables=arguments['--exclude-tables'],
schema=arguments['--schema'])

if not arguments['--only-sequences']:
if differ.diff_all_table_data():
Expand Down
15 changes: 10 additions & 5 deletions pgdatadiff/pgdatadiff.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import warnings

from fabulous.color import bold, green, red
from fabulous.color import bold, green, red, yellow
from halo import Halo
from sqlalchemy import exc as sa_exc
from sqlalchemy.engine import create_engine
Expand All @@ -19,7 +19,7 @@ def make_session(connection_string):

class DBDiff(object):

def __init__(self, firstdb, seconddb, chunk_size=10000, count_only=False):
def __init__(self, firstdb, seconddb, schema, chunk_size=10000, count_only=False, exclude_tables=""):
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@andrikoz Shall we follow the exclude_tables pattern here instead?

schema="public"

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yup, I agree

firstsession, firstengine = make_session(firstdb)
secondsession, secondengine = make_session(seconddb)
self.firstsession = firstsession
Expand All @@ -32,6 +32,8 @@ def __init__(self, firstdb, seconddb, chunk_size=10000, count_only=False):
self.secondinspector = inspect(secondengine)
self.chunk_size = int(chunk_size)
self.count_only = count_only
self.exclude_tables = exclude_tables.split(',')
self.schema = schema or 'public'

def diff_table_data(self, tablename):
try:
Expand Down Expand Up @@ -61,7 +63,7 @@ def diff_table_data(self, tablename):
SELECT md5(array_agg(md5((t.*)::varchar))::varchar)
FROM (
SELECT *
FROM {tablename}
FROM {self.schema}.{tablename}
ORDER BY {pk} limit :row_limit offset :row_offset
) AS t;
"""
Expand Down Expand Up @@ -90,7 +92,7 @@ def get_all_sequences(self):
self.firstsession.execute(GET_SEQUENCES_SQL).fetchall()]

def diff_sequence(self, seq_name):
GET_SEQUENCES_VALUE_SQL = f"SELECT last_value FROM {seq_name};"
GET_SEQUENCES_VALUE_SQL = f"SELECT last_value FROM {self.schema}.{seq_name};"

try:
firstvalue = \
Expand Down Expand Up @@ -140,8 +142,11 @@ def diff_all_table_data(self):
with warnings.catch_warnings():
warnings.simplefilter("ignore", category=sa_exc.SAWarning)
tables = sorted(
self.firstinspector.get_table_names(schema="public"))
self.firstinspector.get_table_names(schema=self.schema))
for table in tables:
if table in self.exclude_tables:
print(bold(yellow(f"Ignoring table {table}")))
continue
with Halo(
text=f"Analysing table {table}. "
f"[{tables.index(table) + 1}/{len(tables)}]",
Expand Down