From df3b61cca5ae0113febfa5531774de5860b1320e Mon Sep 17 00:00:00 2001
From: yoid2000 <paul@francis.com>
Date: Mon, 22 Jul 2024 15:25:25 +0200
Subject: [PATCH] Delete my_debugger.py

---
 tests/my_debugger.py | 84 --------------------------------------------
 1 file changed, 84 deletions(-)
 delete mode 100644 tests/my_debugger.py

diff --git a/tests/my_debugger.py b/tests/my_debugger.py
deleted file mode 100644
index c3d3438..0000000
--- a/tests/my_debugger.py
+++ /dev/null
@@ -1,84 +0,0 @@
-import os
-
-import numpy as np
-import pandas as pd
-from pandas.errors import ParserError
-
-from syndiffix import Synthesizer
-
-
-def count_identical_rows(df1, df2):
-    # Merge the two dataframes on all columns
-    merged_df = pd.merge(df1, df2, how="inner")
-
-    # The number of identical rows is the number of rows in the merged dataframe
-    num_identical_rows = len(merged_df)
-
-    return num_identical_rows
-
-
-def test2():
-    catCols = [
-        "SEX",
-        "MSP",
-        "HISP",
-        "RAC1P",
-        "HOUSING_TYPE",
-        "OWN_RENT",
-        "INDP_CAT",
-        "EDU",
-        "PINCP_DECILE",
-        "DVET",
-        "DREM",
-        "DEYE",
-        "DEAR",
-        "DPHY",
-    ]
-    csv_path = os.path.join("c:\\", "paul", "sdnist", "diverse_communities_data_excerpts", "texas", "tx2019.csv")
-    print(csv_path)
-    df = pd.read_csv(csv_path, low_memory=False)
-    for col in catCols:
-        is_numeric = np.issubdtype(df[col].dtypes, np.number)
-        print(f"{col} is numeric {is_numeric}")
-        print(df[col].unique())
-        null_values = df[col].isnull()
-        # To count the number of null values
-        print(f"     {null_values.sum()} null values")
-    df = df.sample(n=1000)
-    # HISP and SEX are numeric, so let's change only HISP to string
-    df["HISP"] = df["HISP"].astype(str)
-    synth = Synthesizer(df[["HISP", "SEX"]])
-    df_syn = synth.sample()
-    print(df_syn.dtypes)
-    print(df_syn.head())
-    pass
-
-
-def test1():
-    csv_path = os.path.join("c:\\", "paul", "datasets", "banking.loans", "original", "loan_account_card_clients.csv")
-    print(csv_path)
-    df = pd.read_csv(csv_path, keep_default_na=False, na_values=[""], low_memory=False)
-    # Try to infer datetime columns.
-    for col in df.columns[df.dtypes == "object"]:
-        try:
-            df[col] = pd.to_datetime(df[col], format="ISO8601")
-        except (ParserError, ValueError):
-            pass
-
-    print("make synthesizer")
-    synthesizer = Synthesizer(df)
-    print("first sample")
-    df_syn1 = synthesizer.sample()
-    print(df_syn1[0:5].to_string())
-    print("second sample")
-    df_syn2 = synthesizer.sample()
-    print(df_syn2[0:5].to_string())
-
-    cnt = count_identical_rows(df_syn1, df_syn2)
-    print(f"There are {cnt} identical rows")
-
-
-if False:
-    test1()
-if True:
-    test2()