ran black, isort, etc.

diffix · Jul 22, 2024 · 79dfcac · 79dfcac
1 parent a596f7c
commit 79dfcac
Show file tree

Hide file tree

Showing 2 changed files with 36 additions and 17 deletions.
diff --git a/syndiffix/tree.py b/syndiffix/tree.py
@@ -207,9 +207,11 @@ def _create_child_leaf(self, child_index: int, initial_row: RowId) -> Leaf:
 
         # Set child's subnodes to the matching-interval children of the parent's subnodes.
         subnodes = tuple(
-            subnode.children.get(Branch._remove_dimension_from_index(dim_index, child_index))
-            if isinstance(subnode, Branch)
-            else None
+            (
+                subnode.children.get(Branch._remove_dimension_from_index(dim_index, child_index))
+                if isinstance(subnode, Branch)
+                else None
+            )
             for dim_index, subnode in enumerate(self.subnodes)
         )
 

diff --git a/tests/my_debugger.py b/tests/my_debugger.py
@@ -1,25 +1,40 @@
-import pandas as pd
-import numpy as np
 import os
+
+import numpy as np
+import pandas as pd
 from pandas.errors import ParserError
 
 from syndiffix import Synthesizer
 
+
 def count_identical_rows(df1, df2):
     # Merge the two dataframes on all columns
-    merged_df = pd.merge(df1, df2, how='inner')
-    
+    merged_df = pd.merge(df1, df2, how="inner")
+
     # The number of identical rows is the number of rows in the merged dataframe
     num_identical_rows = len(merged_df)
-    
+
     return num_identical_rows
 
+
 def test2():
-    catCols = [ 'SEX', 'MSP', 'HISP', 'RAC1P', 'HOUSING_TYPE', 
-              'OWN_RENT', 'INDP_CAT', 'EDU', 'PINCP_DECILE',
-              'DVET', 'DREM', 'DEYE', 'DEAR', 'DPHY', 
-              ]
-    csv_path = os.path.join('c:\\', 'paul', 'sdnist', 'diverse_communities_data_excerpts', 'texas', 'tx2019.csv')
+    catCols = [
+        "SEX",
+        "MSP",
+        "HISP",
+        "RAC1P",
+        "HOUSING_TYPE",
+        "OWN_RENT",
+        "INDP_CAT",
+        "EDU",
+        "PINCP_DECILE",
+        "DVET",
+        "DREM",
+        "DEYE",
+        "DEAR",
+        "DPHY",
+    ]
+    csv_path = os.path.join("c:\\", "paul", "sdnist", "diverse_communities_data_excerpts", "texas", "tx2019.csv")
     print(csv_path)
     df = pd.read_csv(csv_path, low_memory=False)
     for col in catCols:
@@ -31,15 +46,16 @@ def test2():
         print(f"     {null_values.sum()} null values")
     df = df.sample(n=1000)
     # HISP and SEX are numeric, so let's change only HISP to string
-    df['HISP'] = df['HISP'].astype(str)
-    synth = Synthesizer(df[['HISP','SEX']])
+    df["HISP"] = df["HISP"].astype(str)
+    synth = Synthesizer(df[["HISP", "SEX"]])
     df_syn = synth.sample()
     print(df_syn.dtypes)
     print(df_syn.head())
     pass
 
+
 def test1():
-    csv_path = os.path.join('c:\\', 'paul', 'datasets', 'banking.loans', 'original', 'loan_account_card_clients.csv')
+    csv_path = os.path.join("c:\\", "paul", "datasets", "banking.loans", "original", "loan_account_card_clients.csv")
     print(csv_path)
     df = pd.read_csv(csv_path, keep_default_na=False, na_values=[""], low_memory=False)
     # Try to infer datetime columns.
@@ -61,7 +77,8 @@ def test1():
     cnt = count_identical_rows(df_syn1, df_syn2)
     print(f"There are {cnt} identical rows")
 
+
 if False:
     test1()
 if True:
-    test2()
+    test2()