From 80920eb3ec86f862a1c3899123dee066022d9d98 Mon Sep 17 00:00:00 2001
From: Armand LEOPOLD <armand.leopold@outlook.com>
Date: Fri, 23 Feb 2024 12:14:22 +0100
Subject: [PATCH 1/2] Wrap read_html input in StringIO, disable auto correlations, drop pandas pins, bump pack versions

---
 profiling_pack/main.py              | 16 ++++++++++------
 profiling_pack/properties.yaml      |  2 +-
 schema_scanner_pack/main.py         |  4 +++-
 schema_scanner_pack/properties.yaml |  2 +-
 schema_scanner_pack/pyproject.toml  |  1 -
 timeliness_pack/properties.yaml     |  2 +-
 timeliness_pack/pyproject.toml      |  1 -
 versioning_pack/properties.yaml     |  2 +-
 versioning_pack/pyproject.toml      |  1 -
 9 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/profiling_pack/main.py b/profiling_pack/main.py
index 84c5a71..5ce0daa 100644
--- a/profiling_pack/main.py
+++ b/profiling_pack/main.py
@@ -10,6 +10,7 @@
 import os
 from ydata_profiling import ProfileReport
 from datetime import datetime
+from io import StringIO
 
 pack = Pack()
 pack.load_data("source")
@@ -22,7 +23,9 @@
 
 # Run the profiling report
 profile = ProfileReport(
-    pack.df_source, title=f"Profiling Report for {dataset_scope_name}"
+    pack.df_source,
+    title=f"Profiling Report for {dataset_scope_name}",
+    correlations={"auto": {"calculate": False}},
 )
 
 # Save the report to HTML
@@ -47,7 +50,8 @@
 
 try:
     with open(html_file_name, "r", encoding="utf-8") as f:
-        tables = pd.read_html(f.read())
+        html_content = f.read()
+        tables = pd.read_html(StringIO(html_content))
 except ValueError as e:
     print(f"No tables found in the HTML report: {e}")
     tables = [pd.DataFrame()]  # Create an empty DataFrame if no tables are found
@@ -121,9 +125,9 @@
 
 # Extract p_cells_missing value (as a decimal)
 df_missing = pd.DataFrame(pack.metrics.data)
-p_cells_missing_value = df_missing[
-    df_missing["key"] == "p_cells_missing"
-]["value"].values[0]
+p_cells_missing_value = df_missing[df_missing["key"] == "p_cells_missing"][
+    "value"
+].values[0]
 p_cells_missing = float(p_cells_missing_value)
 
 # Calculate the score
@@ -180,7 +184,7 @@
     print("No alerts table found in the HTML report.")
     alerts_data = pd.DataFrame()  # Create an empty DataFrame if no alerts are found
 
-alerts_list_of_dicts = alerts_data.to_dict(orient='records')
+alerts_list_of_dicts = alerts_data.to_dict(orient="records")
 pack.recommendations.data = alerts_list_of_dicts
 
 ############################ Schemas
diff --git a/profiling_pack/properties.yaml b/profiling_pack/properties.yaml
index c25cc28..53589e6 100644
--- a/profiling_pack/properties.yaml
+++ b/profiling_pack/properties.yaml
@@ -3,5 +3,5 @@ icon: icon.png
 name: profiling
 type: completeness
 url: https://github.com/qalita-io/packs/tree/main/profiling_pack
-version: 2.0.0
+version: 2.0.1
 visibility: public
diff --git a/schema_scanner_pack/main.py b/schema_scanner_pack/main.py
index 573cbeb..ebb4693 100644
--- a/schema_scanner_pack/main.py
+++ b/schema_scanner_pack/main.py
@@ -2,6 +2,7 @@
 import pandas as pd
 from ydata_profiling import ProfileReport
 from qalita_core.pack import Pack
+from io import StringIO
 
 pack = Pack()
 pack.load_data("source")
@@ -24,7 +25,8 @@
 
 try:
     with open(html_file_name, "r", encoding="utf-8") as f:
-        tables = pd.read_html(f.read())
+        html_content = f.read()
+        tables = pd.read_html(StringIO(html_content))
 except ValueError as e:
     print(f"No tables found in the HTML report: {e}")
     tables = [pd.DataFrame()]  # Create an empty DataFrame if no tables are found
diff --git a/schema_scanner_pack/properties.yaml b/schema_scanner_pack/properties.yaml
index d676082..754a491 100644
--- a/schema_scanner_pack/properties.yaml
+++ b/schema_scanner_pack/properties.yaml
@@ -3,5 +3,5 @@ icon: icon.png
 name: schema_scanner
 type: schema
 url: https://github.com/qalita-io/packs/tree/main/schema_scanner_pack
-version: 2.0.0
+version: 2.0.2
 visibility: public
diff --git a/schema_scanner_pack/pyproject.toml b/schema_scanner_pack/pyproject.toml
index b9a6a9f..85dd842 100644
--- a/schema_scanner_pack/pyproject.toml
+++ b/schema_scanner_pack/pyproject.toml
@@ -11,7 +11,6 @@ python = ">=3.10,<3.12"
 ydata-profiling = "^4.6.0"
 matplotlib = "3.7.0"
 lxml = "^4.9.3"
-pandas = "2.0.3"
 openpyxl = "^3.1.2"
 sqlalchemy = "^2.0.23"
 html5lib = "^1.1"
diff --git a/timeliness_pack/properties.yaml b/timeliness_pack/properties.yaml
index bf321df..d092f7f 100644
--- a/timeliness_pack/properties.yaml
+++ b/timeliness_pack/properties.yaml
@@ -3,5 +3,5 @@ icon: icon.png
 name: timeliness
 type: timeliness
 url: https://github.com/qalita-io/packs/tree/main/timeliness_pack
-version: 2.0.0
+version: 2.0.1
 visibility: public
diff --git a/timeliness_pack/pyproject.toml b/timeliness_pack/pyproject.toml
index 19f93c3..3646934 100644
--- a/timeliness_pack/pyproject.toml
+++ b/timeliness_pack/pyproject.toml
@@ -10,7 +10,6 @@ readme = "README.md"
 python = ">=3.10,<3.12"
 matplotlib = "3.7.0"
 lxml = "^4.9.3"
-pandas = "2.0.3"
 openpyxl = "^3.1.2"
 sqlalchemy = "^2.0.23"
 qalita-core = "^0.2.0"
diff --git a/versioning_pack/properties.yaml b/versioning_pack/properties.yaml
index 2b46f7d..9554b08 100644
--- a/versioning_pack/properties.yaml
+++ b/versioning_pack/properties.yaml
@@ -3,5 +3,5 @@ icon: icon.png
 name: versioning
 type: version
 url: https://github.com/qalita-io/packs/tree/main/versioning_pack
-version: 2.0.0
+version: 2.0.1
 visibility: public
diff --git a/versioning_pack/pyproject.toml b/versioning_pack/pyproject.toml
index 14d3ee8..0bcee9c 100644
--- a/versioning_pack/pyproject.toml
+++ b/versioning_pack/pyproject.toml
@@ -11,7 +11,6 @@ requests = "^2.31.0"
 python = ">=3.10,<3.12"
 matplotlib = "3.7.0"
 lxml = "^4.9.3"
-pandas = "2.0.3"
 openpyxl = "^3.1.2"
 sqlalchemy = "^2.0.23"
 qalita-core = "^0.2.0"

From 7f247a4a08910c4b1cf46966a02162b8ba18dec4 Mon Sep 17 00:00:00 2001
From: Armand LEOPOLD <armand.leopold@outlook.com>
Date: Fri, 23 Feb 2024 12:45:19 +0100
Subject: [PATCH 2/2] Fix outlier pack: run multivariate detection on the encoded DataFrame

---
 outlier_detection_pack/main.py         | 2 +-
 outlier_detection_pack/properties.yaml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/outlier_detection_pack/main.py b/outlier_detection_pack/main.py
index 9bb594d..feaf6ad 100644
--- a/outlier_detection_pack/main.py
+++ b/outlier_detection_pack/main.py
@@ -94,7 +94,7 @@
 df = pd.concat([df, encoded_df.reset_index(drop=True)], axis=1)
 
 # Exclude id_columns from df before Multivariate Outlier Detection
-df_for_multivariate = pack.df_source.drop(columns=id_columns)
+df_for_multivariate = df.drop(columns=id_columns)
 
 # Multivariate Outlier Detection
 multivariate_outliers = pd.DataFrame()
diff --git a/outlier_detection_pack/properties.yaml b/outlier_detection_pack/properties.yaml
index 29b1bc2..a9b158c 100644
--- a/outlier_detection_pack/properties.yaml
+++ b/outlier_detection_pack/properties.yaml
@@ -3,5 +3,5 @@ icon: icon.png
 name: outlier_detection
 type: reasonability
 url: https://github.com/qalita-io/packs/tree/main/outlier_detection_pack
-version: 2.0.0
+version: 2.0.1
 visibility: public