Merge pull request #16 from qalita-io/feature/qalita-core-bump

Feature/qalita core bump
qalita-io · Feb 23, 2024 · efb85e2
2 parents 154e185 + 7f247a4
commit efb85e2
Show file tree

Hide file tree

Showing 11 changed files with 19 additions and 16 deletions.
diff --git a/outlier_detection_pack/main.py b/outlier_detection_pack/main.py
@@ -94,7 +94,7 @@
 df = pd.concat([df, encoded_df.reset_index(drop=True)], axis=1)
 
 # Exclude id_columns from df before Multivariate Outlier Detection
-df_for_multivariate = pack.df_source.drop(columns=id_columns)
+df_for_multivariate = df.drop(columns=id_columns)
 
 # Multivariate Outlier Detection
 multivariate_outliers = pd.DataFrame()

diff --git a/outlier_detection_pack/properties.yaml b/outlier_detection_pack/properties.yaml
@@ -3,5 +3,5 @@ icon: icon.png
 name: outlier_detection
 type: reasonability
 url: https://github.com/qalita-io/packs/tree/main/outlier_detection_pack
-version: 2.0.0
+version: 2.0.1
 visibility: public
diff --git a/profiling_pack/main.py b/profiling_pack/main.py
@@ -10,6 +10,7 @@
 import os
 from ydata_profiling import ProfileReport
 from datetime import datetime
+from io import StringIO
 
 pack = Pack()
 pack.load_data("source")
@@ -22,7 +23,9 @@
 
 # Run the profiling report
 profile = ProfileReport(
-    pack.df_source, title=f"Profiling Report for {dataset_scope_name}"
+    pack.df_source,
+    title=f"Profiling Report for {dataset_scope_name}",
+    correlations={"auto": {"calculate": False}},
 )
 
 # Save the report to HTML
@@ -47,7 +50,8 @@
 
 try:
     with open(html_file_name, "r", encoding="utf-8") as f:
-        tables = pd.read_html(f.read())
+        html_content = f.read()
+        tables = pd.read_html(StringIO(html_content)) 
 except ValueError as e:
     print(f"No tables found in the HTML report: {e}")
     tables = [pd.DataFrame()]  # Create an empty DataFrame if no tables are found
@@ -121,9 +125,9 @@
 
 # Extract p_cells_missing value (as a decimal)
 df_missing = pd.DataFrame(pack.metrics.data)
-p_cells_missing_value = df_missing[
-    df_missing["key"] == "p_cells_missing"
-]["value"].values[0]
+p_cells_missing_value = df_missing[df_missing["key"] == "p_cells_missing"][
+    "value"
+].values[0]
 p_cells_missing = float(p_cells_missing_value)
 
 # Calculate the score
@@ -180,7 +184,7 @@
     print("No alerts table found in the HTML report.")
     alerts_data = pd.DataFrame()  # Create an empty DataFrame if no alerts are found
 
-alerts_list_of_dicts = alerts_data.to_dict(orient='records')
+alerts_list_of_dicts = alerts_data.to_dict(orient="records")
 pack.recommendations.data = alerts_list_of_dicts
 
 ############################ Schemas

diff --git a/profiling_pack/properties.yaml b/profiling_pack/properties.yaml
@@ -3,5 +3,5 @@ icon: icon.png
 name: profiling
 type: completeness
 url: https://github.com/qalita-io/packs/tree/main/profiling_pack
-version: 2.0.0
+version: 2.0.1
 visibility: public
diff --git a/schema_scanner_pack/main.py b/schema_scanner_pack/main.py
@@ -2,6 +2,7 @@
 import pandas as pd
 from ydata_profiling import ProfileReport
 from qalita_core.pack import Pack
+from io import StringIO
 
 pack = Pack()
 pack.load_data("source")
@@ -24,7 +25,8 @@
 
 try:
     with open(html_file_name, "r", encoding="utf-8") as f:
-        tables = pd.read_html(f.read())
+        html_content = f.read()
+        tables = pd.read_html(StringIO(html_content)) 
 except ValueError as e:
     print(f"No tables found in the HTML report: {e}")
     tables = [pd.DataFrame()]  # Create an empty DataFrame if no tables are found

diff --git a/schema_scanner_pack/properties.yaml b/schema_scanner_pack/properties.yaml
@@ -3,5 +3,5 @@ icon: icon.png
 name: schema_scanner
 type: schema
 url: https://github.com/qalita-io/packs/tree/main/schema_scanner_pack
-version: 2.0.0
+version: 2.0.2
 visibility: public
diff --git a/schema_scanner_pack/pyproject.toml b/schema_scanner_pack/pyproject.toml
@@ -11,7 +11,6 @@ python = ">=3.10,<3.12"
 ydata-profiling = "^4.6.0"
 matplotlib = "3.7.0"
 lxml = "^4.9.3"
-pandas = "2.0.3"
 openpyxl = "^3.1.2"
 sqlalchemy = "^2.0.23"
 html5lib = "^1.1"

diff --git a/timeliness_pack/properties.yaml b/timeliness_pack/properties.yaml
@@ -3,5 +3,5 @@ icon: icon.png
 name: timeliness
 type: timeliness
 url: https://github.com/qalita-io/packs/tree/main/timeliness_pack
-version: 2.0.0
+version: 2.0.1
 visibility: public
diff --git a/timeliness_pack/pyproject.toml b/timeliness_pack/pyproject.toml
@@ -10,7 +10,6 @@ readme = "README.md"
 python = ">=3.10,<3.12"
 matplotlib = "3.7.0"
 lxml = "^4.9.3"
-pandas = "2.0.3"
 openpyxl = "^3.1.2"
 sqlalchemy = "^2.0.23"
 qalita-core = "^0.2.0"

diff --git a/versioning_pack/properties.yaml b/versioning_pack/properties.yaml
@@ -3,5 +3,5 @@ icon: icon.png
 name: versioning
 type: version
 url: https://github.com/qalita-io/packs/tree/main/versioning_pack
-version: 2.0.0
+version: 2.0.1
 visibility: public
diff --git a/versioning_pack/pyproject.toml b/versioning_pack/pyproject.toml
@@ -11,7 +11,6 @@ requests = "^2.31.0"
 python = ">=3.10,<3.12"
 matplotlib = "3.7.0"
 lxml = "^4.9.3"
-pandas = "2.0.3"
 openpyxl = "^3.1.2"
 sqlalchemy = "^2.0.23"
 qalita-core = "^0.2.0"