Skip to content

Commit

Permalink
Merge pull request #16 from qalita-io/feature/qalita-core-bump
Browse files (browse the repository at this point in the history)
Feature/qalita core bump
  • Loading branch information
armandleopold authored Feb 23, 2024
2 parents 154e185 + 7f247a4 commit efb85e2
Show file tree
Hide file tree
Showing 11 changed files with 19 additions and 16 deletions.
2 changes: 1 addition & 1 deletion outlier_detection_pack/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@
df = pd.concat([df, encoded_df.reset_index(drop=True)], axis=1)

# Exclude id_columns from df before Multivariate Outlier Detection
- df_for_multivariate = pack.df_source.drop(columns=id_columns)
+ df_for_multivariate = df.drop(columns=id_columns)

# Multivariate Outlier Detection
multivariate_outliers = pd.DataFrame()
Expand Down
2 changes: 1 addition & 1 deletion outlier_detection_pack/properties.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ icon: icon.png
name: outlier_detection
type: reasonability
url: https://github.com/qalita-io/packs/tree/main/outlier_detection_pack
- version: 2.0.0
+ version: 2.0.1
visibility: public
16 changes: 10 additions & 6 deletions profiling_pack/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import os
from ydata_profiling import ProfileReport
from datetime import datetime
+ from io import StringIO

pack = Pack()
pack.load_data("source")
Expand All @@ -22,7 +23,9 @@

# Run the profiling report
profile = ProfileReport(
- pack.df_source, title=f"Profiling Report for {dataset_scope_name}"
+ pack.df_source,
+ title=f"Profiling Report for {dataset_scope_name}",
+ correlations={"auto": {"calculate": False}},
)

# Save the report to HTML
Expand All @@ -47,7 +50,8 @@

try:
with open(html_file_name, "r", encoding="utf-8") as f:
- tables = pd.read_html(f.read())
+ html_content = f.read()
+ tables = pd.read_html(StringIO(html_content))
except ValueError as e:
print(f"No tables found in the HTML report: {e}")
tables = [pd.DataFrame()] # Create an empty DataFrame if no tables are found
Expand Down Expand Up @@ -121,9 +125,9 @@

# Extract p_cells_missing value (as a decimal)
df_missing = pd.DataFrame(pack.metrics.data)
- p_cells_missing_value = df_missing[
- df_missing["key"] == "p_cells_missing"
- ]["value"].values[0]
+ p_cells_missing_value = df_missing[df_missing["key"] == "p_cells_missing"][
+ "value"
+ ].values[0]
p_cells_missing = float(p_cells_missing_value)

# Calculate the score
Expand Down Expand Up @@ -180,7 +184,7 @@
print("No alerts table found in the HTML report.")
alerts_data = pd.DataFrame() # Create an empty DataFrame if no alerts are found

- alerts_list_of_dicts = alerts_data.to_dict(orient='records')
+ alerts_list_of_dicts = alerts_data.to_dict(orient="records")
pack.recommendations.data = alerts_list_of_dicts

############################ Schemas
Expand Down
2 changes: 1 addition & 1 deletion profiling_pack/properties.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ icon: icon.png
name: profiling
type: completeness
url: https://github.com/qalita-io/packs/tree/main/profiling_pack
- version: 2.0.0
+ version: 2.0.1
visibility: public
4 changes: 3 additions & 1 deletion schema_scanner_pack/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import pandas as pd
from ydata_profiling import ProfileReport
from qalita_core.pack import Pack
+ from io import StringIO

pack = Pack()
pack.load_data("source")
Expand All @@ -24,7 +25,8 @@

try:
with open(html_file_name, "r", encoding="utf-8") as f:
- tables = pd.read_html(f.read())
+ html_content = f.read()
+ tables = pd.read_html(StringIO(html_content))
except ValueError as e:
print(f"No tables found in the HTML report: {e}")
tables = [pd.DataFrame()] # Create an empty DataFrame if no tables are found
Expand Down
2 changes: 1 addition & 1 deletion schema_scanner_pack/properties.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ icon: icon.png
name: schema_scanner
type: schema
url: https://github.com/qalita-io/packs/tree/main/schema_scanner_pack
- version: 2.0.0
+ version: 2.0.2
visibility: public
1 change: 0 additions & 1 deletion schema_scanner_pack/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ python = ">=3.10,<3.12"
ydata-profiling = "^4.6.0"
matplotlib = "3.7.0"
lxml = "^4.9.3"
- pandas = "2.0.3"
openpyxl = "^3.1.2"
sqlalchemy = "^2.0.23"
html5lib = "^1.1"
Expand Down
2 changes: 1 addition & 1 deletion timeliness_pack/properties.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ icon: icon.png
name: timeliness
type: timeliness
url: https://github.com/qalita-io/packs/tree/main/timeliness_pack
- version: 2.0.0
+ version: 2.0.1
visibility: public
1 change: 0 additions & 1 deletion timeliness_pack/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ readme = "README.md"
python = ">=3.10,<3.12"
matplotlib = "3.7.0"
lxml = "^4.9.3"
- pandas = "2.0.3"
openpyxl = "^3.1.2"
sqlalchemy = "^2.0.23"
qalita-core = "^0.2.0"
Expand Down
2 changes: 1 addition & 1 deletion versioning_pack/properties.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ icon: icon.png
name: versioning
type: version
url: https://github.com/qalita-io/packs/tree/main/versioning_pack
- version: 2.0.0
+ version: 2.0.1
visibility: public
1 change: 0 additions & 1 deletion versioning_pack/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ requests = "^2.31.0"
python = ">=3.10,<3.12"
matplotlib = "3.7.0"
lxml = "^4.9.3"
- pandas = "2.0.3"
openpyxl = "^3.1.2"
sqlalchemy = "^2.0.23"
qalita-core = "^0.2.0"
Expand Down

0 comments on commit efb85e2

Please sign in to comment.