Skip to content

Commit

Permalink
Add Edge case compare data when no id columns & Outlier detection add…
Browse files Browse the repository at this point in the history
… outliers table
  • Loading branch information
armandleopold committed Mar 11, 2024
1 parent eabf9bf commit d7c9127
Show file tree
Hide file tree
Showing 6 changed files with 43 additions and 7 deletions.
9 changes: 6 additions & 3 deletions data_compare_pack/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@


# Checking if the columns exist in the DataFrames
compare_col_list = pack.pack_config["job"]["compare_col_list"]
id_columns = pack.pack_config["job"]["id_columns"]
compare_col_list = pack.pack_config["job"].get("compare_col_list", [])
id_columns = pack.pack_config["job"].get("id_columns", [])
abs_tol = pack.pack_config["job"].get("abs_tol", 0.0001)
rel_tol = pack.pack_config["job"].get("rel_tol", 0)

# Create an intersection of source and target columns if compare_col_list is empty
if not compare_col_list:
if compare_col_list == []:
compare_col_list = list(
set(pack.df_source.columns).intersection(set(pack.df_target.columns))
)
Expand All @@ -37,6 +37,9 @@
# Combine compare_col_list and id_columns while removing duplicates
combined_columns_list = list(dict.fromkeys(compare_col_list + id_columns))

if len(id_columns) == 0 :
id_columns = compare_col_list

# Creating subsets for source and target data with no repeated columns
df_source_subset = pack.df_source[combined_columns_list]
df_target_subset = pack.df_target[combined_columns_list]
Expand Down
2 changes: 1 addition & 1 deletion data_compare_pack/pack_conf.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"justify": true
},
{
"metric_key": "recommendation_levels",
"metric_key": "recommendation_levels_mismatches",
"chart_type": "recommendation_level_indicator",
"display_title": true
},
Expand Down
2 changes: 1 addition & 1 deletion data_compare_pack/properties.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ icon: icon.png
name: data_compare
type: consistency
url: https://github.com/qalita-io/packs/tree/main/data_compare_pack
version: 2.0.1
version: 2.0.12
visibility: public
30 changes: 29 additions & 1 deletion outlier_detection_pack/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,6 @@ def determine_recommendation_level(proportion_outliers):
}
)

pack.metrics.save()

# Define a threshold for considering a data point as an outlier
normality_threshold = pack.pack_config["job"][
Expand Down Expand Up @@ -278,6 +277,35 @@ def determine_recommendation_level(proportion_outliers):
)
all_univariate_outliers = all_univariate_outliers[id_and_other_columns]


# Extracting column labels
columnLabels = all_univariate_outliers.columns.tolist()

# Converting the DataFrame into the desired format without row labels
data_formatted = [
[{"value": row[col]} for col in all_univariate_outliers.columns]
for index, row in all_univariate_outliers.iterrows()
]

# The formatted data structure, now without rowLabels
format_structure = {
"columnLabels": columnLabels,
"data": data_formatted,
}

# Append the outliers table (column labels and row values) to the metrics
pack.metrics.data.extend(
[
{
"key": "outliers_table",
"value": format_structure,
"scope": {"perimeter": "dataset", "value": pack.source_config["name"]},
},
]
)

pack.metrics.save()

# Step 2: Compile Multivariate Outliers
multivariate_outliers["index"] = (
multivariate_outliers.index
Expand Down
5 changes: 5 additions & 0 deletions outlier_detection_pack/pack_conf.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@
"chart_type": "text",
"display_title": true,
"justify": true
},
{
"metric_key": "outliers_table",
"chart_type": "table",
"display_title": true
}
],
"scoped": [
Expand Down
2 changes: 1 addition & 1 deletion outlier_detection_pack/properties.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ icon: icon.png
name: outlier_detection
type: reasonability
url: https://github.com/qalita-io/packs/tree/main/outlier_detection_pack
version: 2.0.2
version: 2.0.5
visibility: public

0 comments on commit d7c9127

Please sign in to comment.