Skip to content

Commit

Permalink
Rename columns from df1 & df2 to source & target + level info mismatches indicator
Browse files Browse the repository at this point in the history
  • Loading branch information
armandleopold committed Mar 17, 2024
1 parent d7c9127 commit 8f4fafe
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 41 deletions.
31 changes: 25 additions & 6 deletions data_compare_pack/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
# Combine compare_col_list and id_columns while removing duplicates
combined_columns_list = list(dict.fromkeys(compare_col_list + id_columns))

if len(id_columns) == 0 :
if len(id_columns) == 0:
id_columns = compare_col_list

# Creating subsets for source and target data with no repeated columns
Expand Down Expand Up @@ -229,19 +229,38 @@
]
)


# Extracting column labels
columnLabels = df_all_mismatch.columns.tolist()

# Converting the DataFrame into the desired format without row labels
# Dictionary to map the old suffix to the new one
suffix_mapping = {"_df1": "_source", "_df2": "_target"}

# Build the renamed labels: a label ending in one of the mapped suffixes gets
# that trailing suffix swapped for its replacement; every other label is kept
# unchanged.
# Only the trailing suffix is rewritten (by slicing it off) rather than using
# str.replace, which would also rewrite the same text appearing earlier in the
# label (e.g. "a_df1_b_df1" must become "a_df1_b_source").
new_columnLabels = [
    next(
        (
            col[: -len(suffix)] + replacement
            for suffix, replacement in suffix_mapping.items()
            if col.endswith(suffix)
        ),
        col,  # no mapped suffix: keep the label as-is
    )
    for col in columnLabels
]

# Apply the renamed labels to the mismatch DataFrame's columns.
df_all_mismatch.columns = new_columnLabels

# Convert each row into a list of {"value": ...} cells, one cell per column,
# in column order; the row index is not included in the output.
data_formatted = [
    [{"value": record[label]} for label in df_all_mismatch.columns]
    for _, record in df_all_mismatch.iterrows()
]

# The formatted data structure, now without rowLabels
# The formatted data structure, now with renamed labels
format_structure = {
"columnLabels": columnLabels,
"columnLabels": new_columnLabels, # Use the new column labels
"data": data_formatted,
}

Expand All @@ -250,7 +269,7 @@
[
{
"key": "recommendation_levels_mismatches",
"value": {"info": "<=0.5", "warning": ">0.5", "high": ">0.8"},
"value": {"info": "0", "warning": "0.5", "high": "0.8"},
"scope": {"perimeter": "dataset", "value": pack.source_config["name"]},
},
{
Expand Down
72 changes: 38 additions & 34 deletions data_compare_pack/pack_conf.json
Original file line number Diff line number Diff line change
@@ -1,36 +1,40 @@
{
"job": {
"compare_col_list": [],
"id_columns": [],
"abs_tol": 0.0001,
"rel_tol": 0,
"source": {
"skiprows": 0
}
},
"charts": {
"overview": [
{
"metric_key": "score",
"chart_type": "text",
"display_title": true,
"justify": true
},
{
"metric_key": "recommendation_levels_mismatches",
"chart_type": "recommendation_level_indicator",
"display_title": true
},
{
"metric_key": "check_column",
"chart_type": "check_table",
"display_title": true
},
{
"metric_key": "mismatches_table",
"chart_type": "table",
"display_title": true
}
]
"job": {
"compare_col_list": [],
"id_columns": [],
"abs_tol": 0.0001,
"rel_tol": 0,
"source": {
"skiprows": 0
}
}
},
"charts": {
"overview": [
{
"metric_key": "score",
"chart_type": "text",
"display_title": true,
"justify": true
},
{
"metric_key": "recommendation_levels_mismatches",
"chart_type": "recommendation_level_indicator",
"tooltip": {
"title": "Recommendation level's importance mapping",
"content": "Gives the recommendation level for proportions of mismatches"
},
"display_title": true
},
{
"metric_key": "check_column",
"chart_type": "check_table",
"display_title": true
},
{
"metric_key": "mismatches_table",
"chart_type": "table",
"display_title": true
}
]
}
}
2 changes: 1 addition & 1 deletion data_compare_pack/properties.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ icon: icon.png
name: data_compare
type: consistency
url: https://github.com/qalita-io/packs/tree/main/data_compare_pack
version: 2.0.12
version: 2.0.21
visibility: public

0 comments on commit 8f4fafe

Please sign in to comment.