metrics rounded to 2 decimals, add sqlalchemy + edge case

qalita-io · Dec 11, 2023 · aa94d9a · aa94d9a
1 parent 45bc0cf
commit aa94d9a
Show file tree

Hide file tree

Showing 6 changed files with 78 additions and 46 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,3 @@
 **/__pycache__/
-poetry.lock
+poetry.lock
+profiling_pack/source_conf.json
diff --git a/profiling_pack/main.py b/profiling_pack/main.py
@@ -10,6 +10,18 @@
 
 warnings.filterwarnings("ignore", category=DeprecationWarning)
 
def round_if_numeric(value, decimals=2):
    """Return *value* as a string, rounded when it can be read as a number.

    Args:
        value: Any object. Anything ``float()`` accepts (ints, floats,
            numeric strings, ...) is treated as a number.
        decimals: Number of decimal places to round to and to display.

    Returns:
        str: The integer form (e.g. ``"2"``) when the rounded value has no
        fractional part, otherwise a fixed-point string with ``decimals``
        places (e.g. ``"3.14"``). Values that cannot be converted to a
        float are returned as ``str(value)``.
    """
    # Exact Python ints need no rounding; routing them through float() would
    # corrupt values above 2**53 and raise OverflowError for very large ones.
    # NOTE: bool is excluded here on purpose so that it keeps following the
    # float path below (True -> "1", False -> "0"), as it always has.
    if isinstance(value, int) and not isinstance(value, bool):
        return str(value)

    try:
        rounded_value = round(float(value), decimals)
    except (ValueError, TypeError, OverflowError):
        # Not a number (or not representable as a float): keep it verbatim.
        return str(value)

    # A whole number is rendered without a trailing ".00".
    if rounded_value.is_integer():
        return str(int(rounded_value))

    # Honour ``decimals`` in the output instead of a hard-coded two places;
    # a format precision cannot be negative, hence the clamp.
    return f"{rounded_value:.{max(decimals, 0)}f}"
 
 # Function to extract percentage and determine level
 def determine_level(content):
@@ -67,7 +79,7 @@ def denormalize(data):
 for col in df.columns:
     non_null_count = df[col].notnull().sum()
     total_count = len(df)
-    completeness_score = non_null_count / total_count
+    completeness_score = round(non_null_count / total_count, 2)
     completeness_scores.append(
         {
             "key": "completeness_score",
@@ -89,7 +101,7 @@ def denormalize(data):
 for key, value in general_data.items():
     entry = {
         "key": key,
-        "value": str(value),
+        "value": round_if_numeric(value),
         "scope": {"perimeter": "dataset", "value": config["name"]},
     }
     new_format_data.append(entry)
@@ -101,7 +113,7 @@ def denormalize(data):
     for attr_name, attr_value in attributes.items():
         entry = {
             "key": attr_name,
-            "value": str(attr_value),
+            "value": round_if_numeric(attr_value),
             "scope": {"perimeter": "column", "value": variable_name},
         }
         new_format_data.append(entry)
@@ -122,10 +134,10 @@ def denormalize(data):
 score = pd.DataFrame(
     {
         "key": "score",
-        "value": str(
+        "value": str(round(
             (int(number_of_observations) - int(missing_cells))
-            / int(number_of_observations)
-        ),
+            / int(number_of_observations), 2
+        )),
         "scope": {"perimeter": "dataset", "value": config["name"]},
     },
     index=[0],

diff --git a/profiling_pack/opener.py b/profiling_pack/opener.py
@@ -1,4 +1,7 @@
-# opener.py
+"""
+The opener module contains functions to load data from files and databases.
+"""
+
 import os
 import glob
 import pandas as pd
@@ -91,7 +94,7 @@ def load_data(config):
             return load_data_file(first_data_file)
         else:
             raise FileNotFoundError(
-                f"The path {path} is neither a file nor a directory."
+                f"The path {path} is neither a file nor a directory. Or can't be reached."
             )
 
     elif source_type == "database":

diff --git a/profiling_pack/pack_conf.json b/profiling_pack/pack_conf.json
@@ -1,38 +1,53 @@
 {
-    "metrics_chart_mapping": [
-        {
-            "metric_key":"n_cells_missing",
-            "chart_type":"area_chart"
-        },
-        {
-            "metric_key":"n",
-            "chart_type":"area_chart"
-        },
-        {
-            "metric_key":"n_cells_missing",
-            "chart_type":"text_header"
-        },
-        {
-            "metric_key":"p_cells_missing",
-            "chart_type":"text_header"
-        }
-    ],
-    "schemas_chart_mapping": [
-        {
-            "metric_key":"n_missing",
-            "chart_type":"area_chart"
-        },
-        {
-            "metric_key":"min",
-            "chart_type":"text_header"
-        },
-        {
-            "metric_key":"max",
-            "chart_type":"text_header"
-        },
-        {
-            "metric_key":"skewness",
-            "chart_type":"text_header"
-        }
-    ]
+    "charts": {
+        "overview": [
+            {
+                "metric_key": "n_cells_missing",
+                "chart_type": "area_chart",
+                "display_title": true
+            },
+            {
+                "metric_key": "n",
+                "chart_type": "area_chart",
+                "display_title": true
+            },
+            {
+                "metric_key": "n_cells_missing",
+                "chart_type": "text",
+                "display_title": true,
+                "justify": true
+            },
+            {
+                "metric_key": "p_cells_missing",
+                "chart_type": "text",
+                "display_title": true,
+                "justify": true
+            },
+            {
+                "metric_key": "score",
+                "chart_type": "text",
+                "display_title": true,
+                "justify": true
+            }
+        ],
+        "scoped": [
+            {
+                "metric_key": "type",
+                "chart_type": "badge",
+                "display_title": true,
+                "justify": true
+            },
+            {
+                "metric_key": "completeness_score",
+                "chart_type": "text",
+                "display_title": true,
+                "justify": true
+            },
+            {
+                "metric_key": "completeness_score",
+                "chart_type": "spark_area_chart",
+                "display_title": false
+            }
+        ]
+    }
 }
diff --git a/profiling_pack/properties.yaml b/profiling_pack/properties.yaml
@@ -3,5 +3,5 @@ icon: icon.png
 name: profiling
 type: completeness
 url: https://github.com/qalita-io/packs/tree/main/profiling_pack
-version: 1.0.15
+version: 1.0.21
 visibility: public
diff --git a/profiling_pack/pyproject.toml b/profiling_pack/pyproject.toml
@@ -13,6 +13,7 @@ matplotlib = "3.7.0"
 lxml = "^4.9.3"
 pandas = "2.0.3"
 openpyxl = "^3.1.2"
+sqlalchemy = "^2.0.23"
 
 [build-system]
 requires = ["poetry-core"]