From b6c4d63c6a8baa8b4a4f9e16f63a4aa88e8d5ded Mon Sep 17 00:00:00 2001
From: Sandeep Mane <sandeepongtalk@gmail.com>
Date: Fri, 6 Dec 2024 09:56:45 -0800
Subject: [PATCH] Moving tutorials into tests and organizing tests

---
 .../src/robyn/tutorials/utils/compare_data.py | 260 ------------------
 .../src/robyn/tutorials/utils/plot_utils.py   |  51 ----
 .../e2e_test/tutorial3_modeling_compare.ipynb |   0
 .../e2e_test/tutorial4_pareto_e2e_test.ipynb  |   0
 ...rial4_pareto_e2e_test_with_allocator.ipynb |   0
 .../r_tutorial3_modeling_compare.ipynb        |   0
 .../tutorial2_feature_engineering.ipynb       |   0
 .../tutorial3_modeling.ipynb                  |   0
 .../tutorial4_pareto.ipynb                    |   0
 .../tutorial5_calibration.ipynb               |   0
 .../tutorial6_allocator.ipynb                 |   0
 .../tutorial7_clustering.ipynb                |   0
 .../tutorial_data_mapper.ipynb                |   0
 .../component_tutorials}/utils/data_mapper.py |   0
 .../modeling/pareto/test_hill_calculator.py   |   0
 .../modeling/pareto/test_response_curve.py    |   0
 .../modeling/test_feature_engineering.py      |   0
 .../modeling_pareto/test_pareto_utils.py      |   0
 .../test_calibration_input_validation.py      |   0
 .../test_holidays_data_validation.py          |   0
 .../test_hyperparameter_validation.py         |   0
 .../{ => unit}/test_mmmdata_validation.py     |   0
 python/tests/{ => unit}/test_model_builder.py |   0
 23 files changed, 311 deletions(-)
 delete mode 100644 python/src/robyn/tutorials/utils/compare_data.py
 delete mode 100644 python/src/robyn/tutorials/utils/plot_utils.py
 rename python/{src/robyn/tutorials => tests/component_tutorials}/e2e_test/tutorial3_modeling_compare.ipynb (100%)
 rename python/{src/robyn/tutorials => tests/component_tutorials}/e2e_test/tutorial4_pareto_e2e_test.ipynb (100%)
 rename python/{src/robyn/tutorials => tests/component_tutorials}/e2e_test/tutorial4_pareto_e2e_test_with_allocator.ipynb (100%)
 rename python/{src/robyn/tutorials => tests/component_tutorials}/r_exported_data/r_tutorial3_modeling_compare.ipynb (100%)
 rename python/{src/robyn/tutorials => tests/component_tutorials}/tutorial2_feature_engineering.ipynb (100%)
 rename python/{src/robyn/tutorials => tests/component_tutorials}/tutorial3_modeling.ipynb (100%)
 rename python/{src/robyn/tutorials => tests/component_tutorials}/tutorial4_pareto.ipynb (100%)
 rename python/{src/robyn/tutorials => tests/component_tutorials}/tutorial5_calibration.ipynb (100%)
 rename python/{src/robyn/tutorials => tests/component_tutorials}/tutorial6_allocator.ipynb (100%)
 rename python/{src/robyn/tutorials => tests/component_tutorials}/tutorial7_clustering.ipynb (100%)
 rename python/{src/robyn/tutorials => tests/component_tutorials}/tutorial_data_mapper.ipynb (100%)
 rename python/{src/robyn/tutorials => tests/component_tutorials}/utils/data_mapper.py (100%)
 rename python/tests/{ => unit}/modeling/pareto/test_hill_calculator.py (100%)
 rename python/tests/{ => unit}/modeling/pareto/test_response_curve.py (100%)
 rename python/tests/{ => unit}/modeling/test_feature_engineering.py (100%)
 rename python/tests/{ => unit}/modeling_pareto/test_pareto_utils.py (100%)
 rename python/tests/{ => unit}/test_calibration_input_validation.py (100%)
 rename python/tests/{ => unit}/test_holidays_data_validation.py (100%)
 rename python/tests/{ => unit}/test_hyperparameter_validation.py (100%)
 rename python/tests/{ => unit}/test_mmmdata_validation.py (100%)
 rename python/tests/{ => unit}/test_model_builder.py (100%)

diff --git a/python/src/robyn/tutorials/utils/compare_data.py b/python/src/robyn/tutorials/utils/compare_data.py
deleted file mode 100644
index 897bbb9e6..000000000
--- a/python/src/robyn/tutorials/utils/compare_data.py
+++ /dev/null
@@ -1,260 +0,0 @@
-import pandas as pd
-import numpy as np
-from tabulate import tabulate
-from utils.data_mapper import load_data_from_json, import_data
-
-
-def compare_featurized_mmm_data(r_python_data_path, python_only_data):
-    loaded_data = load_data_from_json(r_python_data_path)
-    imported_data = import_data(loaded_data)
-    r_python_featurized_mmm_data = imported_data["featurized_mmm_data"]
-
-    dt_mod_diff = compare_dataframes(
-        r_python_featurized_mmm_data.dt_mod, python_only_data.dt_mod, "dt_mod"
-    )
-    dt_modRollWind_diff = compare_dataframes(
-        r_python_featurized_mmm_data.dt_modRollWind,
-        python_only_data.dt_modRollWind,
-        "dt_modRollWind",
-    )
-    # modNLS_diff = compare_modNLS(r_python_featurized_mmm_data.modNLS, python_only_data.modNLS, "modNLS")
-    modNLS_diff = None
-    return dt_mod_diff, dt_modRollWind_diff, modNLS_diff
-
-
-def compare_dataframes(df1, df2, name):
-    result = [f"\n{name} DataFrame Comparison:"]
-
-    if df1.empty and df2.empty:
-        result.append(f"{name} DataFrames are both empty.")
-        return "\n".join(result)
-
-    shape_table = [
-        ["", "R/Python", "New Python"],
-        ["Rows", df1.shape[0], df2.shape[0]],
-        ["Columns", df1.shape[1], df2.shape[1]],
-    ]
-    result.append("Shape Comparison:")
-    result.append(tabulate(shape_table, headers="firstrow", tablefmt="grid"))
-
-    cols1 = set(df1.columns)
-    cols2 = set(df2.columns)
-    common_cols = cols1.intersection(cols2)
-    only_in_df1 = cols1 - cols2
-    only_in_df2 = cols2 - cols1
-
-    result.append("\nColumn Comparison:")
-    col_table = [
-        ["Common Columns", "Only in R/Python", "Only in New Python"],
-        [
-            ", ".join(sorted(common_cols)),
-            ", ".join(sorted(only_in_df1)),
-            ", ".join(sorted(only_in_df2)),
-        ],
-    ]
-    result.append(tabulate(col_table, headers="firstrow", tablefmt="grid"))
-
-    dtype_table = [["Column", "R/Python Type", "New Python Type"]]
-    for col in common_cols:
-        if df1[col].dtype != df2[col].dtype:
-            dtype_table.append([col, df1[col].dtype, df2[col].dtype])
-
-    if len(dtype_table) > 1:
-        result.append("\nData Type Differences:")
-        result.append(tabulate(dtype_table, headers="firstrow", tablefmt="grid"))
-    else:
-        result.append("\nAll data types are identical.")
-
-    stats_diff = []
-    for col in common_cols:
-        if df1[col].dtype != df2[col].dtype:
-            stats_diff.append([col, "Skipped (different data types)", ""])
-        else:
-            stats1 = calculate_summary_stats(df1[col])
-            stats2 = calculate_summary_stats(df2[col])
-            diff = compare_summary_stats(stats1, stats2)
-            if diff:
-                for stat, values in diff.items():
-                    stats_diff.append(
-                        [f"{col} ({stat})", values["R/Python"], values["New Python"]]
-                    )
-
-    if stats_diff:
-        result.append("\nSummary Statistics Differences:")
-        result.append(
-            tabulate(
-                stats_diff,
-                headers=["Column (Statistic)", "R/Python", "New Python"],
-                tablefmt="grid",
-            )
-        )
-    else:
-        result.append("\nAll summary statistics are identical within tolerance.")
-
-    return "\n".join(result)
-
-
-def calculate_summary_stats(series):
-    if pd.api.types.is_numeric_dtype(series):
-        return {
-            "min": series.min(),
-            "max": series.max(),
-            "mean": series.mean(),
-            "median": series.median(),
-            "std": series.std(),
-        }
-    elif pd.api.types.is_datetime64_any_dtype(series):
-        return {
-            "min": series.min(),
-            "max": series.max(),
-            "mean": series.mean(),
-            "median": series.median(),
-        }
-    else:
-        return {
-            "unique_count": series.nunique(),
-            "most_common": series.value_counts().index[0] if not series.empty else None,
-        }
-
-
-def compare_summary_stats(stats1, stats2, tolerance=1e-5):
-    diff = {}
-    for key in stats1.keys():
-        if key in ["min", "max", "mean", "median"]:
-            if isinstance(stats1[key], (pd.Timestamp, np.datetime64)):
-                if stats1[key] != stats2[key]:
-                    diff[key] = {"R/Python": stats1[key], "New Python": stats2[key]}
-            elif not np.isclose(
-                stats1[key], stats2[key], rtol=tolerance, atol=tolerance, equal_nan=True
-            ):
-                diff[key] = {"R/Python": stats1[key], "New Python": stats2[key]}
-        elif key == "std":
-            if not np.isclose(
-                stats1[key], stats2[key], rtol=tolerance, atol=tolerance, equal_nan=True
-            ):
-                diff[key] = {"R/Python": stats1[key], "New Python": stats2[key]}
-        else:  # unique_count, most_common
-            if stats1[key] != stats2[key]:
-                diff[key] = {"R/Python": stats1[key], "New Python": stats2[key]}
-    return diff
-
-
-def compare_modNLS(dict1, dict2, name):
-    result = [f"\n{name} Dictionary Comparison:"]
-
-    keys1 = set(dict1.keys())
-    keys2 = set(dict2.keys())
-    common_keys = keys1.intersection(keys2)
-    only_in_dict1 = keys1 - keys2
-    only_in_dict2 = keys2 - keys1
-
-    result.append("Key Comparison:")
-    key_table = [
-        ["Common Keys", "Only in R/Python", "Only in New Python"],
-        [
-            ", ".join(sorted(common_keys)),
-            ", ".join(sorted(only_in_dict1)),
-            ", ".join(sorted(only_in_dict2)),
-        ],
-    ]
-    result.append(tabulate(key_table, headers="firstrow", tablefmt="grid"))
-
-    result.append("\nCommon Keys Analysis:")
-    for key in common_keys:
-        result.append(f"\nKey: {key}")
-        if isinstance(dict1[key], dict) and isinstance(dict2[key], dict):
-            sub_diff = compare_nested_dict(dict1[key], dict2[key], f"{name}.{key}")
-            if sub_diff:
-                result.append("Differences in nested dictionary:")
-                result.append(
-                    tabulate(
-                        sub_diff,
-                        headers=["Subkey", "R/Python", "New Python"],
-                        tablefmt="grid",
-                    )
-                )
-            else:
-                result.append("Nested dictionaries are identical within tolerance.")
-        elif isinstance(dict1[key], pd.DataFrame) and isinstance(
-            dict2[key], pd.DataFrame
-        ):
-            df_diff = compare_dataframes(dict1[key], dict2[key], f"{name}.{key}")
-            result.append(df_diff)
-        elif isinstance(dict1[key], (int, float, str, bool)):
-            if np.isclose(dict1[key], dict2[key], equal_nan=True, rtol=1e-5, atol=1e-8):
-                result.append(f"Values are identical within tolerance: {dict1[key]}")
-            else:
-                result.append(
-                    f"Values differ: R/Python = {dict1[key]}, New Python = {dict2[key]}"
-                )
-        elif isinstance(dict1[key], np.ndarray) and isinstance(dict2[key], np.ndarray):
-            if np.allclose(
-                dict1[key], dict2[key], equal_nan=True, rtol=1e-5, atol=1e-8
-            ):
-                result.append("Arrays are identical within tolerance.")
-                result.append(f"Shape: {dict1[key].shape}")
-                result.append("Summary statistics:")
-                stats = calculate_array_stats(dict1[key])
-                result.append(
-                    tabulate(
-                        [["Statistic", "Value"]] + [[k, v] for k, v in stats.items()],
-                        headers="firstrow",
-                        tablefmt="grid",
-                    )
-                )
-            else:
-                result.append("Arrays are different.")
-                result.append("Summary statistics for R/Python array:")
-                stats1 = calculate_array_stats(dict1[key])
-                result.append(
-                    tabulate(
-                        [["Statistic", "Value"]] + [[k, v] for k, v in stats1.items()],
-                        headers="firstrow",
-                        tablefmt="grid",
-                    )
-                )
-                result.append("Summary statistics for New Python array:")
-                stats2 = calculate_array_stats(dict2[key])
-                result.append(
-                    tabulate(
-                        [["Statistic", "Value"]] + [[k, v] for k, v in stats2.items()],
-                        headers="firstrow",
-                        tablefmt="grid",
-                    )
-                )
-        else:
-            result.append(f"Unable to compare {type(dict1[key])} objects")
-
-    return "\n".join(result)
-
-
-def calculate_array_stats(arr):
-    return {
-        "min": np.min(arr),
-        "max": np.max(arr),
-        "mean": np.mean(arr),
-        "median": np.median(arr),
-        "std": np.std(arr),
-    }
-
-
-def compare_nested_dict(dict1, dict2, name):
-    differences = []
-    for key in set(dict1.keys()) | set(dict2.keys()):
-        if key not in dict1:
-            differences.append([f"{key}", "Missing", dict2[key]])
-        elif key not in dict2:
-            differences.append([f"{key}", dict1[key], "Missing"])
-        elif isinstance(dict1[key], dict) and isinstance(dict2[key], dict):
-            sub_diff = compare_nested_dict(dict1[key], dict2[key], f"{name}.{key}")
-            differences.extend(sub_diff)
-        elif isinstance(dict1[key], (int, float)) and isinstance(
-            dict2[key], (int, float)
-        ):
-            if not np.isclose(
-                dict1[key], dict2[key], equal_nan=True, rtol=1e-5, atol=1e-8
-            ):
-                differences.append([f"{key}", dict1[key], dict2[key]])
-        elif dict1[key] != dict2[key]:
-            differences.append([f"{key}", dict1[key], dict2[key]])
-    return differences
diff --git a/python/src/robyn/tutorials/utils/plot_utils.py b/python/src/robyn/tutorials/utils/plot_utils.py
deleted file mode 100644
index 72ef93fdf..000000000
--- a/python/src/robyn/tutorials/utils/plot_utils.py
+++ /dev/null
@@ -1,51 +0,0 @@
-import io
-import binascii
-from PIL import Image
-from IPython.display import display
-import warnings
-
-
-def plot_outputgraphs(model_outputs, graph_type, max_size=(1000, 1500)):
-    """
-    Plots the output graphs for the given model outputs.
-    Args:
-        model_outputs: ModelOutputs object containing the output data for the graphs.
-        graph_type: The type of graph to plot.
-        max_size: Optional. The maximum size of the rendered images. Defaults to (1000, 1500).
-    Returns:
-        None. The function renders the plots and displays them using the `display()` function from IPython.
-    """
-    convergence = model_outputs.convergence
-
-    if graph_type in ["moo_distrb_plot", "moo_cloud_plot"]:
-        image_data = binascii.unhexlify("".join(getattr(convergence, graph_type)))
-        image = Image.open(io.BytesIO(image_data))
-        image.thumbnail(max_size, Image.Resampling.LANCZOS)
-        display(image)
-    elif graph_type == "ts_validation_plot":
-        if hasattr(model_outputs, "ts_validation_plot"):
-            image_data = binascii.unhexlify("".join(model_outputs.ts_validation_plot))
-            image = Image.open(io.BytesIO(image_data))
-            image.thumbnail(max_size, Image.Resampling.LANCZOS)
-            display(image)
-        else:
-            warnings.warn("ts_validation_plot not available in model outputs")
-    else:
-        warnings.warn(f"Graph type '{graph_type}' is not supported")
-
-
-# Example usage
-if __name__ == "__main__":
-    from data_mapper import load_data_from_json, import_data
-
-    # Load the data
-    loaded_data = load_data_from_json("path/to/your/exported_data.json")
-    imported_data = import_data(loaded_data)
-
-    # Get the model_outputs
-    model_outputs = imported_data["model_outputs"]
-
-    # Plot the graphs
-    plot_outputgraphs(model_outputs, "moo_distrb_plot")
-    plot_outputgraphs(model_outputs, "moo_cloud_plot")
-    plot_outputgraphs(model_outputs, "ts_validation_plot")
diff --git a/python/src/robyn/tutorials/e2e_test/tutorial3_modeling_compare.ipynb b/python/tests/component_tutorials/e2e_test/tutorial3_modeling_compare.ipynb
similarity index 100%
rename from python/src/robyn/tutorials/e2e_test/tutorial3_modeling_compare.ipynb
rename to python/tests/component_tutorials/e2e_test/tutorial3_modeling_compare.ipynb
diff --git a/python/src/robyn/tutorials/e2e_test/tutorial4_pareto_e2e_test.ipynb b/python/tests/component_tutorials/e2e_test/tutorial4_pareto_e2e_test.ipynb
similarity index 100%
rename from python/src/robyn/tutorials/e2e_test/tutorial4_pareto_e2e_test.ipynb
rename to python/tests/component_tutorials/e2e_test/tutorial4_pareto_e2e_test.ipynb
diff --git a/python/src/robyn/tutorials/e2e_test/tutorial4_pareto_e2e_test_with_allocator.ipynb b/python/tests/component_tutorials/e2e_test/tutorial4_pareto_e2e_test_with_allocator.ipynb
similarity index 100%
rename from python/src/robyn/tutorials/e2e_test/tutorial4_pareto_e2e_test_with_allocator.ipynb
rename to python/tests/component_tutorials/e2e_test/tutorial4_pareto_e2e_test_with_allocator.ipynb
diff --git a/python/src/robyn/tutorials/r_exported_data/r_tutorial3_modeling_compare.ipynb b/python/tests/component_tutorials/r_exported_data/r_tutorial3_modeling_compare.ipynb
similarity index 100%
rename from python/src/robyn/tutorials/r_exported_data/r_tutorial3_modeling_compare.ipynb
rename to python/tests/component_tutorials/r_exported_data/r_tutorial3_modeling_compare.ipynb
diff --git a/python/src/robyn/tutorials/tutorial2_feature_engineering.ipynb b/python/tests/component_tutorials/tutorial2_feature_engineering.ipynb
similarity index 100%
rename from python/src/robyn/tutorials/tutorial2_feature_engineering.ipynb
rename to python/tests/component_tutorials/tutorial2_feature_engineering.ipynb
diff --git a/python/src/robyn/tutorials/tutorial3_modeling.ipynb b/python/tests/component_tutorials/tutorial3_modeling.ipynb
similarity index 100%
rename from python/src/robyn/tutorials/tutorial3_modeling.ipynb
rename to python/tests/component_tutorials/tutorial3_modeling.ipynb
diff --git a/python/src/robyn/tutorials/tutorial4_pareto.ipynb b/python/tests/component_tutorials/tutorial4_pareto.ipynb
similarity index 100%
rename from python/src/robyn/tutorials/tutorial4_pareto.ipynb
rename to python/tests/component_tutorials/tutorial4_pareto.ipynb
diff --git a/python/src/robyn/tutorials/tutorial5_calibration.ipynb b/python/tests/component_tutorials/tutorial5_calibration.ipynb
similarity index 100%
rename from python/src/robyn/tutorials/tutorial5_calibration.ipynb
rename to python/tests/component_tutorials/tutorial5_calibration.ipynb
diff --git a/python/src/robyn/tutorials/tutorial6_allocator.ipynb b/python/tests/component_tutorials/tutorial6_allocator.ipynb
similarity index 100%
rename from python/src/robyn/tutorials/tutorial6_allocator.ipynb
rename to python/tests/component_tutorials/tutorial6_allocator.ipynb
diff --git a/python/src/robyn/tutorials/tutorial7_clustering.ipynb b/python/tests/component_tutorials/tutorial7_clustering.ipynb
similarity index 100%
rename from python/src/robyn/tutorials/tutorial7_clustering.ipynb
rename to python/tests/component_tutorials/tutorial7_clustering.ipynb
diff --git a/python/src/robyn/tutorials/tutorial_data_mapper.ipynb b/python/tests/component_tutorials/tutorial_data_mapper.ipynb
similarity index 100%
rename from python/src/robyn/tutorials/tutorial_data_mapper.ipynb
rename to python/tests/component_tutorials/tutorial_data_mapper.ipynb
diff --git a/python/src/robyn/tutorials/utils/data_mapper.py b/python/tests/component_tutorials/utils/data_mapper.py
similarity index 100%
rename from python/src/robyn/tutorials/utils/data_mapper.py
rename to python/tests/component_tutorials/utils/data_mapper.py
diff --git a/python/tests/modeling/pareto/test_hill_calculator.py b/python/tests/unit/modeling/pareto/test_hill_calculator.py
similarity index 100%
rename from python/tests/modeling/pareto/test_hill_calculator.py
rename to python/tests/unit/modeling/pareto/test_hill_calculator.py
diff --git a/python/tests/modeling/pareto/test_response_curve.py b/python/tests/unit/modeling/pareto/test_response_curve.py
similarity index 100%
rename from python/tests/modeling/pareto/test_response_curve.py
rename to python/tests/unit/modeling/pareto/test_response_curve.py
diff --git a/python/tests/modeling/test_feature_engineering.py b/python/tests/unit/modeling/test_feature_engineering.py
similarity index 100%
rename from python/tests/modeling/test_feature_engineering.py
rename to python/tests/unit/modeling/test_feature_engineering.py
diff --git a/python/tests/modeling_pareto/test_pareto_utils.py b/python/tests/unit/modeling_pareto/test_pareto_utils.py
similarity index 100%
rename from python/tests/modeling_pareto/test_pareto_utils.py
rename to python/tests/unit/modeling_pareto/test_pareto_utils.py
diff --git a/python/tests/test_calibration_input_validation.py b/python/tests/unit/test_calibration_input_validation.py
similarity index 100%
rename from python/tests/test_calibration_input_validation.py
rename to python/tests/unit/test_calibration_input_validation.py
diff --git a/python/tests/test_holidays_data_validation.py b/python/tests/unit/test_holidays_data_validation.py
similarity index 100%
rename from python/tests/test_holidays_data_validation.py
rename to python/tests/unit/test_holidays_data_validation.py
diff --git a/python/tests/test_hyperparameter_validation.py b/python/tests/unit/test_hyperparameter_validation.py
similarity index 100%
rename from python/tests/test_hyperparameter_validation.py
rename to python/tests/unit/test_hyperparameter_validation.py
diff --git a/python/tests/test_mmmdata_validation.py b/python/tests/unit/test_mmmdata_validation.py
similarity index 100%
rename from python/tests/test_mmmdata_validation.py
rename to python/tests/unit/test_mmmdata_validation.py
diff --git a/python/tests/test_model_builder.py b/python/tests/unit/test_model_builder.py
similarity index 100%
rename from python/tests/test_model_builder.py
rename to python/tests/unit/test_model_builder.py