facebookexperimental · alxlyj · Dec 6, 2024 · Dec 5, 2024 · Dec 6, 2024 · Dec 6, 2024
diff --git a/python/src/robyn/modeling/ridge/ridge_metrics_calculator.py b/python/src/robyn/modeling/ridge/ridge_metrics_calculator.py
@@ -13,7 +13,6 @@ def __init__(self, mmm_data, hyperparameters, ridge_data_builder):
         self.ridge_data_builder = ridge_data_builder
         self.logger = logging.getLogger(__name__)
 
-    # Updated _calculate_decomp_spend_dist method
     def _calculate_decomp_spend_dist(
         self, model: Ridge, X: pd.DataFrame, y: pd.Series, metrics: Dict[str, float]
     ) -> pd.DataFrame:
@@ -29,17 +28,17 @@ def _calculate_decomp_spend_dist(
         all_effects = {}
         all_spends = {}
 
+        # Calculate effects using absolute values for scaling
         for col in paid_media_cols:
             idx = list(X.columns).index(col)
             coef = model.coef_[idx]
             spend = np.abs(X[col].sum())  # Ensure positive spend
-            effect = coef * spend  # Keep original sign for effect
+            # Take the magnitude so each channel's aggregate effect is non-negative
+            effect = np.abs(coef * spend)
             all_effects[col] = effect
             all_spends[col] = spend
 
-        total_effect = np.sum(
-            np.abs([e for e in all_effects.values()])
-        )  # Use absolute sum
+        total_effect = np.sum([e for e in all_effects.values()])
 
         # Second pass to calculate normalized metrics
         results = []
@@ -51,7 +50,9 @@ def _calculate_decomp_spend_dist(
 
             # Handle non-zero values properly
             non_zero_mask = X[col] != 0
-            non_zero_effect = X[col][non_zero_mask] * coef
+            non_zero_effect = np.abs(
+                X[col][non_zero_mask] * coef
+            )  # Per-row effect magnitude on the non-zero rows only
             non_zero_mean = float(
                 non_zero_effect.mean() if len(non_zero_effect) > 0 else 0
             )
@@ -60,14 +61,12 @@ def _calculate_decomp_spend_dist(
             spend_share = (
                 float(spend / total_media_spend) if total_media_spend > 0 else 0
             )
-            effect_share = (
-                float(np.abs(effect) / total_effect) if total_effect > 0 else 0
-            )
+            effect_share = float(effect / total_effect) if total_effect > 0 else 0
 
             result = {
                 "rn": str(col),
                 "coef": float(coef),
-                "xDecompAgg": float(effect),
+                "xDecompAgg": float(effect),  # This is now positive
                 "total_spend": float(spend),
                 "mean_spend": float(np.abs(X[col].mean())),
                 "spend_share": spend_share,
@@ -111,14 +110,33 @@ def _calculate_decomp_spend_dist(
                     "iterNG": int(metrics.get("iterNG", 0)),
                     "iterPar": int(metrics.get("iterPar", 0)),
                     "Elapsed": float(metrics.get("elapsed", 0)),
+                    "pos": bool(coef >= 0),
                 }
             )
 
             results.append(result)
 
         df = pd.DataFrame(results)
 
-        # Ensure correct column order
+        # Ensure correct column types and order
+        df = df.astype(
+            {
+                "rn": "str",
+                "coef": "float64",
+                "xDecompAgg": "float64",
+                "total_spend": "float64",
+                "mean_spend": "float64",
+                "effect_share": "float64",
+                "spend_share": "float64",
+                "sol_id": "str",
+                "pos": "bool",
+                "mape": "int64",
+                "trial": "int64",
+                "iterNG": "int64",
+                "iterPar": "int64",
+            }
+        )
+
         required_cols = [
             "rn",
             "coef",
@@ -127,10 +145,6 @@ def _calculate_decomp_spend_dist(
             "mean_spend",
             "spend_share",
             "effect_share",
-            "xDecompPerc",
-            "xDecompMeanNon0",
-            "xDecompMeanNon0Perc",
-            "pos",
             "sol_id",
             "rsq_train",
             "rsq_val",
@@ -146,14 +160,9 @@ def _calculate_decomp_spend_dist(
             "iterNG",
             "iterPar",
             "Elapsed",
+            "pos",
         ]
-        self.logger.debug(f"Decomp spend distribution debug:")
-        self.logger.debug(f"Total media spend: {total_media_spend}")
-        self.logger.debug(f"Total effect: {total_effect}")
-        for col in paid_media_cols:
-            self.logger.debug(
-                f"{col} - effect: {all_effects[col]}, spend: {all_spends[col]}"
-            )
+
         return df[required_cols]
 
     def _calculate_lambda(

diff --git a/python/src/robyn/tutorials/tutorial1.ipynb b/python/src/robyn/tutorials/tutorial1.ipynb
@@ -23,10 +23,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "id": "eb8146e8",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2024-12-06 14:21:59,837 - robyn - INFO - Logging is set up to console only.\n"
+     ]
+    }
+   ],
    "source": [
     "import sys\n",
     "\n",
@@ -230,7 +238,7 @@
     "from robyn.modeling.entities.modelrun_trials_config import TrialsConfig\n",
     "\n",
     "\n",
-    "trials_config = TrialsConfig(iterations=54, trials=5)\n",
+    "trials_config = TrialsConfig(iterations=2000, trials=5)\n",
     "\n",
     "# Run the model\n",
     "robyn.train_models(trials_config=trials_config,\n",

diff --git a/python/src/robyn/tutorials/tutorial4_pareto.ipynb b/python/src/robyn/tutorials/tutorial4_pareto.ipynb
@@ -9,6 +9,8 @@
     "# Test Pareto Optimizer\n",
     "import sys\n",
     "\n",
+    "sys.path.append(\"/Users/yijuilee/robynpy_release_reviews/Robyn/python/src\")\n",
+    "\n",
     "import pandas as pd\n",
     "import json\n",
     "from typing import Dict, Any, List\n",
@@ -17,9 +19,18 @@
     "from robyn.data.entities.mmmdata import MMMData\n",
     "from robyn.modeling.entities.modeloutputs import ModelOutputs, Trial\n",
     "from robyn.modeling.pareto.pareto_optimizer import ParetoOptimizer\n",
-    "from robyn.data.entities.enums import DependentVarType, PaidMediaSigns, OrganicSigns, ContextSigns\n",
+    "from robyn.data.entities.enums import (\n",
+    "    DependentVarType,\n",
+    "    PaidMediaSigns,\n",
+    "    OrganicSigns,\n",
+    "    ContextSigns,\n",
+    ")\n",
     "\n",
-    "from robyn.tutorials.utils.data_mapper import import_output_models, import_input_collect, load_data_from_json"
+    "from robyn.tutorials.utils.data_mapper import (\n",
+    "    import_output_models,\n",
+    "    import_input_collect,\n",
+    "    load_data_from_json,\n",
+    ")"
    ]
   },
   {
@@ -30,10 +41,10 @@
    "source": [
     "# Load data from JSON\n",
     "inputCollect = load_data_from_json(\n",
-    "    \"/Users/funny/Documents/git/Robyn/python/src/robyn/tutorials/resources/Pareto_InputCollect.json\"\n",
+    "    \"/Users/yijuilee/project_robyn/original/Robyn_original_2/Robyn/robyn_api/data/test_Pareto_2000_iterations_5_trials_InputCollect.json\"\n",
     ")\n",
     "outputModel = load_data_from_json(\n",
-    "    \"/Users/funny/Documents/git/Robyn/python/src/robyn/tutorials/resources/Pareto_OutputModels.json\"\n",
+    "    \"/Users/yijuilee/project_robyn/original/Robyn_original_2/Robyn/robyn_api/data/test_Pareto_2000_iterations_5_trials_OutputModels.json\"\n",
     ")\n",
     "input_collect = import_input_collect(inputCollect)\n",
     "model_outputs = import_output_models(outputModel)\n",
@@ -42,7 +53,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -63,12 +74,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "# 3. Create ParetoOptimizer instance\n",
-    "pareto_optimizer = ParetoOptimizer(mmm_data, model_outputs, hyperparameters, featurized_mmm_data, holidays_data)"
+    "pareto_optimizer = ParetoOptimizer(\n",
+    "    mmm_data, model_outputs, hyperparameters, featurized_mmm_data, holidays_data\n",
+    ")"
    ]
   },
   {
@@ -90,7 +103,9 @@
     "# 5. Check results\n",
     "print(\"Pareto Optimization Results:\")\n",
     "print(f\"Number of Pareto fronts: {pareto_result.pareto_solutions}\")\n",
-    "print(f\"MediaVecCollect: {pareto_result.media_vec_collect.shape, pareto_result.media_vec_collect}\")\n",
+    "print(\n",
+    "    f\"MediaVecCollect: {pareto_result.media_vec_collect.shape, pareto_result.media_vec_collect}\"\n",
+    ")\n",
     "print(\"\\Hyper parameter solutions:\")\n",
     "print(pareto_result.result_hyp_param)\n",
     "\n",
@@ -122,7 +137,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "print(pareto_result.x_decomp_agg[pareto_result.x_decomp_agg[\"sol_id\"] == '5_221_9'])"
+    "print(pareto_result.x_decomp_agg[pareto_result.x_decomp_agg[\"sol_id\"] == \"5_221_9\"])"
    ]
   },
   {
@@ -134,7 +149,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -167,7 +182,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.5"
+   "version": "3.10.15"
   }
  },
  "nbformat": 4,