Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[accuracy] modeling changes for calculate_decomp_spend_dist #1175

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
53 changes: 31 additions & 22 deletions python/src/robyn/modeling/ridge/ridge_metrics_calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ def __init__(self, mmm_data, hyperparameters, ridge_data_builder):
self.ridge_data_builder = ridge_data_builder
self.logger = logging.getLogger(__name__)

# Updated _calculate_decomp_spend_dist method
def _calculate_decomp_spend_dist(
self, model: Ridge, X: pd.DataFrame, y: pd.Series, metrics: Dict[str, float]
) -> pd.DataFrame:
Expand All @@ -29,17 +28,17 @@ def _calculate_decomp_spend_dist(
all_effects = {}
all_spends = {}

# Calculate effects using absolute values for scaling
for col in paid_media_cols:
idx = list(X.columns).index(col)
coef = model.coef_[idx]
spend = np.abs(X[col].sum()) # Ensure positive spend
effect = coef * spend # Keep original sign for effect
# Use absolute values for effect calculation
effect = np.abs(coef * spend) # Changed to use absolute value
all_effects[col] = effect
all_spends[col] = spend

total_effect = np.sum(
np.abs([e for e in all_effects.values()])
) # Use absolute sum
total_effect = np.sum([e for e in all_effects.values()])

# Second pass to calculate normalized metrics
results = []
Expand All @@ -51,7 +50,9 @@ def _calculate_decomp_spend_dist(

# Handle non-zero values properly
non_zero_mask = X[col] != 0
non_zero_effect = X[col][non_zero_mask] * coef
non_zero_effect = np.abs(
X[col][non_zero_mask] * coef
) # Changed to use absolute value
non_zero_mean = float(
non_zero_effect.mean() if len(non_zero_effect) > 0 else 0
)
Expand All @@ -60,14 +61,12 @@ def _calculate_decomp_spend_dist(
spend_share = (
float(spend / total_media_spend) if total_media_spend > 0 else 0
)
effect_share = (
float(np.abs(effect) / total_effect) if total_effect > 0 else 0
)
effect_share = float(effect / total_effect) if total_effect > 0 else 0

result = {
"rn": str(col),
"coef": float(coef),
"xDecompAgg": float(effect),
"xDecompAgg": float(effect), # This is now positive
"total_spend": float(spend),
"mean_spend": float(np.abs(X[col].mean())),
"spend_share": spend_share,
Expand Down Expand Up @@ -111,14 +110,33 @@ def _calculate_decomp_spend_dist(
"iterNG": int(metrics.get("iterNG", 0)),
"iterPar": int(metrics.get("iterPar", 0)),
"Elapsed": float(metrics.get("elapsed", 0)),
"pos": bool(coef >= 0),
}
)

results.append(result)

df = pd.DataFrame(results)

# Ensure correct column order
# Ensure correct column types and order
df = df.astype(
{
"rn": "str",
"coef": "float64",
"xDecompAgg": "float64",
"total_spend": "float64",
"mean_spend": "float64",
"effect_share": "float64",
"spend_share": "float64",
"sol_id": "str",
"pos": "bool",
"mape": "int64",
"trial": "int64",
"iterNG": "int64",
"iterPar": "int64",
}
)

required_cols = [
"rn",
"coef",
Expand All @@ -127,10 +145,6 @@ def _calculate_decomp_spend_dist(
"mean_spend",
"spend_share",
"effect_share",
"xDecompPerc",
"xDecompMeanNon0",
"xDecompMeanNon0Perc",
"pos",
"sol_id",
"rsq_train",
"rsq_val",
Expand All @@ -146,14 +160,9 @@ def _calculate_decomp_spend_dist(
"iterNG",
"iterPar",
"Elapsed",
"pos",
]
self.logger.debug(f"Decomp spend distribution debug:")
self.logger.debug(f"Total media spend: {total_media_spend}")
self.logger.debug(f"Total effect: {total_effect}")
for col in paid_media_cols:
self.logger.debug(
f"{col} - effect: {all_effects[col]}, spend: {all_spends[col]}"
)

return df[required_cols]

def _calculate_lambda(
Expand Down
14 changes: 11 additions & 3 deletions python/src/robyn/tutorials/tutorial1.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,18 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"id": "eb8146e8",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2024-12-06 14:21:59,837 - robyn - INFO - Logging is set up to console only.\n"
]
}
],
"source": [
"import sys\n",
"\n",
Expand Down Expand Up @@ -230,7 +238,7 @@
"from robyn.modeling.entities.modelrun_trials_config import TrialsConfig\n",
"\n",
"\n",
"trials_config = TrialsConfig(iterations=54, trials=5)\n",
"trials_config = TrialsConfig(iterations=2000, trials=5)\n",
"\n",
"# Run the model\n",
"robyn.train_models(trials_config=trials_config,\n",
Expand Down
37 changes: 26 additions & 11 deletions python/src/robyn/tutorials/tutorial4_pareto.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
"# Test Pareto Optimizer\n",
"import sys\n",
"\n",
"sys.path.append(\"/Users/yijuilee/robynpy_release_reviews/Robyn/python/src\")\n",
"\n",
"import pandas as pd\n",
"import json\n",
"from typing import Dict, Any, List\n",
Expand All @@ -17,9 +19,18 @@
"from robyn.data.entities.mmmdata import MMMData\n",
"from robyn.modeling.entities.modeloutputs import ModelOutputs, Trial\n",
"from robyn.modeling.pareto.pareto_optimizer import ParetoOptimizer\n",
"from robyn.data.entities.enums import DependentVarType, PaidMediaSigns, OrganicSigns, ContextSigns\n",
"from robyn.data.entities.enums import (\n",
" DependentVarType,\n",
" PaidMediaSigns,\n",
" OrganicSigns,\n",
" ContextSigns,\n",
")\n",
"\n",
"from robyn.tutorials.utils.data_mapper import import_output_models, import_input_collect, load_data_from_json"
"from robyn.tutorials.utils.data_mapper import (\n",
" import_output_models,\n",
" import_input_collect,\n",
" load_data_from_json,\n",
")"
]
},
{
Expand All @@ -30,10 +41,10 @@
"source": [
"# Load data from JSON\n",
"inputCollect = load_data_from_json(\n",
" \"/Users/funny/Documents/git/Robyn/python/src/robyn/tutorials/resources/Pareto_InputCollect.json\"\n",
" \"/Users/yijuilee/project_robyn/original/Robyn_original_2/Robyn/robyn_api/data/test_Pareto_2000_iterations_5_trials_InputCollect.json\"\n",
")\n",
"outputModel = load_data_from_json(\n",
" \"/Users/funny/Documents/git/Robyn/python/src/robyn/tutorials/resources/Pareto_OutputModels.json\"\n",
" \"/Users/yijuilee/project_robyn/original/Robyn_original_2/Robyn/robyn_api/data/test_Pareto_2000_iterations_5_trials_OutputModels.json\"\n",
")\n",
"input_collect = import_input_collect(inputCollect)\n",
"model_outputs = import_output_models(outputModel)\n",
Expand All @@ -42,7 +53,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -63,12 +74,14 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 3. Create ParetoOptimizer instance\n",
"pareto_optimizer = ParetoOptimizer(mmm_data, model_outputs, hyperparameters, featurized_mmm_data, holidays_data)"
"pareto_optimizer = ParetoOptimizer(\n",
" mmm_data, model_outputs, hyperparameters, featurized_mmm_data, holidays_data\n",
")"
]
},
{
Expand All @@ -90,7 +103,9 @@
"# 5. Check results\n",
"print(\"Pareto Optimization Results:\")\n",
"print(f\"Number of Pareto fronts: {pareto_result.pareto_solutions}\")\n",
"print(f\"MediaVecCollect: {pareto_result.media_vec_collect.shape, pareto_result.media_vec_collect}\")\n",
"print(\n",
" f\"MediaVecCollect: {pareto_result.media_vec_collect.shape, pareto_result.media_vec_collect}\"\n",
")\n",
"print(\"\\Hyper parameter solutions:\")\n",
"print(pareto_result.result_hyp_param)\n",
"\n",
Expand Down Expand Up @@ -122,7 +137,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(pareto_result.x_decomp_agg[pareto_result.x_decomp_agg[\"sol_id\"] == '5_221_9'])"
"print(pareto_result.x_decomp_agg[pareto_result.x_decomp_agg[\"sol_id\"] == \"5_221_9\"])"
]
},
{
Expand All @@ -134,7 +149,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -167,7 +182,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
"version": "3.10.15"
}
},
"nbformat": 4,
Expand Down
Loading