-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathbandit_sim.py
107 lines (94 loc) · 4.19 KB
/
bandit_sim.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import pandas as pd
import statsmodels.api as sm
import numpy as np
import random as rd
import sys
# Static parameters of the simulation
CONTEXTUAL_BANDIT = True # set to False to disable smartness.
VISITORS = 2000 # number of visitors to simulate.
CHUNKS = 50 # number of samples to collect before printing intermediate results.
WARMUP = 100 # number of samples to collect before training.
J = 100 # number of bootstrap samples to use.
MAX_SAMPLE = 2000 # max number of samples per bootstrap.
SIMULATIONS = 100 # number of simulations to run sequentially.
print "simulation, visitors, current conversion rate, overall conversion rate, total conversions, pulls per arm";
for sim in xrange(0, SIMULATIONS):
# counters for simulation
conversions = 0
sub_conversions = 0
# with these settings, non-contextual best should be ~0.4 vs contextual ~0.5
arms = [
{
"base_rate": 0.3,
"modifiers": {
"var-A_1": 2,
"var-B_1": 1./2,
},
"models": []
},
{
"base_rate": 0.4,
"modifiers": { },
"models": []
},
]
# init models and pulls
for a in xrange(0, len(arms)):
arms[a]["pulls"] = 0
for j in xrange(0,J):
arms[a]["models"].append({"model": None, "df": pd.DataFrame(), "update": True})
for r in xrange(1,VISITORS+1):
# generate a visitor
l = rd.choice(["A","B"])
v = {
"var-A": int(l == "A"),
"var-B": int(l == "B"),
}
# predict conversion each arm.
scores = []
for a in xrange(0,len(arms)):
j = rd.randint(0,J-1)
if (len(arms[a]["models"][j]["df"]) < WARMUP):
scores.append(np.random.ranf())
else:
if CONTEXTUAL_BANDIT:
if (arms[a]["models"][j]["update"] == True):
df = arms[a]["models"][j]["df"]
arms[a]["models"][j]["model"] = sm.Logit(df["c"], df[df.columns[1:]]).fit(disp=0)
arms[a]["models"][j]["update"] = False
scores.append(arms[a]["models"][j]["model"].predict(pd.DataFrame([v]))[0])
else:
scores.append((sum(arms[a]["models"][j]["df"]["c"])*1.)/len(arms[a]["models"][j]["df"]))
# pick best arm
arm = np.argmax(scores) # argmax(index @scores)
# calculate conversion odds
odds = arms[arm]["base_rate"]
for m in arms[arm]["modifiers"].keys():
if int(v[m.split("_")[0]]) == int(m.split("_")[1]):
odds *= arms[arm]["modifiers"][m]
# pull arm and check for conversion
arms[arm]["pulls"] += 1
v["c"] = np.random.ranf() < odds
conversions += v["c"]
# update bootstrap samples for all pools
for j in xrange(0,J):
if (rd.randint(0,1) == 1): # probabilistically sample 50%
if arms[arm]["models"][j]["df"] is None:
arms[arm]["models"][j]["df"] = pd.DataFrame([v])
else:
if (len(arms[arm]["models"][j]["df"]) == MAX_SAMPLE): # limit sample size
i = rd.randint(0,MAX_SAMPLE-1) # replace random old sample
arms[arm]["models"][j]["df"][i:i+1] = pd.DataFrame([v])
else:
arms[arm]["models"][j]["df"] = arms[arm]["models"][j]["df"].append(pd.DataFrame([v]))
# set update models if needed
if CONTEXTUAL_BANDIT:
if (len(arms[arm]["models"][j]["df"]) >= WARMUP):
arms[arm]["models"][j]["update"] = True
if r % CHUNKS == 0:
p = str(arms[0]["pulls"])
for a in xrange(1, len(arms)):
p += ", " + str(arms[a]["pulls"])
print str(sim) + ", " + str(r) + ", " + str((conversions-sub_conversions)*1./CHUNKS) + ", " + str(conversions*1./r) + ", " + str(conversions) + ", " + p
sys.stdout.flush()
sub_conversions = conversions