state and national polls.stan (forked from pkremp/polls)
data{
int S; // Number of states with at least one poll, plus 1 (index 1 is the national level)
int T; // Number of days
int N; // Number of polls
int W; // Number of weeks
int P; // Number of pollsters
int last_poll_W; // Week of the most recent poll
int last_poll_T; // Day of the most recent poll
int s[N]; // State index
int t[N]; // Day index
int w[N]; // Week index (weeks start on Sundays)
int p[N]; // Pollster index
int T_unique; // Number of (unique) days with at least one national poll
int t_unique[N]; // Index of the unique national-poll day that each poll corresponds to
int unique_ts[T_unique]; // Day index of each unique national-poll day
int unique_ws[T_unique]; // Week index of each unique national-poll day
int week[last_poll_T]; // Week index for each day
int day_of_week[last_poll_T]; // Day of the week for each day (0 = Sunday, ..., 6 = Saturday)
vector[S] state_weights; // State shares of the national vote (element 1, the national slot, is unused)
real alpha_prior; // Prior mean of alpha (national vote vs. poll-weighted state average)
int n_clinton[N]; // Number of Clinton supporters in each poll
int <lower = 0> n_respondents[N]; // Number of respondents in each poll
vector [S] mu_b_prior; // Prior mean of the state parameters mu_b on election day (logit scale)
matrix [S-1, S-1] sigma_mu_b_end; // Prior covariance of mu_b on election day
matrix [S-1, S-1] sigma_walk_b_forecast; // Weekly covariance of the forward random walk on mu_b
matrix [S-1, S-1] sigma_poll_error; // Covariance of correlated state-level polling errors
}
transformed data{
matrix [S-1, S-1] chol_sigma_walk_b_forecast;
matrix [S-1, S-1] chol_sigma_mu_b_end;
matrix [S-1, S-1] chol_sigma_poll_error;
vector [S-1] zero_vec;
int last_sunday;
// Cholesky decompositions to speed up sampling from multivariate normal.
chol_sigma_walk_b_forecast = cholesky_decompose(sigma_walk_b_forecast);
chol_sigma_mu_b_end = cholesky_decompose(sigma_mu_b_end);
chol_sigma_poll_error = cholesky_decompose(sigma_poll_error);
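// If z is a vector of independent standard normals and L = cholesky_decompose(Sigma),
// then mu + L * z has a multivariate normal(mu, Sigma) distribution (since L * L' = Sigma);
// the standard-normal parameters below are transformed this way instead of being drawn from the full covariance directly.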
for (state in 1:(S-1)) zero_vec[state] = 0;
last_sunday = last_poll_T - day_of_week[last_poll_T]; // Day index of the most recent Sunday (start of the current week)
}
parameters{
vector[last_poll_T-1] delta_a; // Standard-normal steps of the reverse random walk for mu_a
matrix[W-1,S] delta_b; // Standard-normal steps of the reverse random walk for mu_b
vector[S] mu_b_end; // Standard-normal draws for mu_b on election day (scaled in transformed parameters)
vector[P] mu_c; // Standard-normal pollster house effects (scaled by sigma_c)
real alpha_01; // Standard-normal draw for alpha (shifted and scaled in transformed parameters)
real<lower = 0, upper = 0.1> sigma_c; // Scale of pollster house effects
real u[N]; // Standard-normal poll-level measurement noise
real<lower = 0, upper = 0.1> sigma_u_national; // Scale of national poll noise
real<lower = 0, upper = 0.1> sigma_u_state; // Scale of state poll noise
real<lower = 0, upper = 0.05> sigma_walk_a_past; // Daily sd of the backward random walk for mu_a
real<lower = 0, upper = 0.05> sigma_walk_b_past; // Daily-scale sd of the backward random walk for mu_b (multiplied by sqrt(7) per weekly step)
vector[S-1] poll_error_01; // Standard-normal draws for correlated state polling errors
}
transformed parameters{
real logit_clinton_hat[N]; // Logit of the expected Clinton share for each poll
vector[last_poll_T] mu_a; // National-level component, one value per day (logit scale)
matrix[W,S] mu_b; // State-level components, one value per week and state (logit scale)
vector[T_unique] average_states; // Weighted average of state shares on days with national polls
matrix[T_unique, S-1] matrix_inv_logit_mu_ab; // State-level shares on days with national polls
vector[S-1] poll_error; // Correlated state-level polling errors
real mu_a_t; // Temporary: mu_a on the current unique national-poll day
real alpha; // Adjustment for the gap between the national vote and the poll-weighted state average
alpha = alpha_prior + alpha_01*0.2;
poll_error = chol_sigma_poll_error * to_vector(poll_error_01);
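// Implied priors: alpha ~ normal(alpha_prior, 0.2) and poll_error ~ multi_normal(0, sigma_poll_error),
// obtained by shifting/scaling the standard-normal parameters alpha_01 and poll_error_01 (non-centered form).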
// Calculating mu_a: setting mu_a values to zero for the current week
for (i in last_sunday:last_poll_T)
mu_a[i] = 0;
// Calculating mu_a: reverse random walk
for (i in 1:(last_sunday - 1))
mu_a[last_sunday-i] = mu_a[last_sunday-i+1] + sigma_walk_a_past * delta_a[last_sunday-i];
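// Equivalent centered statement: mu_a[t] ~ normal(mu_a[t+1], sigma_walk_a_past) for t < last_sunday;
// the non-centered form used here (scaling standard-normal delta_a steps) samples more efficiently.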
// Calculating mu_b (using the Cholesky decompositions of the covariance matrices)
mu_b[W, 2:S] = to_row_vector(mu_b_prior[2:S] + chol_sigma_mu_b_end * to_vector(mu_b_end[2:S]));
if (last_poll_W < W){
for (wk in 1:(W - last_poll_W))
mu_b[W - wk, 2:S] = mu_b[W - wk + 1, 2:S] +
to_row_vector(chol_sigma_walk_b_forecast * to_vector(delta_b[W - wk, 2:S]));
}
for (wk in (W - last_poll_W + 1):(W-1))
mu_b[W - wk, 2:S] = mu_b[W - wk + 1, 2:S] + sigma_walk_b_past * sqrt(7) * delta_b[W - wk, 2:S];
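// Implied steps: for forecast weeks, mu_b[w] ~ multi_normal(mu_b[w+1], sigma_walk_b_forecast);
// for past weeks, each state follows mu_b[w] ~ normal(mu_b[w+1], sigma_walk_b_past * sqrt(7)),
// the sqrt(7) giving the weekly step the variance of seven daily steps.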
for (wk in 1:W)
mu_b[wk, 1] = 0;
// Creating a lookup table for the national vote weighted average;
// no need to compute values for every day:
// restricting the calculation to days on which there are national polls saves computation time.
for(i in 1:T_unique){
mu_a_t = mu_a[unique_ts[i]];
for (state in 1:(S-1))
// Careful here: state indices for poll_error are off by 1 relative to mu_b state indices.
matrix_inv_logit_mu_ab[i, state] = inv_logit(mu_a_t + mu_b[unique_ws[i], state+1] + poll_error[state]);
}
average_states = matrix_inv_logit_mu_ab * state_weights[2:S];
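// For each day i with a national poll:
// average_states[i] = sum over k = 2..S of state_weights[k] * inv_logit(mu_a[unique_ts[i]] + mu_b[unique_ws[i], k] + poll_error[k-1])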
// Calculating the logit_clinton_hat parameter (logit of the expected Clinton share) for each national/state poll
for(i in 1:N){
if (s[i] == 1){
// For national polls:
// p_clinton_hat is a function of a national parameter **mu_a**,
// the weighted average of the (interpolated) state parameters **mu_b**,
// an adjustment parameter **alpha** reflecting the fact that the average polled
// state voter is not representative of the average US voter,
// and pollster house effects **mu_c**.
logit_clinton_hat[i] = logit(average_states[t_unique[i]]) + alpha + sigma_c*mu_c[p[i]] + sigma_u_national*u[i];
}
else{
// For state polls:
// p_clinton_hat is a function of the national and state parameters **mu_a** and **mu_b**,
// the correlated state polling error, and pollster house effects **mu_c**.
logit_clinton_hat[i] = mu_a[t[i]] + mu_b[w[i], s[i]] + sigma_c*mu_c[p[i]] + sigma_u_state*u[i]
+ poll_error[s[i]-1];
}
}
}
model{
// mu_b_end, delta_a, and delta_b are drawn from Normal(0, 1), and the values of mu_a and mu_b
// are calculated in the transformed parameters block; this non-centered parameterization speeds up convergence dramatically.
// Prior on the state parameters mu_b on election day
mu_b_end[2:S] ~ normal(0, 1);
// delta_a and delta_b are the steps of the reverse random walks.
delta_a ~ normal(0, 1);
for (wk in 1:(W-1))
delta_b[W - wk, 2:S] ~ normal(0, 1);
// Prior for the difference between the national vote and the weighted average of the state parameters:
alpha_01 ~ normal(0, 1);
// Measurement error (one value per poll)
u ~ normal(0, 1);
// Polling error (one value per state; variance and correlation set by the sigma_poll_error matrix)
poll_error_01 ~ normal(0, 1);
// Pollster house effects
mu_c ~ normal(0, 1);
// Likelihood of the model:
n_clinton ~ binomial_logit(n_respondents, logit_clinton_hat);
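// binomial_logit parameterizes the binomial by the log-odds, i.e.
// n_clinton[i] ~ binomial(n_respondents[i], inv_logit(logit_clinton_hat[i])), evaluated in a numerically stable way.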
}
generated quantities{
matrix[last_sunday + W - last_poll_W, S] predicted_score;
// Predicted scores have *daily* values for past dates (since they depend on mu_b AND mu_a parameters),
// but *weekly* values for future dates (since they only depend on mu_b).
for (state in 2:S){
// Backward estimates (daily)
for (date in 1:last_sunday){
// Linear interpolation of mu_b values between previous and current week,
// or current and next week
// Using the following weights, depending on day_of_week:
//             w-1    w    w+1
// 0 Sun       3/7   4/7    -
// 1 Mon       2/7   5/7    -
// 2 Tue       1/7   6/7    -
// 3 Wed        0    7/7    -
// 4 Thu        -    6/7   1/7
// 5 Fri        -    5/7   2/7
// 6 Sat        -    4/7   3/7
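// Example: on a Tuesday (day_of_week = 2), the weight on mu_b[week-1] is 1 - 6/7 = 1/7 and on mu_b[week] is 6/7.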
if (day_of_week[date] <= 3){ // 0=Sun, 1=Mon, 2=Tue, 3=Wed
predicted_score[date, state] =
inv_logit(mu_a[date] + (1.0-(4+day_of_week[date])/7.0)*mu_b[max(week[date]-1, 1), state]
+ ((4+day_of_week[date])/7.0)*mu_b[week[date], state]);
}
else{ // 4=Thu, 5=Fri, 6=Sat
predicted_score[date, state] = // Linear interpolation between current and next week
inv_logit(mu_a[date] + ((10-day_of_week[date])/7.0)*mu_b[week[date], state]
+ (1.0-(10-day_of_week[date])/7.0)*mu_b[min(week[date]+1, W), state]);
}
// predicted_score[date, state] = // Just a little bit of linear interpolation between weeks
// inv_logit(mu_a[date] + (1.0-day_of_week[date]/7.0)*mu_b[week[date], state]
// + (day_of_week[date]/7.0)*mu_b[min(week[date]+1, W), state]);
}
// Forward estimates (weekly)
for (date in (last_sunday+1):(last_sunday + W - last_poll_W))
predicted_score[date, state] = inv_logit(mu_b[last_poll_W + date - last_sunday, state]);
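// Forward estimates depend only on mu_b: mu_a is set to zero over the current week,
// so future weekly scores are inv_logit of the state parameters alone.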
}
for (date in 1:(last_sunday + W - last_poll_W))
// National score: averaging state scores by state weights.
predicted_score[date, 1] = predicted_score[date, 2:S] * state_weights[2:S];
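// As with average_states above, the national score is a weighted average of state-level probabilities, not of logits.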
}