-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrecipe.py
147 lines (125 loc) · 5.81 KB
/
recipe.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
from cook import create_task, Task
from cook.contexts import create_group
import itertools as it
import numpy as np
import os
from pathlib import Path
from typing import Literal
# Setting the FAST environment variable (to any value) switches on the reduced settings.
FAST = os.environ.get("FAST") is not None

# Dependency management: compile the pinned requirements, then sync the environment to them.
create_task(
    "requirements",
    action="pip-compile -v",
    dependencies=["requirements.in"],
    targets=["requirements.txt"],
)
create_task(
    "pip-sync",
    action="pip-sync",
    dependencies=["requirements.txt"],
)
# Grid for the profiling experiments: sample sizes, noise scales, and parameterizations.
# Sample sizes double eleven times starting at 16 (16, 32, ..., 16384).
SIZES = 16 * np.power(2, np.arange(11))
# Size threshold for Fourier-only experiments (16 * 2 ** 9 = 8192).
FOURIER_ONLY_SIZE_THRESHOLD = 16 << 9
# Seven evenly spaced base-10 exponents of the noise scale between -1 and 1.
LOG10_NOISE_SCALES = np.linspace(-1, 1, num=7)
# Every model family in both its centered and non-centered variant.
PARAMETERIZATIONS = [
    f"{family}_{variant}"
    for family in ("graph", "fourier", "standard")
    for variant in ("centered", "non_centered")
]
def create_profile_task(
    method: Literal["sample", "variational"], parameterization: str, log10_sigma: float,
    size: int, max_chains: int = None,
    timeout: float = None, iter_sampling: int = None, train_frac: float = 1, suffix: str = ""
) -> Task:
    """
    Create a task for a profiling experiment.

    Args:
        method: Stan inference method to use (see :code:`cmdstanpy.CmdStanModel.[method]`).
        parameterization: One of the parameterizations in the :code:`PARAMETERIZATIONS`
            variable above.
        log10_sigma: Base-10 logarithm of the observation noise scale (the script receives
            :code:`10 ** log10_sigma`); the marginal variance of the GP is 1.
        size: Number of observations per example.
        max_chains: Maximum number of chains to run per example. This limits the amount of time
            spent on small :code:`size` experiments because we don't need to run the experiment
            for the whole :code:`timeout` seconds to get a good idea of sampling time. Defaults
            to 2 if :code:`FAST` is set and 20 otherwise.
        timeout: Maximum duration to run an experiment for before aborting. Defaults to 10 if
            :code:`FAST` is set and 60 otherwise.
        iter_sampling: Number of posterior draws after warmup. Defaults to 10 if :code:`FAST`
            is set and 100 otherwise.
        train_frac: Fraction of observations to use for fitting.
        suffix: Suffix to add to the name of the task and output filename.

    Returns:
        Task to run the profiling experiment.
    """
    # Falsy arguments (None or 0) fall back to the defaults, which are smaller in FAST mode.
    timeout = timeout or (10 if FAST else 60)
    max_chains = max_chains or (2 if FAST else 20)
    iter_sampling = iter_sampling or (10 if FAST else 100)

    # The experiment name is shared by the task name and the result file.
    name = f"log10_noise_scale-{log10_sigma:.3f}_size-{size}{suffix}"
    target = f"profile/results/{method}/{parameterization}/{name}.pkl"
    args = [
        "python", "profile/run_profile.py", method, parameterization, 10 ** log10_sigma, target,
        f"--iter_sampling={iter_sampling}", f"--n={size}", f"--max_chains={max_chains}",
        f"--timeout={timeout}", f"--train_frac={train_frac}",
    ]
    # Rerun the experiment whenever the driver script or one of the Stan programs changes.
    dependencies = [
        "profile/run_profile.py",
        "profile/data.stan",
        f"profile/{parameterization}.stan",
    ]
    # Hand the task back to the caller, as promised by the return annotation and docstring
    # (the result of `create_task` was previously discarded and `None` was returned).
    return create_task(
        name=f"profile:{method}-{parameterization}-{name}", action=args, targets=[target],
        dependencies=dependencies,
    )
# NOTE(review): the nesting below is reconstructed; all profiling tasks, including the one-off
# 10k-observation tasks, are assumed to be created inside the "profile" group -- confirm.
profile_group: create_group
with create_group("profile") as profile_group:
    # Sweep every parameterization over the full grid of noise scales and sizes by sampling.
    for parameterization, log10_sigma, size in it.product(
        PARAMETERIZATIONS, LOG10_NOISE_SCALES, SIZES
    ):
        # At or above the size threshold, only the Fourier methods are run.
        is_fourier = parameterization.startswith("fourier")
        if is_fourier or size < FOURIER_ONLY_SIZE_THRESHOLD:
            create_profile_task("sample", parameterization, log10_sigma, size)

    # Add variational inference, paired with a sampling run on the same train/test split.
    # The sampling run uses a long timeout and many samples to get the distributions right.
    train_test_kwargs = {
        "train_frac": 0.8,
        "suffix": "-train-test",
        "iter_sampling": 100 if FAST else 500,
        "timeout": 60 if FAST else 300,
    }
    for parameterization, log10_sigma in it.product(PARAMETERIZATIONS, LOG10_NOISE_SCALES):
        create_profile_task("variational", parameterization, log10_sigma, 1024, train_frac=0.8)
        create_profile_task("sample", parameterization, log10_sigma, 1024, **train_test_kwargs)

    # One-off tasks to calculate statistics for the abstract with 10k observations.
    for parameterization in ("fourier_centered", "fourier_non_centered"):
        create_profile_task("sample", parameterization, 0, 10_000, timeout=300)
# Run the notebooks to generate figures (each boolean states whether a figure is generated).
figures = []
examples = {
    "getting_started": False,
    "kernels": True,
    "linear": False,
    "padding": True,
    "profile": True,
    "trees": True,
    "tube": True,
}
for example, has_figure in examples.items():
    ipynb = Path(example) / f"{example}.ipynb"
    md = ipynb.with_suffix(".md")
    # Convert the markdown source of the example into a notebook.
    create_task(
        f"{example}:nb",
        action=f"jupytext --to notebook {md}",
        dependencies=[md],
        targets=[ipynb],
    )

    # Executing the notebook always yields an HTML file and, for some examples, a PNG figure.
    figure_targets = [ipynb.with_suffix(".png")] if has_figure else []
    figures.extend(figure_targets)
    task = create_task(
        f"{example}:run",
        action=f"jupyter nbconvert --to=html --execute --ExecutePreprocessor.timeout=-1 {ipynb}",
        dependencies=[ipynb],
        targets=[ipynb.with_suffix(".html"), *figure_targets],
    )
    # The profile notebook additionally depends on the task of the profile group.
    if example == "profile":
        task.task_dependencies.append(profile_group.task)

# Aggregate task that depends on every generated figure.
create_task("figures", dependencies=figures)
# Add the R example for getting started: render the R Markdown document to HTML with Rscript.
rmd = "getting_started/getting_started.Rmd"
html = Path("getting_started", "getting_started_R.html")
render_call = (
    f"rmarkdown::render('{rmd}', output_file = '{html.name}', output_dir='getting_started')"
)
action = ["Rscript", "-e", render_call]
create_task(
    name="getting_started_R:run",
    action=action,
    dependencies=[rmd],
    targets=[html],
)
def delete_compiled_stan_files(_: Task) -> None:
    """
    Remove the compiled versions of all Stan files.

    For every :code:`*.stan` file found recursively below the current working directory, the
    file with the same path minus the extension is deleted if it exists as a regular file, and
    a message naming the removed file is printed.

    Args:
        _: Task argument passed to the action; ignored.
    """
    for stan_file in Path(".").glob("**/*.stan"):
        compiled = stan_file.with_suffix("")
        # Skip Stan programs that have not been compiled (or whose stem is not a file).
        if not compiled.is_file():
            continue
        compiled.unlink()
        print(f"removed {compiled}")


create_task("rm-compiled", action=delete_compiled_stan_files)