forked from lcdb/lcdb-wf
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdeploy.py
234 lines (206 loc) · 6.71 KB
/
deploy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
import os
import tempfile
import argparse
import subprocess as sp
import datetime
import json
import fnmatch
# Absolute path of the directory containing this script. It is handed to rsync
# as the source directory, so it has to be a usable path even when the script
# is invoked through a relative path (e.g. `python deploy.py`), in which case
# `os.path.dirname(__file__)` on its own can be the empty string.
HERE = os.path.dirname(os.path.abspath(__file__))

# Free-form description of what the script does; passed to argparse below.
usage = """
This script assists in the deployment of lcdb-wf to working directories.
The lcdb-wf repository contains infrastructure for testing that is not
typically needed when using it in practice. Furthermore, you might not need all
possible workflows.
This script copies over only the files required for each "flavor" of analysis
(rnaseq, chipseq, colocalization, full) and also stores a file,
`.lcdb-wf-deployment.json`, containing details about the git commit that was
used and the timestamp. This can be used to compare changes and stay
up-to-date.
"""
# Notes on include/exclude patterns for rsync:
#
# - Excluding a directory excludes everything below it
# - Including a directory does not automatically include everything below it.
# - Use "dir/***" to include everything
# - Patterns with no / apply to the basename
# - Patterns ending with / match directories only
# - Patterns starting with / are anchored at the source dir provided to rsync
# - * is anything but /
# - ** is any part of path, including /
# Patterns that apply to every deployment, whatever the flavor.
#
# The "exclude" entries are written verbatim to the file given to rsync's
# --exclude-from, so they follow the rsync pattern rules described above; the
# same list is also used to pre-filter the git-tracked file names.
# NOTE(review): the "include" entries are not referenced anywhere else in the
# visible part of this script -- confirm whether they are still needed.
always = dict(
    include=[
        "wrappers/wrappers",
        "include",
        "lib",
        "requirements.txt",
        ".gitignore",
    ],
    exclude=[
        "sra_sampletable.tsv",
        "/.buildkite/*",
        "/ci/*",
        "/.circleci/*",
        "/config/*",
        "/deploy.py",
        "/docs/***",
        "/include/AnnotationHubCache",
        "/lib/postprocess/__pycache__",
        "/lib/__pycache__",
        "*/.pytest_cache/*",
        "/README.md",
        "/test/***",
        "/.travis.yml",
        "/workflows/*/results",
        "/workflows/*/data",
        "/workflows/figures/*",
        "/workflows/*/references_data",
        "/workflows/*/references_dir",
        "/workflows/*/reports",
        "/workflows/rnaseq/downstream/final_clusters",
        "/workflows/rnaseq/downstream/*html",
        "/workflows/rnaseq/downstream/*log",
        "/workflows/rnaseq/downstream/rnaseq_cache",
        "/workflows/rnaseq/downstream/rnaseq_files",
        "/workflows/rnaseq/downstream/*.tsv*",
        "/workflows/*/run_test.sh",
        "/workflows/*/Snakefile.test",
        "/workflows/*/.snakemake",
        "/wrappers/demo/*",
        "/wrappers/test/*",
        "/wrappers/test_toy.py",
    ],
)
# For each deployment flavor, the glob patterns (matched with fnmatch) that a
# path under workflows/ must satisfy in order to be copied.
flavors = dict(
    chipseq=["workflows/chipseq/*", "workflows/references/*"],
    rnaseq=["workflows/rnaseq/*", "workflows/references/*"],
    colocalization=["workflows/colocalization/*"],
    full=["workflows/*"],
)
# Command-line interface.
ap = argparse.ArgumentParser(usage=usage)
ap.add_argument(
    "--flavor",
    default="full",
    # Reject unknown flavors at parse time with a readable message, rather
    # than failing later with a bare KeyError when `flavors` is indexed.
    choices=list(flavors.keys()),
    help="""Options are {0}. Default is full.""".format(list(flavors.keys())),
)
ap.add_argument(
    "--dest",
    # The destination ends up in the rsync command line and in the path of the
    # deployment log; without it the script cannot do anything useful, so
    # insist on it here instead of crashing on a None value later.
    required=True,
    help="""Destination directory in which to copy files""",
)
ap.add_argument(
    "--build-env",
    action="store_true",
    help="""If specified, a conda environment with all dependencies will be
installed into a directory called "env" within the directory provided for
--dest. """,
)
ap.add_argument("--verbose", "-v", action="store_true", help="""Verbose mode""")
args = ap.parse_args()

# Convenience aliases used by the rest of the script.
dest = args.dest
flavor = args.flavor
def filter_out_excluded(filenames, patterns_to_exclude):
    """
    Only return the subset of `filenames` that do not match any
    `patterns_to_exclude`.

    Matching is done with `fnmatch.fnmatch`, i.e. shell-style globbing against
    the whole filename string. Note that this is not rsync's pattern syntax: a
    pattern that starts with "/" only matches a filename that itself starts
    with "/".

    Parameters
    ----------
    filenames : iterable of str
        Candidate paths. Their relative order is preserved in the result.
    patterns_to_exclude : iterable of str
        fnmatch-style patterns. May be any iterable, including a generator.

    Returns
    -------
    list of str
        The filenames that matched none of the patterns.
    """
    # Materialise once: the patterns are re-scanned for every filename, which
    # would silently stop working after the first filename for a one-shot
    # iterable such as a generator.
    patterns = list(patterns_to_exclude)
    return [
        filename
        for filename in filenames
        if not any(fnmatch.fnmatch(filename, pattern) for pattern in patterns)
    ]
def filter_out_other_workflows(filenames, patterns_to_include, prefilter="workflows/*"):
    """
    Return subset of `filenames` that:
    - don't match `prefilter`
    - match prefilter AND match any of `patterns_to_include`

    In other words, filenames outside of `prefilter` are passed through
    untouched, while those inside it are kept only when explicitly included.
    All matching is done with `fnmatch.fnmatch`.

    Parameters
    ----------
    filenames : iterable of str
        Candidate paths. Their relative order is preserved in the result.
    patterns_to_include : iterable of str
        fnmatch-style patterns selecting which prefiltered paths to keep. May
        be any iterable, including a generator.
    prefilter : str
        fnmatch-style pattern that decides which filenames are subject to the
        include check at all.

    Returns
    -------
    list of str
    """
    # Materialise once so that a one-shot iterable is not exhausted after the
    # first filename that reaches the include check.
    patterns = list(patterns_to_include)
    return [
        filename
        for filename in filenames
        # Not covered by the prefilter: nothing to decide, keep it.
        if not fnmatch.fnmatch(filename, prefilter)
        # Covered by the prefilter: keep only if explicitly included.
        or any(fnmatch.fnmatch(filename, pattern) for pattern in patterns)
    ]
# We start with anything under version control, and then progressively filter
# out other stuff.
#
# git is run from the directory containing this script so that the paths it
# lists are relative to the same root that rsync copies from, regardless of
# the directory the script was launched in.
repo_root = os.path.abspath(HERE)
under_version_control = sorted(
    sp.check_output(
        ["git", "ls-tree", "-r", "HEAD", "--name-only"],
        universal_newlines=True,
        cwd=repo_root,
    ).splitlines(False)
)
keep = filter_out_excluded(under_version_control, always["exclude"])
keep = filter_out_other_workflows(keep, flavors[flavor])

# The pattern files are written through the NamedTemporaryFile handles
# themselves, so each handle is closed as soon as its content is on disk.
# delete=False keeps the files around for rsync to read afterwards.
with tempfile.NamedTemporaryFile("w", delete=False) as fout:
    exclude = fout.name
    fout.write("\n".join(always["exclude"]))
if args.verbose:
    print("Exclude file: {}".format(exclude))

with tempfile.NamedTemporaryFile("w", delete=False) as fout:
    include = fout.name
    fout.write("\n\n")
    fout.write("\n".join(keep) + "\n")
if args.verbose:
    print("Include file: {}".format(include))

rsync = [
    "rsync",
    "--relative",
    "-ar",
    "--progress",
    "--files-from={}".format(include),
    "--exclude-from={}".format(exclude),
    repo_root,
    dest,
]
if args.verbose:
    rsync.append("-vv")
try:
    sp.check_call(rsync)
finally:
    # The pattern files are only needed while rsync runs. In verbose mode
    # their paths have been printed for inspection, so leave them in place.
    if not args.verbose:
        os.remove(exclude)
        os.remove(include)
# This next section builds the .lcdb-wf-deployment.json data.
#
# All git queries are run from the directory containing this script, so that
# the recorded metadata describes the repository being deployed and not
# whatever repository the current working directory happens to be in.
source_repo = os.path.abspath(HERE)

# First, the last commit. `--oneline` prints "<abbrev-hash> <subject>";
# partition() is used so that a commit with an empty subject still unpacks
# (message is then the empty string).
commit, _, message = (
    sp.check_output(
        ["git", "log", "--oneline", "-1"], universal_newlines=True, cwd=source_repo
    )
    .strip()
    .partition(" ")
)

# When we're deploying:
now = datetime.datetime.now().strftime("%Y%m%d%H%M")

# Where the remote was:
remotes = sp.check_output(
    ["git", "remote", "-v"], universal_newlines=True, cwd=source_repo
)
remotes = [i.strip() for i in remotes.splitlines()]

# The branch we're deploying from: the line that `git branch` marks with "*".
# --no-color keeps the marker at the start of the line even if the user's git
# config forces colored output.
branch = sp.check_output(
    ["git", "branch", "--no-color"], universal_newlines=True, cwd=source_repo
)
branch = [i for i in branch.splitlines() if i.startswith("*")]
if len(branch) != 1:
    # Explicit check rather than `assert`, which is skipped under `python -O`.
    raise RuntimeError(
        "Expected exactly one current branch from `git branch`, found {}".format(
            len(branch)
        )
    )
branch = branch[0].split("* ", 1)[1]

d = {
    "git": {"commit": commit, "message": message, "remotes": remotes, "branch": branch},
    "timestamp": now,
}

log = os.path.join(dest, ".lcdb-wf-deployment.json")
# The log is made read-only below, so a log left behind by an earlier
# deployment to the same destination could not be opened for writing. Remove
# any previous copy first.
try:
    os.remove(log)
except FileNotFoundError:
    pass
with open(log, "w") as fout:
    fout.write(json.dumps(d) + "\n")
os.chmod(log, 0o440)
# Optionally create a conda environment at `dest/env` from requirements.txt.
# The command is run with `dest` as working directory, which is why both the
# environment prefix and the requirements file are given as relative paths.
if args.build_env:
    conda_cmd = ["conda", "create", "-y", "-p", "./env", "--file", "requirements.txt"]
    # Channels are passed in this order: conda-forge, bioconda, defaults.
    for channel in ("conda-forge", "bioconda", "defaults"):
        conda_cmd.extend(["-c", channel])
    sp.check_call(conda_cmd, universal_newlines=True, cwd=dest)