From 94bfedf0e2ccc5d56d40628dd510b75ff83ed7a2 Mon Sep 17 00:00:00 2001 From: Mike Lin Date: Thu, 5 Nov 2020 22:20:36 -1000 Subject: [PATCH 1/7] add task hints to grammar --- WDL/Tree.py | 6 ++++++ WDL/_grammar.py | 6 ++++-- WDL/_parser.py | 4 ++++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/WDL/Tree.py b/WDL/Tree.py index dfc117db..45894591 100644 --- a/WDL/Tree.py +++ b/WDL/Tree.py @@ -261,6 +261,10 @@ class Task(SourceNode): """:type: Dict[str,WDL.Expr.Base] ``runtime{}`` section, with keys and corresponding expressions to be evaluated""" + hints: Dict[str, Any] + """:type: Dict[str, Any] + + ``hints{}`` section as a JSON-like dict""" meta: Dict[str, Any] """:type: Dict[str,Any] @@ -283,6 +287,7 @@ def __init__( parameter_meta: Dict[str, Any], runtime: Dict[str, Expr.Base], meta: Dict[str, Any], + hints: Dict[str, Any], ) -> None: super().__init__(pos) self.name = name @@ -293,6 +298,7 @@ def __init__( self.parameter_meta = parameter_meta self.runtime = runtime self.meta = meta + self.hints = hints self.effective_wdl_version = "1.0" # overridden by Document.__init__ # TODO: enforce validity constraints on parameter_meta and runtime # TODO: if the input section exists, then all postinputs decls must be diff --git a/WDL/_grammar.py b/WDL/_grammar.py index 57bfd771..4267cc87 100644 --- a/WDL/_grammar.py +++ b/WDL/_grammar.py @@ -313,6 +313,7 @@ | output_decls | meta_section | runtime_section + | hints_section | any_decl -> noninput_decl tasks: task* @@ -326,7 +327,7 @@ ?command: command1 | command2 // meta/parameter_meta sections (effectively JSON) -meta_object: "{" [meta_kv (","? meta_kv)*] "}" +meta_object: "{" [meta_kv (","? meta_kv)*] ","? "}" meta_kv: CNAME ":" meta_value ?meta_value: literal | string_literal | meta_object @@ -336,6 +337,7 @@ // task runtime section (key-expression pairs) runtime_section: "runtime" "{" [runtime_kv (","? runtime_kv)*] "}" runtime_kv: CNAME ":" expr +hints_section: "hints" meta_object /////////////////////////////////////////////////////////////////////////////////////////////////// // decl @@ -479,7 +481,7 @@ %ignore COMMENT """ keywords["development"] = set( - "Array Directory File Float Int Map None Pair String alias as call command else false if import input left meta object output parameter_meta right runtime scatter struct task then true workflow".split( + "Array Directory File Float Int Map None Pair String alias as call command else false hints if import input left meta object output parameter_meta right runtime scatter struct task then true workflow".split( " " ) ) diff --git a/WDL/_parser.py b/WDL/_parser.py index 785f4c7f..76caf2aa 100644 --- a/WDL/_parser.py +++ b/WDL/_parser.py @@ -363,6 +363,9 @@ def runtime_section(self, items, meta): d[k] = v return {"runtime": d} + def hints_section(self, items, meta): + return {"hints": items[0]} + def task(self, items, meta): d = {"noninput_decls": []} for item in items: @@ -391,6 +394,7 @@ def task(self, items, meta): d.get("parameter_meta", {}), d.get("runtime", {}), d.get("meta", {}), + d.get("hints", {}), ) def tasks(self, items, meta): From a675670b206151f51035baf0c2ea2074cdddfd30 Mon Sep 17 00:00:00 2001 From: Mike Lin Date: Sat, 7 Nov 2020 00:31:47 -1000 Subject: [PATCH 2/7] override task runtime values from input JSON --- WDL/CLI.py | 1 + WDL/Tree.py | 7 ++++++- WDL/__init__.py | 28 ++++++++++++++++++++++------ WDL/runtime/task.py | 7 +++++-- WDL/runtime/workflow.py | 9 ++++++++- tests/test_7runner.py | 37 +++++++++++++++++++++++++++++++++++++ 6 files changed, 79 insertions(+), 10 deletions(-) diff --git a/WDL/CLI.py b/WDL/CLI.py index 5cfbb5db..bafc60f2 100644 --- a/WDL/CLI.py +++ b/WDL/CLI.py @@ -971,6 +971,7 @@ def bold(line): ans.append(bold(f" {str(b.value.type)} {b.name}")) add_wrapped_parameter_meta(target, b.name, ans) optional_inputs = target.available_inputs.subtract(target.required_inputs) + optional_inputs = optional_inputs.filter(lambda b: not b.value.name.startswith("_")) if target.inputs is None: # if the target doesn't have an input{} section (pre WDL 1.0), exclude # declarations bound to a non-constant expression (heuristic) diff --git a/WDL/Tree.py b/WDL/Tree.py index 45894591..371e51f6 100644 --- a/WDL/Tree.py +++ b/WDL/Tree.py @@ -316,6 +316,11 @@ def available_inputs(self) -> Env.Bindings[Decl]: Each input is at the top level of the Env, with no namespace. """ ans = Env.Bindings() + + if self.effective_wdl_version not in ["draft-2", "1.0"]: + # synthetic placeholder to expose runtime & hints overrides + ans = ans.bind("_runtime", Decl(self.pos, Type.Any(), "_runtime")) + for decl in reversed(self.inputs if self.inputs is not None else self.postinputs): ans = ans.bind(decl.name, decl) return ans @@ -333,7 +338,7 @@ def required_inputs(self) -> Env.Bindings[Decl]: for b in reversed(list(self.available_inputs)): assert isinstance(b, Env.Binding) d: Decl = b.value - if d.expr is None and d.type.optional is False: + if d.expr is None and d.type.optional is False and not d.name.startswith("_"): ans = Env.Bindings(b, ans) return ans diff --git a/WDL/__init__.py b/WDL/__init__.py index 38a2ebd6..6f68a9e5 100644 --- a/WDL/__init__.py +++ b/WDL/__init__.py @@ -242,17 +242,33 @@ def values_from_json( key2 = key if namespace and key.startswith(namespace): key2 = key[len(namespace) :] - if key2 not in available: - # attempt to simplify .. + + ty = None + if key2 in available: + ty = available[key2] + else: key2parts = key2.split(".") - if len(key2parts) == 3 and key2parts[0] and key2parts[1] and key2parts[2]: + + runtime_idx = next( + (i for i, term in enumerate(key2parts) if term in ("runtime", "hints")), -1 + ) + if ( + runtime_idx >= 0 + and len(key2parts) > (runtime_idx + 1) + and ".".join(key2parts[:runtime_idx] + ["_runtime"]) in available + ): + # allow arbitrary keys for runtime/hints + ty = Type.Any() + elif len(key2parts) == 3 and key2parts[0] and key2parts[1] and key2parts[2]: + # attempt to simplify .. from old Cromwell JSON key2 = ".".join([key2parts[0], key2parts[2]]) - try: - ty = available[key2] - except KeyError: + if key2 in available: + ty = available[key2] + if not ty: raise Error.InputError("unknown input/output: " + key) from None if isinstance(ty, Tree.Decl): ty = ty.type + assert isinstance(ty, Type.Base) try: ans = ans.bind(key2, Value.from_json(ty, values_json[key])) diff --git a/WDL/runtime/task.py b/WDL/runtime/task.py index ec23eb00..0f68e27e 100644 --- a/WDL/runtime/task.py +++ b/WDL/runtime/task.py @@ -167,7 +167,7 @@ def run_local_task( # evaluate runtime fields stdlib = InputStdLib(task.effective_wdl_version, logger, container) container.runtime_values = _eval_task_runtime( - cfg, logger, task, container, container_env, stdlib + cfg, logger, task, posix_inputs, container, container_env, stdlib ) # interpolate command @@ -409,6 +409,7 @@ def _eval_task_runtime( cfg: config.Loader, logger: logging.Logger, task: Tree.Task, + inputs: Env.Bindings[Value.Base], container: TaskContainer, env: Env.Bindings[Value.Base], stdlib: StdLib.Base, @@ -421,8 +422,10 @@ def _eval_task_runtime( runtime_values[key] = Value.Int(v) else: raise Error.InputError(f"invalid default runtime setting {key} = {v}") - for key, expr in task.runtime.items(): + for key, expr in task.runtime.items(): # evaluate expressions in source code runtime_values[key] = expr.eval(env, stdlib) + for b in inputs.enter_namespace("runtime"): + runtime_values[b.name] = b.value # input overrides logger.debug(_("runtime values", **dict((key, str(v)) for key, v in runtime_values.items()))) ans = {} diff --git a/WDL/runtime/workflow.py b/WDL/runtime/workflow.py index e19a9168..1a888d03 100644 --- a/WDL/runtime/workflow.py +++ b/WDL/runtime/workflow.py @@ -371,7 +371,14 @@ def _do_job( assert isinstance(job.node.callee, (Tree.Task, Tree.Workflow)) callee_inputs = job.node.callee.available_inputs call_inputs = call_inputs.map( - lambda b: Env.Binding(b.name, b.value.coerce(callee_inputs[b.name].type)) + lambda b: Env.Binding( + b.name, + ( + b.value.coerce(callee_inputs[b.name].type) + if b.name in callee_inputs + else b.value + ), + ) ) # check input files against whitelist disallowed_filenames = _fspaths(call_inputs) - self.filename_whitelist diff --git a/tests/test_7runner.py b/tests/test_7runner.py index 5b62278c..6ada96bd 100644 --- a/tests/test_7runner.py +++ b/tests/test_7runner.py @@ -678,6 +678,43 @@ def test_directory(self, capture): logs += new_logs +class RuntimeOverride(RunnerTestCase): + def test_runtime_override(self): + wdl = """ + version development + workflow w { + input { + String who + } + call t { + input: + who = who + } + } + task t { + input { + String who + } + command { + cp /etc/issue issue + echo "Hello, ~{who}!" + } + output { + String msg = read_string(stdout()) + String issue = read_string("issue") + } + runtime { + docker: "ubuntu:20.04" + } + } + """ + outp = self._run(wdl, { + "who": "Alice", + "t.runtime.docker": "ubuntu:20.10" + }) + assert "20.10" in outp["t.issue"] + + class MiscRegressionTests(RunnerTestCase): def test_repeated_file_rewriting(self): wdl = """ From 60d57f578d4aca9039c58135c31698f300fa1bbd Mon Sep 17 00:00:00 2001 From: Mike Lin Date: Sun, 8 Nov 2020 11:41:40 -1000 Subject: [PATCH 3/7] CLI runtime overrides --- WDL/CLI.py | 31 ++++++++++++++++++++++++------- WDL/Env.py | 10 ++++++++++ tests/runner.t | 17 +++++++++++++---- 3 files changed, 47 insertions(+), 11 deletions(-) diff --git a/WDL/CLI.py b/WDL/CLI.py index bafc60f2..9c0361f4 100644 --- a/WDL/CLI.py +++ b/WDL/CLI.py @@ -877,9 +877,18 @@ def runner_input( name, s_value = buf # find corresponding input declaration - try: - decl = available_inputs[name] - except KeyError: + decl = available_inputs.get(name) + + if not decl: + # allow arbitrary runtime/hints overrides + nmparts = name.split(".") + runtime_idx = next( + (i for i, term in enumerate(nmparts) if term in ("runtime", "hints")), -1 + ) + if runtime_idx >= 0 and len(nmparts) > (runtime_idx + 1): + decl = available_inputs.get(".".join(nmparts[:runtime_idx] + ["_runtime"])) + + if not decl: runner_input_help(target) raise Error.InputError(f"No such input to {target.name}: {buf[0]}") @@ -887,10 +896,7 @@ def runner_input( v = runner_input_value(s_value, decl.type, downloadable, root) # insert value into input_env - try: - existing = input_env[name] - except KeyError: - existing = None + existing = input_env.get(name) if existing: if isinstance(v, Value.Array): assert isinstance(existing, Value.Array) and v.type.coerces(existing.type) @@ -1053,6 +1059,17 @@ def runner_input_value(s_value, ty, downloadable, root): return Value.Array( ty.item_type, [runner_input_value(s_value, ty.item_type, downloadable, root)] ) + if isinstance(ty, Type.Any): + # infer dynamically-typed runtime/hints overrides + try: + return Value.Int(int(s_value)) + except ValueError: + pass + try: + return Value.Float(float(s_value)) + except ValueError: + pass + return Value.String(s_value) raise Error.InputError( "No command-line support yet for inputs of type {}; workaround: specify in JSON file with --input".format( str(ty) diff --git a/WDL/Env.py b/WDL/Env.py index 0e8d628c..0500b080 100644 --- a/WDL/Env.py +++ b/WDL/Env.py @@ -105,6 +105,16 @@ def resolve_binding(self, name: str) -> Binding[T]: return b raise KeyError() + def get(self, name: str, default: Optional[T] = None) -> Optional[T]: + """ + Look up a bound value by name, returning the default value or ``None`` if there's no such + binding. + """ + try: + return self.resolve_binding(name).value + except KeyError: + return default + def resolve(self, name: str) -> T: """ Look up a bound value by name. Equivalently, ``env[name]`` diff --git a/tests/runner.t b/tests/runner.t index 4cf0bc6c..365c7955 100644 --- a/tests/runner.t +++ b/tests/runner.t @@ -11,7 +11,7 @@ source tests/bash-tap/bash-tap-bootstrap export PYTHONPATH="$SOURCE_DIR:$PYTHONPATH" miniwdl="python3 -m WDL" -plan tests 72 +plan tests 74 $miniwdl run_self_test is "$?" "0" "run_self_test" @@ -202,7 +202,7 @@ is "$?" "0" "failer2000 try3 iwuzhere" cat << 'EOF' > multitask.wdl -version 1.0 +version development workflow multi { call first } @@ -219,16 +219,20 @@ task first { task second { command { echo -n two + cp /etc/issue issue } output { String msg = read_string(stdout()) + File issue = "issue" } } EOF -$miniwdl run multitask.wdl --task second +$miniwdl run multitask.wdl runtime.docker=ubuntu:20.10 --task second is "$?" "0" "multitask" is "$(jq -r '.["second.msg"]' _LAST/outputs.json)" "two" "multitask stdout & _LAST" +grep -q 20.10 _LAST/out/issue/issue +is "$?" "0" "override runtime.docker" cat << 'EOF' > mv_input_file.wdl version 1.0 @@ -261,6 +265,7 @@ workflow w { } output { Int dsz = round(size(t.files)) + File issue = t.issue } } task t { @@ -268,11 +273,13 @@ task t { Directory d } command <<< + cp /etc/issue issue mkdir outdir find ~{d} -type f | xargs -i{} cp {} outdir/ >>> output { Array[File] files = glob("outdir/*") + File issue = "issue" } } EOF @@ -280,9 +287,11 @@ EOF mkdir -p indir/subdir echo alice > indir/alice.txt echo bob > indir/subdir/bob.txt -$miniwdl run dir_io.wdl d=indir +$miniwdl run dir_io.wdl d=indir t.runtime.docker=ubuntu:20.10 is "$?" "0" "directory input" is `jq -r '.["w.dsz"]' _LAST/outputs.json` "10" "use of directory input" +grep -q 20.10 _LAST/out/issue/issue +is "$?" "0" "override t.runtime.docker" cat << 'EOF' > uri_inputs.json { From 0cc53650d0984c8116b6eceada2727020c99f64e Mon Sep 17 00:00:00 2001 From: Mike Lin Date: Sun, 8 Nov 2020 11:59:01 -1000 Subject: [PATCH 4/7] alias runtime.container to runtime.docker and accept array values --- WDL/runtime/task.py | 8 ++++++-- tests/test_7runner.py | 3 +++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/WDL/runtime/task.py b/WDL/runtime/task.py index 0f68e27e..ad2e9401 100644 --- a/WDL/runtime/task.py +++ b/WDL/runtime/task.py @@ -429,8 +429,12 @@ def _eval_task_runtime( logger.debug(_("runtime values", **dict((key, str(v)) for key, v in runtime_values.items()))) ans = {} - if "docker" in runtime_values: - ans["docker"] = runtime_values["docker"].coerce(Type.String()).value + docker_value = runtime_values.get("docker", runtime_values.get("container")) + if docker_value: + if isinstance(docker_value, Value.Array) and len(docker_value.value): + # TODO: ask TaskContainer to choose preferred candidate + docker_value = docker_value.value[0] + ans["docker"] = docker_value.coerce(Type.String()).value host_limits = container.__class__.detect_resource_limits(cfg, logger) if "cpu" in runtime_values: diff --git a/tests/test_7runner.py b/tests/test_7runner.py index 6ada96bd..1a75891c 100644 --- a/tests/test_7runner.py +++ b/tests/test_7runner.py @@ -792,6 +792,9 @@ def test_weird_filenames(self): output { Array[File] files_out = glob("files_out/*") } + runtime { + docker: ["ubuntu:20.04"] + } } """ From 3dfe9866c327b057a6f3bc0334fdb58eb8e7b60b Mon Sep 17 00:00:00 2001 From: Mike Lin Date: Sun, 8 Nov 2020 15:59:43 -1000 Subject: [PATCH 5/7] fix --- WDL/Env.py | 18 +++++++++--------- WDL/Tree.py | 8 +++----- WDL/runtime/task.py | 2 +- tests/test_7runner.py | 9 +++++++-- 4 files changed, 20 insertions(+), 17 deletions(-) diff --git a/WDL/Env.py b/WDL/Env.py index 0500b080..f8fbc294 100644 --- a/WDL/Env.py +++ b/WDL/Env.py @@ -105,24 +105,24 @@ def resolve_binding(self, name: str) -> Binding[T]: return b raise KeyError() + def resolve(self, name: str) -> T: + """ + Look up a bound value by name. Equivalently, ``env[name]`` + + :raise KeyError: no such binding + """ + return self.resolve_binding(name).value + def get(self, name: str, default: Optional[T] = None) -> Optional[T]: """ Look up a bound value by name, returning the default value or ``None`` if there's no such binding. """ try: - return self.resolve_binding(name).value + return self.resolve(name) except KeyError: return default - def resolve(self, name: str) -> T: - """ - Look up a bound value by name. Equivalently, ``env[name]`` - - :raise KeyError: no such binding - """ - return self.resolve_binding(name).value - def __getitem__(self, name: str) -> T: return self.resolve(name) diff --git a/WDL/Tree.py b/WDL/Tree.py index 371e51f6..b258ada4 100644 --- a/WDL/Tree.py +++ b/WDL/Tree.py @@ -317,7 +317,7 @@ def available_inputs(self) -> Env.Bindings[Decl]: """ ans = Env.Bindings() - if self.effective_wdl_version not in ["draft-2", "1.0"]: + if self.effective_wdl_version not in ("draft-2", "1.0"): # synthetic placeholder to expose runtime & hints overrides ans = ans.bind("_runtime", Decl(self.pos, Type.Any(), "_runtime")) @@ -1174,13 +1174,11 @@ def _rewrite_output_idents(self) -> None: # into the decl name with a ., which is a weird corner # case! synthetic_output_name = ".".join(output_ident) - try: - ty = self._type_env.resolve(synthetic_output_name) - except KeyError: + ty = self._type_env.get(synthetic_output_name) + if not ty: raise Error.UnknownIdentifier( Expr.Ident(self._output_idents_pos, synthetic_output_name) ) from None - assert isinstance(ty, Type.Base) output_ident_decls.append( Decl( self.pos, diff --git a/WDL/runtime/task.py b/WDL/runtime/task.py index ad2e9401..9fc63a3c 100644 --- a/WDL/runtime/task.py +++ b/WDL/runtime/task.py @@ -429,7 +429,7 @@ def _eval_task_runtime( logger.debug(_("runtime values", **dict((key, str(v)) for key, v in runtime_values.items()))) ans = {} - docker_value = runtime_values.get("docker", runtime_values.get("container")) + docker_value = runtime_values.get("docker", runtime_values.get("container", None)) if docker_value: if isinstance(docker_value, Value.Array) and len(docker_value.value): # TODO: ask TaskContainer to choose preferred candidate diff --git a/tests/test_7runner.py b/tests/test_7runner.py index 1a75891c..a0eed798 100644 --- a/tests/test_7runner.py +++ b/tests/test_7runner.py @@ -766,7 +766,7 @@ def test_weird_filenames(self): inputs["files"].append(fn) wdl = """ - version 1.0 + version development workflow w { input { Array[File] files @@ -793,7 +793,12 @@ def test_weird_filenames(self): Array[File] files_out = glob("files_out/*") } runtime { - docker: ["ubuntu:20.04"] + container: ["ubuntu:20.04"] + } + hints { + foo: { + bar: ["bas", 42] + } } } """ From c4584bad11db0d3a1a16a814f4d89c4c8ee13efd Mon Sep 17 00:00:00 2001 From: Mike Lin Date: Sun, 8 Nov 2020 19:30:03 -1000 Subject: [PATCH 6/7] fix --- WDL/runtime/task.py | 4 +++- tests/test_7runner.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/WDL/runtime/task.py b/WDL/runtime/task.py index 9fc63a3c..bc630bc6 100644 --- a/WDL/runtime/task.py +++ b/WDL/runtime/task.py @@ -426,10 +426,12 @@ def _eval_task_runtime( runtime_values[key] = expr.eval(env, stdlib) for b in inputs.enter_namespace("runtime"): runtime_values[b.name] = b.value # input overrides + if "container" in runtime_values: # alias + runtime_values["docker"] = runtime_values["container"] logger.debug(_("runtime values", **dict((key, str(v)) for key, v in runtime_values.items()))) ans = {} - docker_value = runtime_values.get("docker", runtime_values.get("container", None)) + docker_value = runtime_values.get("docker", None) if docker_value: if isinstance(docker_value, Value.Array) and len(docker_value.value): # TODO: ask TaskContainer to choose preferred candidate diff --git a/tests/test_7runner.py b/tests/test_7runner.py index a0eed798..94cbe7ac 100644 --- a/tests/test_7runner.py +++ b/tests/test_7runner.py @@ -710,7 +710,7 @@ def test_runtime_override(self): """ outp = self._run(wdl, { "who": "Alice", - "t.runtime.docker": "ubuntu:20.10" + "t.runtime.container": ["ubuntu:20.10"] }) assert "20.10" in outp["t.issue"] From 74c07489c468564f600333b40f9d2b8523800ac1 Mon Sep 17 00:00:00 2001 From: Mike Lin Date: Mon, 9 Nov 2020 22:01:07 -1000 Subject: [PATCH 7/7] polish --- WDL/runtime/task.py | 4 +++- docs/runner_reference.md | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/WDL/runtime/task.py b/WDL/runtime/task.py index bc630bc6..a26d0048 100644 --- a/WDL/runtime/task.py +++ b/WDL/runtime/task.py @@ -489,7 +489,9 @@ def _eval_task_runtime( if ans: logger.info(_("effective runtime", **ans)) unused_keys = list( - key for key in runtime_values if key not in ("cpu", "memory") and key not in ans + key + for key in runtime_values + if key not in ("cpu", "memory", "container") and key not in ans ) if unused_keys: logger.warning(_("ignored runtime settings", keys=unused_keys)) diff --git a/docs/runner_reference.md b/docs/runner_reference.md index 958abe25..6e1afc5b 100644 --- a/docs/runner_reference.md +++ b/docs/runner_reference.md @@ -79,6 +79,7 @@ Any option can thus be set/changed temporarily without a configuration file. The default local implementation observes these task `runtime {}` attributes: * `docker` (String): docker image tag used to instantiate container; if omitted, a default image is specified in the miniwdl configuration option `[task_runtime] defaults` (currently `ubuntu:20.04`) +* `container` (alias for `docker`) * `cpu` (Int): container reserves, and is throttled to, this many CPUs * Automatically rounds down to all host CPUs, if fewer * Multiple tasks can run concurrently on the local host, if CPUs and memory are available to meet their total reservations, and the workflow dependencies allow