From 320f6f460c1adf5810da05fc4e71f5b62c6e9e48 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Thu, 27 Jul 2023 21:55:14 +0200 Subject: [PATCH 1/8] Removed erroneous numba.prange. --- npbench/benchmarks/polybench/cholesky/cholesky_numba_npr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/npbench/benchmarks/polybench/cholesky/cholesky_numba_npr.py b/npbench/benchmarks/polybench/cholesky/cholesky_numba_npr.py index de53dcd..fb79194 100644 --- a/npbench/benchmarks/polybench/cholesky/cholesky_numba_npr.py +++ b/npbench/benchmarks/polybench/cholesky/cholesky_numba_npr.py @@ -7,7 +7,7 @@ def kernel(A): A[0, 0] = np.sqrt(A[0, 0]) for i in range(1, A.shape[0]): - for j in nb.prange(i): + for j in range(i): A[i, j] -= np.dot(A[i, :j], A[j, :j]) A[i, j] /= A[j, j] A[i, i] -= np.dot(A[i, :i], A[i, :i]) From b24d04a3ac60f7b5368db1e39b0825b13abab7f6 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Thu, 27 Jul 2023 21:56:09 +0200 Subject: [PATCH 2/8] Renamed numba sample. --- .../cholesky/{cholesky_numba_npr.py => cholesky_numba_np.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename npbench/benchmarks/polybench/cholesky/{cholesky_numba_npr.py => cholesky_numba_np.py} (100%) diff --git a/npbench/benchmarks/polybench/cholesky/cholesky_numba_npr.py b/npbench/benchmarks/polybench/cholesky/cholesky_numba_np.py similarity index 100% rename from npbench/benchmarks/polybench/cholesky/cholesky_numba_npr.py rename to npbench/benchmarks/polybench/cholesky/cholesky_numba_np.py From 99dc8f04b3a433362a47afc382e1c20f1b2638cf Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Thu, 27 Jul 2023 21:56:43 +0200 Subject: [PATCH 3/8] Fixed string, framework used. 
--- npbench/infrastructure/test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/npbench/infrastructure/test.py b/npbench/infrastructure/test.py index da0eafd..bccc3f7 100644 --- a/npbench/infrastructure/test.py +++ b/npbench/infrastructure/test.py @@ -43,8 +43,8 @@ def _execute(self, frmwrk: Framework, impl: Callable, impl_name: str, mode: str, out = [out] else: out = [] - if "out_args" in self.bench.info.keys(): - out += [ldict[a] for a in self.frmwrk.args(self.bench)] + if "output_args" in self.bench.info.keys(): + out += [ldict[a] for a in frmwrk.args(self.bench)] return out, timelist def run(self, preset: str, validate: bool, repeat: int, timeout: float = 200.0, ignore_errors: bool = True): From 54c3f0e4530299d91578560582948545160abb04 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Thu, 27 Jul 2023 21:57:50 +0200 Subject: [PATCH 4/8] Amended utility method to properly populate context dictionary with all arguments. --- npbench/infrastructure/utilities.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/npbench/infrastructure/utilities.py b/npbench/infrastructure/utilities.py index d9f870e..7bdf810 100644 --- a/npbench/infrastructure/utilities.py +++ b/npbench/infrastructure/utilities.py @@ -134,16 +134,20 @@ def inner(_it, _timer{init}): def benchmark(stmt, setup="pass", out_text="", repeat=1, context={}, output=None, verbose=True): - timeit.template = timeit_tmpl.format(init='{init}', setup='{setup}', stmt='{stmt}', output=output) - ldict = {**context} - output = timeit.repeat(stmt, setup=setup, repeat=repeat, number=1, globals=ldict) - res = output[0][1] - raw_time_list = [a for a, _ in output] + raw_time_list = timeit.repeat(stmt, setup=setup, repeat=repeat, number=1, globals=ldict) raw_time = np.median(raw_time_list) ms_time = time_to_ms(raw_time) if verbose: print("{}: {}ms".format(out_text, ms_time)) + + if output is not None: + exec(setup, context) + exec(stmt, context) + res = 
context[output] + else: + res = None + return res, raw_time_list From 988dc1b6f0fcb575ae14fbeba6889f4459e70712 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Tue, 14 Nov 2023 00:04:45 +0100 Subject: [PATCH 5/8] Using apply_gpu_storage and auto-opt with use_gpu_storage. --- npbench/infrastructure/dace_framework.py | 25 ++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/npbench/infrastructure/dace_framework.py b/npbench/infrastructure/dace_framework.py index 2c007ad..2a5afec 100644 --- a/npbench/infrastructure/dace_framework.py +++ b/npbench/infrastructure/dace_framework.py @@ -185,14 +185,14 @@ def parallelize(sdfg): try: def autoopt(sdfg, device, symbols): #, nofuse): - # Mark arrays as on the GPU - if device == dtypes.DeviceType.GPU: - for k, v in sdfg.arrays.items(): - if not v.transient and type(v) == dace.data.Array: - v.storage = dace.dtypes.StorageType.GPU_Global + # # Mark arrays as on the GPU + # if device == dtypes.DeviceType.GPU: + # for k, v in sdfg.arrays.items(): + # if not v.transient and type(v) == dace.data.Array: + # v.storage = dace.dtypes.StorageType.GPU_Global # Auto-optimize SDFG - opt.auto_optimize(auto_opt_sdfg, device, symbols=symbols) + opt.auto_optimize(auto_opt_sdfg, device, symbols=symbols, use_gpu_storage=True) auto_opt_sdfg = copy.deepcopy(strict_sdfg) auto_opt_sdfg._name = 'auto_opt' @@ -229,9 +229,10 @@ def vectorize(sdfg, vec_len=None): dace.Config.set('library', 'blas', 'default_implementation', value='cuBLAS') def copy_to_gpu(sdfg): - for k, v in sdfg.arrays.items(): - if not v.transient and isinstance(v, dace.data.Array): - v.storage = dace.dtypes.StorageType.GPU_Global + opt.apply_gpu_storage(sdfg) + # for k, v in sdfg.arrays.items(): + # if not v.transient and isinstance(v, dace.data.Array): + # v.storage = dace.dtypes.StorageType.GPU_Global if self.info["arch"] == "gpu": import cupy as cp @@ -242,9 +243,9 @@ def copy_to_gpu(sdfg): fe_time = t if sdfg._name != 'auto_opt': 
device = dtypes.DeviceType.GPU if self.info["arch"] == "gpu" else dtypes.DeviceType.CPU - if self.info["arch"] == "cpu": - # GPUTransform will set GPU schedules by itself - opt.set_fast_implementations(sdfg, device) + # if self.info["arch"] == "cpu": + # # GPUTransform will set GPU schedules by itself + opt.set_fast_implementations(sdfg, device) if self.info["arch"] == "gpu": if sdfg._name in ['strict', 'parallel', 'fusion']: _, gpu_time1 = util.benchmark("copy_to_gpu(sdfg)", From 78942b265f237d11075d82ca1b1e4f54967b64f0 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Fri, 31 Jan 2025 18:46:42 +0100 Subject: [PATCH 6/8] Rename `out_args` to `mutable_args` and added separate method for returning inout arguments that have to be validated. --- npbench/infrastructure/framework.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/npbench/infrastructure/framework.py b/npbench/infrastructure/framework.py index 1958f24..1cd8910 100644 --- a/npbench/infrastructure/framework.py +++ b/npbench/infrastructure/framework.py @@ -91,7 +91,7 @@ def args(self, bench: Benchmark, impl: Callable = None): for a in bench.info["input_args"] ] - def out_args(self, bench: Benchmark, impl: Callable = None): + def mutable_args(self, bench: Benchmark, impl: Callable = None): """ Generates the input/output arguments that should be copied during the setup. :param bench: A benchmark. @@ -99,9 +99,17 @@ def out_args(self, bench: Benchmark, impl: Callable = None): """ return ["__npb_{pr}_{a}".format(pr=self.info["prefix"], a=a) for a in bench.info["array_args"]] + - # def params(self, bench: Benchmark, impl: Callable = None): - # return list(bench.info["input_params"]) + def inout_args(self, bench: Benchmark, impl: Callable = None): + """ Generates the input/output arguments that should be checked during + validation. + :param bench: A benchmark. + :param impl: A benchmark implementation. 
+ """ + + return ["__npb_{pr}_{a}".format(pr=self.info["prefix"], a=a) for a in bench.info["output_args"]] + def arg_str(self, bench: Benchmark, impl: Callable = None): """ Generates the argument-string that should be used for calling @@ -119,7 +127,7 @@ def out_arg_str(self, bench: Benchmark, impl: Callable = None): :param impl: A benchmark implementation. """ - output_args = self.out_args(bench, impl) + output_args = self.mutable_args(bench, impl) return ", ".join(output_args) def setup_str(self, bench: Benchmark, impl: Callable = None): From 9b626f292d0dae3cf7e7fbafa7cff9504f548c18 Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Fri, 31 Jan 2025 18:47:54 +0100 Subject: [PATCH 7/8] Amended how (total) output arguments are returned and added assertion check to verify that all inout arguments have been added to the out list. --- npbench/infrastructure/test.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/npbench/infrastructure/test.py b/npbench/infrastructure/test.py index bccc3f7..a33dff5 100644 --- a/npbench/infrastructure/test.py +++ b/npbench/infrastructure/test.py @@ -44,7 +44,10 @@ def _execute(self, frmwrk: Framework, impl: Callable, impl_name: str, mode: str, else: out = [] if "output_args" in self.bench.info.keys(): - out += [ldict[a] for a in frmwrk.args(self.bench)] + num_return_args = len(out) + num_output_args = len(self.bench.info["output_args"]) + out += [ldict[a] for a in frmwrk.inout_args(self.bench)] + assert len(out) == num_return_args + num_output_args, "Number of output arguments does not match." return out, timelist def run(self, preset: str, validate: bool, repeat: int, timeout: float = 200.0, ignore_errors: bool = True): From 4eba4dfe2017c7ebb38badebb71ae8bda990da3f Mon Sep 17 00:00:00 2001 From: Alexandros Nikolaos Ziogas Date: Fri, 31 Jan 2025 18:48:11 +0100 Subject: [PATCH 8/8] Updated documentation about mutable and inout arguments. 
--- frameworks.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/frameworks.md b/frameworks.md index de1aecd..a816e2b 100644 --- a/frameworks.md +++ b/frameworks.md @@ -21,7 +21,8 @@ The base `Framework` class (found in [`npbench/infrastructure/framework.py`](npb - impl_files: Returns a list of the framework's implementation files for the input benchmark. Each element in the list is a tuple of the implementation filename and a description (e.g. `default` or `nopython-parallel`). - implementations: Returns a list of the framework's implementations for the input benchmark. Each element in the list is a tuple of the implementation method and a description (as above). - args: Returns a list with the names of the input arguments for running the input implementation of the input benchmark. -- out_args: Returns a list with the input arguments for running the input implementation of the input benchmark **and** have to be copied(for example, because they may be modified during benchmark execution). +- mutable_args: Returns a list with the input arguments for running the input implementation of the input benchmark **and** have to be copied (for example, because they may be modified during benchmark execution). +- inout_args: Returns a list with the input arguments that are also output, i.e., they must be validated. - arg_str: Returns the argument-string needed to call the input implementation of the input benchmark. - out_arg_str: Returns the argument-string with the input arguments that must be copied. - setup_str: Returns the setup-string of the code that should be executed for, e.g., copying data, before executing the benchmark implementation.