Olympus-HPC · johnbowen42 · Jan 23, 2025 · Jan 31, 2025 · Jan 31, 2025 · Feb 1, 2025
diff --git a/.gitmodules b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "RAJAPerf"]
+	path = benchmarks/RAJAPerf
+	url = https://github.com/Olympus-HPC/RAJAPerf.git
diff --git a/benchmarks.toml b/benchmarks.toml
@@ -1,65 +1,119 @@
 [adam]
 [adam.nvidia]
-aot = "benchmarks/hecbench/cuda/adam"
-proteus = "benchmarks/hecbench/cuda/adam"
-jitify = "benchmarks/hecbench/cuda-jitify/adam"
-[adam.amd]
-aot = "benchmarks/hecbench/hip/adam"
-proteus = "benchmarks/hecbench/hip/adam"
+[adam.nvidia.aot]
+path = "benchmarks/hecbench/cuda/adam"
+exe = "adam-aot.x"
+[adam.nvidia.proteus]
+path = "benchmarks/hecbench/cuda/adam"
+exe = "adam-proteus.x"
+[adam.nvidia.jitify]
+path = "benchmarks/hecbench/cuda-jitify/adam"
+exe = "adam-jitify.x"
+[adam.amd.aot]
+path = "benchmarks/hecbench/hip/adam"
+exe = "adam-aot.x"
+[adam.amd.proteus]
+path = "benchmarks/hecbench/hip/adam"
+exe = "adam-proteus.x"
 [adam.inputs]
 default = "160000 1600 1000"
 
 [feynman-kac]
 [feynman-kac.nvidia]
-aot = "benchmarks/hecbench/cuda/feynman-kac"
-proteus = "benchmarks/hecbench/cuda/feynman-kac"
-jitify = "benchmarks/hecbench/cuda-jitify/feynman-kac"
-[feynman-kac.amd]
-aot = "benchmarks/hecbench/hip/feynman-kac"
-proteus = "benchmarks/hecbench/hip/feynman-kac"
+[feynman-kac.nvidia.aot]
+path = "benchmarks/hecbench/cuda/feynman-kac"
+exe = "kac-aot.x"
+[feynman-kac.nvidia.proteus]
+path = "benchmarks/hecbench/cuda/feynman-kac"
+exe = "kac-proteus.x"
+[feynman-kac.nvidia.jitify]
+path = "benchmarks/hecbench/cuda-jitify/feynman-kac"
+exe = "kac-jitify.x"
+[feynman-kac.amd.aot]
+path = "benchmarks/hecbench/hip/feynman-kac"
+exe = "kac-aot.x"
+[feynman-kac.amd.proteus]
+path = "benchmarks/hecbench/hip/feynman-kac"
+exe = "kac-proteus.x"
 [feynman-kac.inputs]
 default = "1"
 
 [lulesh]
 [lulesh.nvidia]
-aot = "benchmarks/hecbench/cuda/LULESH"
-proteus = "benchmarks/hecbench/cuda/LULESH"
-jitify = "benchmarks/hecbench/cuda-jitify/LULESH"
-[lulesh.amd]
-aot = "benchmarks/hecbench/hip/LULESH"
-proteus = "benchmarks/hecbench/hip/LULESH"
+[lulesh.nvidia.aot]
+path = "benchmarks/hecbench/cuda/LULESH"
+exe = "LULESH-aot.x"
+[lulesh.nvidia.proteus]
+path = "benchmarks/hecbench/cuda/LULESH"
+exe = "LULESH-proteus.x"
+[lulesh.nvidia.jitify]
+path = "benchmarks/hecbench/cuda-jitify/LULESH"
+exe = "LULESH-jitify.x"
+[lulesh.amd.aot]
+path = "benchmarks/hecbench/hip/LULESH"
+exe = "LULESH-aot.x"
+[lulesh.amd.proteus]
+path = "benchmarks/hecbench/hip/LULESH"
+exe = "LULESH-proteus.x"
 [lulesh.inputs]
 default = "-s 128 -i 1000"
 
 [rsbench]
 [rsbench.nvidia]
-aot = "benchmarks/hecbench/cuda/rsbench"
-proteus = "benchmarks/hecbench/cuda/rsbench"
-jitify = "benchmarks/hecbench/cuda-jitify/rsbench"
-[rsbench.amd]
-aot = "benchmarks/hecbench/hip/rsbench"
-proteus = "benchmarks/hecbench/hip/rsbench"
+[rsbench.nvidia.aot]
+path = "benchmarks/hecbench/cuda/rsbench"
+exe = "rsbench-aot.x"
+[rsbench.nvidia.proteus]
+path = "benchmarks/hecbench/cuda/rsbench"
+exe = "rsbench-proteus.x"
+[rsbench.nvidia.jitify]
+path = "benchmarks/hecbench/cuda-jitify/rsbench"
+exe = "rsbench-jitify.x"
+[rsbench.amd.aot]
+path = "benchmarks/hecbench/hip/rsbench"
+exe = "rsbench-aot.x"
+[rsbench.amd.proteus]
+path = "benchmarks/hecbench/hip/rsbench"
+exe = "rsbench-proteus.x"
 [rsbench.inputs]
 default = "-s large -m event"
 
 [sw4ck]
 [sw4ck.nvidia]
-aot = "benchmarks/hecbench/cuda/sw4ck"
-proteus = "benchmarks/hecbench/cuda/sw4ck"
-jitify = "benchmarks/hecbench/cuda-jitify/sw4ck"
-[sw4ck.amd]
-aot = "benchmarks/hecbench/hip/sw4ck"
-proteus = "benchmarks/hecbench/hip/sw4ck"
+[sw4ck.nvidia.aot]
+path = "benchmarks/hecbench/cuda/sw4ck"
+exe = "sw4ck-aot.x"
+[sw4ck.nvidia.proteus]
+path = "benchmarks/hecbench/cuda/sw4ck"
+exe = "sw4ck-proteus.x"
+[sw4ck.nvidia.jitify]
+path = "benchmarks/hecbench/cuda-jitify/sw4ck"
+exe = "sw4ck-jitify.x"
+[sw4ck.amd.aot]
+path = "benchmarks/hecbench/hip/sw4ck"
+exe = "sw4ck-aot.x"
+[sw4ck.amd.proteus]
+path = "benchmarks/hecbench/hip/sw4ck"
+exe = "sw4ck-proteus.x"
 [sw4ck.inputs]
 default = "sw4ck.in 100"
 
 [wsm5]
 [wsm5.nvidia]
-aot = "benchmarks/hecbench/cuda/wsm5"
-proteus = "benchmarks/hecbench/cuda/wsm5"
-jitify = "benchmarks/hecbench/cuda-jitify/wsm5"
-[wsm5.amd]
-aot = "benchmarks/hecbench/hip/wsm5"
-proteus = "benchmarks/hecbench/hip/wsm5"
+[wsm5.nvidia.aot]
+path = "benchmarks/hecbench/cuda/wsm5"
+exe = "wsm5-aot.x"
+[wsm5.nvidia.proteus]
+path = "benchmarks/hecbench/cuda/wsm5"
+exe = "wsm5-proteus.x"
+[wsm5.nvidia.jitify]
+path = "benchmarks/hecbench/cuda-jitify/wsm5"
+exe = "wsm5-jitify.x"
+[wsm5.amd.aot]
+path = "benchmarks/hecbench/hip/wsm5"
+exe = "wsm5-aot.x"
+[wsm5.amd.proteus]
+path = "benchmarks/hecbench/hip/wsm5"
+exe = "wsm5-proteus.x"
 [wsm5.inputs]
 default = "10"
diff --git a/benchmarks/RAJAPerf b/benchmarks/RAJAPerf
diff --git a/driver.py b/driver.py
@@ -142,10 +142,17 @@ def get_hash(x):
 
 
 class Executor:
-    def __init__(self, benchmark, path, exemode, inputs, cc, proteus_path, env_configs):
+    def __init__(self, benchmark, path, executable_name, extra_args, exemode,
+                build_command, inputs, cc, proteus_path, env_configs):
         self.benchmark = benchmark
         self.path = path
+        self.executable_name = executable_name
+        self.extra_args = extra_args
         self.exemode = exemode
+        # The build command is a full bash command that builds the benchmark (or a list of
+        # such commands), e.g. `cmake -DCMAKE_BUILD_TYPE=Debug --build` or `make benchmark`.
+        # If None is provided, it defaults to `make`.
+        self.build_command = 'make' if build_command == None else build_command
         self.inputs = inputs
         self.cc = cc
         self.proteus_path = proteus_path
@@ -183,20 +190,22 @@ def clean(self):
 
     def build(self, do_jit):
         os.chdir(self.path)
-        cmd = "make"
         env = os.environ.copy()
         env["ENABLE_PROTEUS"] = "yes" if do_jit else "no"
         env["PROTEUS_PATH"] = self.proteus_path
         env["CC"] = self.cc
         t1 = time.perf_counter()
         print(
             "Build command",
-            cmd,
+            self.build_command,
             "CC=" + env["CC"],
             "PROTEUS_PATH=" + env["PROTEUS_PATH"],
             "ENABLE_PROTEUS=" + env["ENABLE_PROTEUS"],
         )
-        self.execute_command(cmd, env=env)
+        if not isinstance(self.build_command, list):
+            self.build_command = [self.build_command]
+        for cmd in self.build_command:
+            self.execute_command(cmd, env=env)
         t2 = time.perf_counter()
         return t2 - t1
 
@@ -211,12 +220,10 @@ def build_and_run(self, reps, profiler=None):
             or self.exemode == "jitify"
         ), "Expected aot or proteus or jitify for exemode"
 
-        exe = f"{self.benchmark}-{self.exemode}.x"
-        self.clean()
+        #self.clean()
         print("BUILD", self.path, "type", self.exemode)
-
         ctime = self.build(self.exemode != "aot")
-        exe_size = Path(f"{self.path}/{exe}").stat().st_size
+        exe_size = Path(f"{self.path}/{self.executable_name}").stat().st_size
         print("=> BUILT")
 
         for repeat in range(0, reps):
@@ -225,7 +232,7 @@ def build_and_run(self, reps, profiler=None):
                     cmd_env = os.environ.copy()
                     for k, v in env.items():
                         cmd_env[k] = v
-                    cmd = f"./{exe} {args}"
+                    cmd = f"{self.executable_name} {args} {self.extra_args}"
 
                     set_launch_bounds = (
                         False if env["ENV_PROTEUS_SET_LAUNCH_BOUNDS"] == "0" else True
@@ -461,19 +468,34 @@ def main():
         env_configs = JitifyConfig().get_env_configs()
     else:
         raise Exception(f"Invalid exemode {args.exemode}")
-
+    proteus_install = args.proteus_path
+    assert os.path.exists(proteus_install), f"Error: Proteus install path '{proteus_install}' does not exist!"
+    for env in env_configs:
+        env["PROTEUS_INSTALL_PATH"] = proteus_install
     experiments = []
+    build_command = None
+    build_once = False
+    # A TOML-wide custom build command was specified (top-level "build" table, per-machine "command" key).
+    if "build" in benchmark_configs:
+        build_command = benchmark_configs["build"][args.machine]["command"]
+        build_once = True
+
     for benchmark in args.bench if args.bench else benchmark_configs:
+        if benchmark == "build":
+            continue
         config = benchmark_configs[benchmark]
         experiments.append(
             Executor(
                 benchmark,
-                Path.cwd() / Path(config[args.machine][args.exemode]),
+                Path.cwd() / Path(config[args.machine][args.exemode]["path"]),
+                Path(config[args.machine][args.exemode]["exe"]),
+                config[args.machine][args.exemode]["args"],
                 args.exemode,
+                build_command,
                 config["inputs"],
                 args.compiler,
                 args.proteus_path,
-                env_configs,
+                env_configs
             )
         )