-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add raja perf benchmarks #2
base: main
Are you sure you want to change the base?
Changes from all commits
7611aa8
ffb1085
b55bbf4
2a142a8
5de6a57
6dce73a
4a54767
4ef8547
11a8f93
b97d04e
99563ad
d5e4916
8e1bd0b
030fa2c
ade5a18
8835dbf
167b85c
938e1af
afc7645
4acd192
24555c1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
[submodule "RAJAPerf"] | ||
path = benchmarks/RAJAPerf | ||
url = https://github.com/Olympus-HPC/RAJAPerf.git |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,65 +1,129 @@ | ||
[build] | ||
[build.nvidia] | ||
command = ["make"] | ||
[build.nvidia.clean] | ||
command = "make clean" | ||
[build.amd] | ||
command = ["make"] | ||
[build.amd.clean] | ||
command = "make clean" | ||
|
||
[adam] | ||
[adam.nvidia] | ||
aot = "benchmarks/hecbench/cuda/adam" | ||
proteus = "benchmarks/hecbench/cuda/adam" | ||
jitify = "benchmarks/hecbench/cuda-jitify/adam" | ||
[adam.amd] | ||
aot = "benchmarks/hecbench/hip/adam" | ||
proteus = "benchmarks/hecbench/hip/adam" | ||
[adam.nvidia.aot] | ||
path = "benchmarks/hecbench/cuda/adam" | ||
exe = "adam-aot.x" | ||
[adam.nvidia.proteus] | ||
path = "benchmarks/hecbench/cuda/adam" | ||
exe = "adam-proteus.x" | ||
[adam.nvidia.jitify] | ||
path = "benchmarks/hecbench/cuda-jitify/adam" | ||
exe = "adam-jitify.x" | ||
[adam.amd.aot] | ||
path = "benchmarks/hecbench/hip/adam" | ||
exe = "adam-aot.x" | ||
[adam.amd.proteus] | ||
path = "benchmarks/hecbench/hip/adam" | ||
exe = "adam-proteus.x" | ||
[adam.inputs] | ||
default = "160000 1600 1000" | ||
|
||
[feynman-kac] | ||
[feynman-kac.nvidia] | ||
aot = "benchmarks/hecbench/cuda/feynman-kac" | ||
proteus = "benchmarks/hecbench/cuda/feynman-kac" | ||
jitify = "benchmarks/hecbench/cuda-jitify/feynman-kac" | ||
[feynman-kac.amd] | ||
aot = "benchmarks/hecbench/hip/feynman-kac" | ||
proteus = "benchmarks/hecbench/hip/feynman-kac" | ||
[feynman-kac.nvidia.aot] | ||
path = "benchmarks/hecbench/cuda/feynman-kac" | ||
exe = "feynman-kac-aot.x" | ||
[feynman-kac.nvidia.proteus] | ||
path = "benchmarks/hecbench/cuda/feynman-kac" | ||
exe = "feynman-kac-proteus.x" | ||
[feynman-kac.nvidia.jitify] | ||
path = "benchmarks/hecbench/cuda-jitify/feynman-kac" | ||
exe = "feynman-kac-jitify.x" | ||
[feynman-kac.amd.aot] | ||
path = "benchmarks/hecbench/hip/feynman-kac" | ||
exe = "feynman-kac-aot.x" | ||
[feynman-kac.amd.proteus] | ||
path = "benchmarks/hecbench/hip/feynman-kac" | ||
exe = "feynman-kac-proteus.x" | ||
[feynman-kac.inputs] | ||
default = "1" | ||
|
||
[lulesh] | ||
[lulesh.nvidia] | ||
aot = "benchmarks/hecbench/cuda/LULESH" | ||
proteus = "benchmarks/hecbench/cuda/LULESH" | ||
jitify = "benchmarks/hecbench/cuda-jitify/LULESH" | ||
[lulesh.amd] | ||
aot = "benchmarks/hecbench/hip/LULESH" | ||
proteus = "benchmarks/hecbench/hip/LULESH" | ||
[lulesh.nvidia.aot] | ||
path = "benchmarks/hecbench/cuda/LULESH" | ||
exe = "lulesh-aot.x" | ||
[lulesh.nvidia.proteus] | ||
path = "benchmarks/hecbench/cuda/LULESH" | ||
exe = "lulesh-proteus.x" | ||
[lulesh.nvidia.jitify] | ||
path = "benchmarks/hecbench/cuda-jitify/LULESH" | ||
exe = "lulesh-jitify.x" | ||
[lulesh.amd.aot] | ||
path = "benchmarks/hecbench/hip/LULESH" | ||
exe = "lulesh-aot.x" | ||
[lulesh.amd.proteus] | ||
path = "benchmarks/hecbench/hip/LULESH" | ||
exe = "lulesh-proteus.x" | ||
[lulesh.inputs] | ||
default = "-s 128 -i 1000" | ||
|
||
[rsbench] | ||
[rsbench.nvidia] | ||
aot = "benchmarks/hecbench/cuda/rsbench" | ||
proteus = "benchmarks/hecbench/cuda/rsbench" | ||
jitify = "benchmarks/hecbench/cuda-jitify/rsbench" | ||
[rsbench.amd] | ||
aot = "benchmarks/hecbench/hip/rsbench" | ||
proteus = "benchmarks/hecbench/hip/rsbench" | ||
[rsbench.nvidia.aot] | ||
path = "benchmarks/hecbench/cuda/rsbench" | ||
exe = "rsbench-aot.x" | ||
[rsbench.nvidia.proteus] | ||
path = "benchmarks/hecbench/cuda/rsbench" | ||
exe = "rsbench-proteus.x" | ||
[rsbench.nvidia.jitify] | ||
path = "benchmarks/hecbench/cuda-jitify/rsbench" | ||
exe = "rsbench-jitify.x" | ||
[rsbench.amd.aot] | ||
path = "benchmarks/hecbench/hip/rsbench" | ||
exe = "rsbench-aot.x" | ||
[rsbench.amd.proteus] | ||
path = "benchmarks/hecbench/hip/rsbench" | ||
exe = "rsbench-proteus.x" | ||
[rsbench.inputs] | ||
default = "-s large -m event" | ||
|
||
[sw4ck] | ||
[sw4ck.nvidia] | ||
aot = "benchmarks/hecbench/cuda/sw4ck" | ||
proteus = "benchmarks/hecbench/cuda/sw4ck" | ||
jitify = "benchmarks/hecbench/cuda-jitify/sw4ck" | ||
[sw4ck.amd] | ||
aot = "benchmarks/hecbench/hip/sw4ck" | ||
proteus = "benchmarks/hecbench/hip/sw4ck" | ||
[sw4ck.nvidia.aot] | ||
path = "benchmarks/hecbench/cuda/sw4ck" | ||
exe = "sw4ck-aot.x" | ||
[sw4ck.nvidia.proteus] | ||
path = "benchmarks/hecbench/cuda/sw4ck" | ||
exe = "sw4ck-proteus.x" | ||
[sw4ck.nvidia.jitify] | ||
path = "benchmarks/hecbench/cuda-jitify/sw4ck" | ||
exe = "sw4ck-jitify.x" | ||
[sw4ck.amd.aot] | ||
path = "benchmarks/hecbench/hip/sw4ck" | ||
exe = "sw4ck-aot.x" | ||
[sw4ck.amd.proteus] | ||
path = "benchmarks/hecbench/hip/sw4ck" | ||
exe = "sw4ck-proteus.x" | ||
[sw4ck.inputs] | ||
default = "sw4ck.in 100" | ||
|
||
[wsm5] | ||
[wsm5.nvidia] | ||
aot = "benchmarks/hecbench/cuda/wsm5" | ||
proteus = "benchmarks/hecbench/cuda/wsm5" | ||
jitify = "benchmarks/hecbench/cuda-jitify/wsm5" | ||
[wsm5.amd] | ||
aot = "benchmarks/hecbench/hip/wsm5" | ||
proteus = "benchmarks/hecbench/hip/wsm5" | ||
[wsm5.nvidia.aot] | ||
path = "benchmarks/hecbench/cuda/wsm5" | ||
exe = "wsm5-aot.x" | ||
[wsm5.nvidia.proteus] | ||
path = "benchmarks/hecbench/cuda/wsm5" | ||
exe = "wsm5-proteus.x" | ||
[wsm5.nvidia.jitify] | ||
path = "benchmarks/hecbench/cuda-jitify/wsm5" | ||
exe = "wsm5-jitify.x" | ||
[wsm5.amd.aot] | ||
path = "benchmarks/hecbench/hip/wsm5" | ||
exe = "wsm5-aot.x" | ||
[wsm5.amd.proteus] | ||
path = "benchmarks/hecbench/hip/wsm5" | ||
exe = "wsm5-proteus.x" | ||
[wsm5.inputs] | ||
default = "10" |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,6 +14,21 @@ | |
import tomllib | ||
|
||
|
||
def demangle(potentially_mangled_name):
    """Demangle a symbol name by running the external ``llvm-cxxfilt`` tool.

    The name is handed to the tool as a single argv entry instead of being
    spliced into a shell command line, so quotes, backticks, or other shell
    metacharacters inside the symbol are never interpreted by a shell.

    Args:
        potentially_mangled_name: Symbol name to pass to ``llvm-cxxfilt``.

    Returns:
        The captured standard output of ``llvm-cxxfilt``, returned verbatim
        (nothing is stripped from it).

    Raises:
        subprocess.CalledProcessError: If ``llvm-cxxfilt`` exits with a
            non-zero status; diagnostics are printed before re-raising.
        FileNotFoundError: If the ``llvm-cxxfilt`` executable cannot be
            found when launching the process.
    """
    # Argument-list form (no shell=True): the symbol is passed through
    # untouched regardless of the characters it contains.
    cmd = ["llvm-cxxfilt", potentially_mangled_name]
    try:
        p = subprocess.run(cmd, check=True, text=True, capture_output=True)
    except subprocess.CalledProcessError as e:
        print("Failed cmd", e.cmd)
        print("ret", e.returncode)
        print("stdout\n", e.stdout)
        print("stderr\n", e.stderr)
        print(e)
        # Bare raise keeps the original traceback intact.
        raise

    return p.stdout
|
||
class ProteusConfig: | ||
def check_valid(self, key, values): | ||
if key not in self.valid_keys: | ||
|
@@ -91,7 +106,7 @@ def parse(self, fn): | |
def get_hash(x): | ||
try: | ||
hash_pos = 2 | ||
return cxxfilt.demangle(x.split("$")[hash_pos]) | ||
return demangle(x.split("$")[hash_pos]) | ||
except IndexError: | ||
return None | ||
|
||
|
@@ -101,7 +116,7 @@ def get_hash(x): | |
df["Duration"] = df["EndNs"] - df["BeginNs"] | ||
df["Name"] = df["Name"].str.replace(" [clone .kd]", "", regex=False) | ||
df["Hash"] = df.Name.apply(lambda x: get_hash(x)) | ||
df["Name"] = df.Name.apply(lambda x: cxxfilt.demangle(x.split("$")[0])) | ||
df["Name"] = df.Name.apply(lambda x: demangle(x.split("$")[0])) | ||
return df | ||
|
||
|
||
|
@@ -120,7 +135,7 @@ def parse(self, fn): | |
def get_hash(x): | ||
try: | ||
hash_pos = 2 | ||
return cxxfilt.demangle(x.split("$")[hash_pos]) | ||
return demangle(x.split("$")[hash_pos]) | ||
except IndexError: | ||
return None | ||
|
||
|
@@ -132,24 +147,35 @@ def get_hash(x): | |
df = df[1:] | ||
# Nvprof with metrics tracks only kernels. | ||
if self.metrics: | ||
df["Kernel"] = df.Kernel.apply(lambda x: cxxfilt.demangle(x.split("$")[0])) | ||
df["Kernel"] = df.Kernel.apply(lambda x: demangle(x.split("$")[0])) | ||
df.rename(columns={"Kernel": "Name"}, inplace=True) | ||
else: | ||
df["Hash"] = df.Name.apply(lambda x: get_hash(x)) | ||
df["Name"] = df.Name.apply(lambda x: cxxfilt.demangle(x.split("$")[0])) | ||
df["Name"] = df.Name.apply(lambda x: demangle(x.split("$")[0])) | ||
|
||
return df | ||
|
||
|
||
class Executor: | ||
def __init__(self, benchmark, path, exemode, inputs, cc, proteus_path, env_configs): | ||
def __init__(self, benchmark, path, executable_name, extra_args, exemode, | ||
build_command, clean_command, inputs, cc, proteus_path, env_configs, | ||
build_once, already_built): | ||
self.benchmark = benchmark | ||
self.path = path | ||
self.executable_name = executable_name | ||
self.extra_args = extra_args | ||
self.exemode = exemode | ||
# the build command is meant to be a full bash command to build the benchmark, eg | ||
# `cmake -DCMAKE_BUILD_TYPE=Debug --build` or `make benchmark` | ||
# If none is provided, it will default to `make` | ||
self.build_command = 'make' if build_command == None else build_command | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Remove the default; the user should define the build command explicitly. We should use the TOML dictionary hierarchically, so that a user can provide a build command that applies to a group of programs. |
||
self.clean_command = clean_command | ||
self.inputs = inputs | ||
self.cc = cc | ||
self.proteus_path = proteus_path | ||
self.env_configs = env_configs | ||
self.build_once = build_once | ||
self.already_built = already_built | ||
|
||
def __str__(self): | ||
return f"{self.benchmark} {self.path} {self.exemode}" | ||
|
@@ -178,25 +204,30 @@ def execute_command(self, cmd, **kwargs): | |
|
||
def clean(self): | ||
os.chdir(self.path) | ||
cmd = "make clean" | ||
self.execute_command(cmd) | ||
if self.clean_command is not None: | ||
self.execute_command(self.clean_command) | ||
|
||
def build(self, do_jit): | ||
os.chdir(self.path) | ||
cmd = "make" | ||
env = os.environ.copy() | ||
env["ENABLE_PROTEUS"] = "yes" if do_jit else "no" | ||
env["PROTEUS_PATH"] = self.proteus_path | ||
env["CC"] = self.cc | ||
if self.build_once and self.already_built: | ||
print(self.benchmark) | ||
return 0 | ||
t1 = time.perf_counter() | ||
print( | ||
"Build command", | ||
cmd, | ||
self.build_command, | ||
"CC=" + env["CC"], | ||
"PROTEUS_PATH=" + env["PROTEUS_PATH"], | ||
"ENABLE_PROTEUS=" + env["ENABLE_PROTEUS"], | ||
) | ||
self.execute_command(cmd, env=env) | ||
if not isinstance(self.build_command, list): | ||
self.build_command = [self.build_command] | ||
for cmd in self.build_command: | ||
self.execute_command(cmd, env=env) | ||
t2 = time.perf_counter() | ||
return t2 - t1 | ||
|
||
|
@@ -211,12 +242,10 @@ def build_and_run(self, reps, profiler=None): | |
or self.exemode == "jitify" | ||
), "Expected aot or proteus or jitify for exemode" | ||
|
||
exe = f"{self.benchmark}-{self.exemode}.x" | ||
self.clean() | ||
print("BUILD", self.path, "type", self.exemode) | ||
|
||
ctime = self.build(self.exemode != "aot") | ||
exe_size = Path(f"{self.path}/{exe}").stat().st_size | ||
exe_size = Path(f"{self.path}/{self.executable_name}").stat().st_size | ||
print("=> BUILT") | ||
|
||
for repeat in range(0, reps): | ||
|
@@ -225,7 +254,7 @@ def build_and_run(self, reps, profiler=None): | |
cmd_env = os.environ.copy() | ||
for k, v in env.items(): | ||
cmd_env[k] = v | ||
cmd = f"./{exe} {args}" | ||
cmd = f"{self.executable_name} {args} {self.extra_args}" | ||
|
||
set_launch_bounds = ( | ||
False if env["ENV_PROTEUS_SET_LAUNCH_BOUNDS"] == "0" else True | ||
|
@@ -461,21 +490,49 @@ def main(): | |
env_configs = JitifyConfig().get_env_configs() | ||
else: | ||
raise Exception(f"Invalid exemode {args.exemode}") | ||
|
||
proteus_install = args.proteus_path | ||
assert os.path.exists(proteus_install), f"Error: Proteus install path '{proteus_install}' does not exist!" | ||
for env in env_configs: | ||
env["PROTEUS_INSTALL_PATH"] = proteus_install | ||
Comment on lines
+495
to
+496
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Where is that used? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It is used by the build instructions for RAJAPerf. |
||
experiments = [] | ||
build_command = None | ||
build_once = False | ||
already_built = False | ||
if "build" in benchmark_configs and "build_once" in benchmark_configs["build"]: | ||
build_once = True | ||
|
||
# custom toml wide level build command specified | ||
if "build" in benchmark_configs and "command" in benchmark_configs["build"][args.machine]: | ||
build_command = benchmark_configs["build"][args.machine]["command"] | ||
else: | ||
raise Exception( | ||
"Build instructions must be supplied on a toml-wide level" | ||
) | ||
|
||
for benchmark in args.bench if args.bench else benchmark_configs: | ||
if benchmark == "build": | ||
continue | ||
config = benchmark_configs[benchmark] | ||
extra_args = config[args.machine][args.exemode]["args"] if "args" in config[args.machine][args.exemode] else "" | ||
clean_command = benchmark_configs["build"]["clean"] if "build" in benchmark_configs and "clean" in benchmark_configs["build"] else None | ||
experiments.append( | ||
Executor( | ||
benchmark, | ||
Path.cwd() / Path(config[args.machine][args.exemode]), | ||
Path.cwd() / Path(config[args.machine][args.exemode]["path"]), | ||
Path(config[args.machine][args.exemode]["exe"]), | ||
extra_args, | ||
args.exemode, | ||
build_command, | ||
clean_command, | ||
config["inputs"], | ||
args.compiler, | ||
args.proteus_path, | ||
env_configs, | ||
build_once, | ||
already_built | ||
) | ||
) | ||
already_built = True | ||
|
||
def gather_profiler_results(metrics): | ||
if args.machine == "amd": | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(not for this PR) @johnbowen42 can we have build variants too? (e.g., build.nvidia.aot)