Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add raja perf benchmarks #2

Open
wants to merge 21 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[submodule "RAJAPerf"]
path = benchmarks/RAJAPerf
url = https://github.com/Olympus-HPC/RAJAPerf.git
136 changes: 100 additions & 36 deletions benchmarks.toml
Original file line number Diff line number Diff line change
@@ -1,65 +1,129 @@
[build]
[build.nvidia]
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(not for this PR) @johnbowen42 can we have build variants too? (e.g., build.nvidia.aot)

command = ["make"]
[build.nvidia.clean]
command = "make clean"
[build.amd]
command = ["make"]
[build.amd.clean]
command = "make clean"

[adam]
[adam.nvidia]
aot = "benchmarks/hecbench/cuda/adam"
proteus = "benchmarks/hecbench/cuda/adam"
jitify = "benchmarks/hecbench/cuda-jitify/adam"
[adam.amd]
aot = "benchmarks/hecbench/hip/adam"
proteus = "benchmarks/hecbench/hip/adam"
[adam.nvidia.aot]
path = "benchmarks/hecbench/cuda/adam"
exe = "adam-aot.x"
[adam.nvidia.proteus]
path = "benchmarks/hecbench/cuda/adam"
exe = "adam-proteus.x"
[adam.nvidia.jitify]
path = "benchmarks/hecbench/cuda-jitify/adam"
exe = "adam-jitify.x"
[adam.amd.aot]
path = "benchmarks/hecbench/hip/adam"
exe = "adam-aot.x"
[adam.amd.proteus]
path = "benchmarks/hecbench/hip/adam"
exe = "adam-proteus.x"
[adam.inputs]
default = "160000 1600 1000"

[feynman-kac]
[feynman-kac.nvidia]
aot = "benchmarks/hecbench/cuda/feynman-kac"
proteus = "benchmarks/hecbench/cuda/feynman-kac"
jitify = "benchmarks/hecbench/cuda-jitify/feynman-kac"
[feynman-kac.amd]
aot = "benchmarks/hecbench/hip/feynman-kac"
proteus = "benchmarks/hecbench/hip/feynman-kac"
[feynman-kac.nvidia.aot]
path = "benchmarks/hecbench/cuda/feynman-kac"
exe = "feynman-kac-aot.x"
[feynman-kac.nvidia.proteus]
path = "benchmarks/hecbench/cuda/feynman-kac"
exe = "feynman-kac-proteus.x"
[feynman-kac.nvidia.jitify]
path = "benchmarks/hecbench/cuda-jitify/feynman-kac"
exe = "feynman-kac-jitify.x"
[feynman-kac.amd.aot]
path = "benchmarks/hecbench/hip/feynman-kac"
exe = "feynman-kac-aot.x"
[feynman-kac.amd.proteus]
path = "benchmarks/hecbench/hip/feynman-kac"
exe = "feynman-kac-proteus.x"
[feynman-kac.inputs]
default = "1"

[lulesh]
[lulesh.nvidia]
aot = "benchmarks/hecbench/cuda/LULESH"
proteus = "benchmarks/hecbench/cuda/LULESH"
jitify = "benchmarks/hecbench/cuda-jitify/LULESH"
[lulesh.amd]
aot = "benchmarks/hecbench/hip/LULESH"
proteus = "benchmarks/hecbench/hip/LULESH"
[lulesh.nvidia.aot]
path = "benchmarks/hecbench/cuda/LULESH"
exe = "lulesh-aot.x"
[lulesh.nvidia.proteus]
path = "benchmarks/hecbench/cuda/LULESH"
exe = "lulesh-proteus.x"
[lulesh.nvidia.jitify]
path = "benchmarks/hecbench/cuda-jitify/LULESH"
exe = "lulesh-jitify.x"
[lulesh.amd.aot]
path = "benchmarks/hecbench/hip/LULESH"
exe = "lulesh-aot.x"
[lulesh.amd.proteus]
path = "benchmarks/hecbench/hip/LULESH"
exe = "lulesh-proteus.x"
[lulesh.inputs]
default = "-s 128 -i 1000"

[rsbench]
[rsbench.nvidia]
aot = "benchmarks/hecbench/cuda/rsbench"
proteus = "benchmarks/hecbench/cuda/rsbench"
jitify = "benchmarks/hecbench/cuda-jitify/rsbench"
[rsbench.amd]
aot = "benchmarks/hecbench/hip/rsbench"
proteus = "benchmarks/hecbench/hip/rsbench"
[rsbench.nvidia.aot]
path = "benchmarks/hecbench/cuda/rsbench"
exe = "rsbench-aot.x"
[rsbench.nvidia.proteus]
path = "benchmarks/hecbench/cuda/rsbench"
exe = "rsbench-proteus.x"
[rsbench.nvidia.jitify]
path = "benchmarks/hecbench/cuda-jitify/rsbench"
exe = "rsbench-jitify.x"
[rsbench.amd.aot]
path = "benchmarks/hecbench/hip/rsbench"
exe = "rsbench-aot.x"
[rsbench.amd.proteus]
path = "benchmarks/hecbench/hip/rsbench"
exe = "rsbench-proteus.x"
[rsbench.inputs]
default = "-s large -m event"

[sw4ck]
[sw4ck.nvidia]
aot = "benchmarks/hecbench/cuda/sw4ck"
proteus = "benchmarks/hecbench/cuda/sw4ck"
jitify = "benchmarks/hecbench/cuda-jitify/sw4ck"
[sw4ck.amd]
aot = "benchmarks/hecbench/hip/sw4ck"
proteus = "benchmarks/hecbench/hip/sw4ck"
[sw4ck.nvidia.aot]
path = "benchmarks/hecbench/cuda/sw4ck"
exe = "sw4ck-aot.x"
[sw4ck.nvidia.proteus]
path = "benchmarks/hecbench/cuda/sw4ck"
exe = "sw4ck-proteus.x"
[sw4ck.nvidia.jitify]
path = "benchmarks/hecbench/cuda-jitify/sw4ck"
exe = "sw4ck-jitify.x"
[sw4ck.amd.aot]
path = "benchmarks/hecbench/hip/sw4ck"
exe = "sw4ck-aot.x"
[sw4ck.amd.proteus]
path = "benchmarks/hecbench/hip/sw4ck"
exe = "sw4ck-proteus.x"
[sw4ck.inputs]
default = "sw4ck.in 100"

[wsm5]
[wsm5.nvidia]
aot = "benchmarks/hecbench/cuda/wsm5"
proteus = "benchmarks/hecbench/cuda/wsm5"
jitify = "benchmarks/hecbench/cuda-jitify/wsm5"
[wsm5.amd]
aot = "benchmarks/hecbench/hip/wsm5"
proteus = "benchmarks/hecbench/hip/wsm5"
[wsm5.nvidia.aot]
path = "benchmarks/hecbench/cuda/wsm5"
exe = "wsm5-aot.x"
[wsm5.nvidia.proteus]
path = "benchmarks/hecbench/cuda/wsm5"
exe = "wsm5-proteus.x"
[wsm5.nvidia.jitify]
path = "benchmarks/hecbench/cuda-jitify/wsm5"
exe = "wsm5-jitify.x"
[wsm5.amd.aot]
path = "benchmarks/hecbench/hip/wsm5"
exe = "wsm5-aot.x"
[wsm5.amd.proteus]
path = "benchmarks/hecbench/hip/wsm5"
exe = "wsm5-proteus.x"
[wsm5.inputs]
default = "10"
1 change: 1 addition & 0 deletions benchmarks/RAJAPerf
Submodule RAJAPerf added at d185b0
91 changes: 74 additions & 17 deletions driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,21 @@
import tomllib


def demangle(potentially_mangled_name):
    """Demangle a C++ symbol name using the llvm-cxxfilt tool.

    Replaces the cxxfilt Python package: the name is passed straight
    through to llvm-cxxfilt, which echoes it back unchanged if it is not
    actually mangled.

    Args:
        potentially_mangled_name: Symbol name, possibly Itanium-mangled.

    Returns:
        The demangled name as a string, without the trailing newline
        that llvm-cxxfilt appends to its output.

    Raises:
        subprocess.CalledProcessError: if llvm-cxxfilt exits non-zero
            (diagnostics are printed before re-raising).
        FileNotFoundError: if llvm-cxxfilt is not on PATH.
    """
    try:
        # Pass the name as an argv element with shell=False: mangled C++
        # names can contain shell metacharacters ($, parentheses, ...),
        # so interpolating them into a shell string is unsafe and fragile.
        p = subprocess.run(
            ["llvm-cxxfilt", potentially_mangled_name],
            check=True,
            text=True,
            capture_output=True,
        )
    except subprocess.CalledProcessError as e:
        print("Failed cmd", e.cmd)
        print("ret", e.returncode)
        print("stdout\n", e.stdout)
        print("stderr\n", e.stderr)
        print(e)
        raise

    # llvm-cxxfilt terminates its output with a newline; strip it so the
    # result matches what cxxfilt.demangle used to return (callers store
    # this directly in DataFrame Name/Hash columns).
    return p.stdout.strip()

class ProteusConfig:
def check_valid(self, key, values):
if key not in self.valid_keys:
Expand Down Expand Up @@ -91,7 +106,7 @@ def parse(self, fn):
def get_hash(x):
try:
hash_pos = 2
return cxxfilt.demangle(x.split("$")[hash_pos])
return demangle(x.split("$")[hash_pos])
except IndexError:
return None

Expand All @@ -101,7 +116,7 @@ def get_hash(x):
df["Duration"] = df["EndNs"] - df["BeginNs"]
df["Name"] = df["Name"].str.replace(" [clone .kd]", "", regex=False)
df["Hash"] = df.Name.apply(lambda x: get_hash(x))
df["Name"] = df.Name.apply(lambda x: cxxfilt.demangle(x.split("$")[0]))
df["Name"] = df.Name.apply(lambda x: demangle(x.split("$")[0]))
return df


Expand All @@ -120,7 +135,7 @@ def parse(self, fn):
def get_hash(x):
try:
hash_pos = 2
return cxxfilt.demangle(x.split("$")[hash_pos])
return demangle(x.split("$")[hash_pos])
except IndexError:
return None

Expand All @@ -132,24 +147,35 @@ def get_hash(x):
df = df[1:]
# Nvprof with metrics tracks only kernels.
if self.metrics:
df["Kernel"] = df.Kernel.apply(lambda x: cxxfilt.demangle(x.split("$")[0]))
df["Kernel"] = df.Kernel.apply(lambda x: demangle(x.split("$")[0]))
df.rename(columns={"Kernel": "Name"}, inplace=True)
else:
df["Hash"] = df.Name.apply(lambda x: get_hash(x))
df["Name"] = df.Name.apply(lambda x: cxxfilt.demangle(x.split("$")[0]))
df["Name"] = df.Name.apply(lambda x: demangle(x.split("$")[0]))

return df


class Executor:
def __init__(self, benchmark, path, exemode, inputs, cc, proteus_path, env_configs):
def __init__(self, benchmark, path, executable_name, extra_args, exemode,
build_command, clean_command, inputs, cc, proteus_path, env_configs,
build_once, already_built):
self.benchmark = benchmark
self.path = path
self.executable_name = executable_name
self.extra_args = extra_args
self.exemode = exemode
# the build command is meant to be a full bash command to build the benchmark, eg
# `cmake -DCMAKE_BUILD_TYPE=Debug --build` or `make benchmark`
# If none is provided, it will default to `make`
self.build_command = 'make' if build_command == None else build_command
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove the default. User should define explicitly. We should use the TOML dictionary hierarchically, so user can provide a build command that applies to a group of programs.

self.clean_command = clean_command
self.inputs = inputs
self.cc = cc
self.proteus_path = proteus_path
self.env_configs = env_configs
self.build_once = build_once
self.already_built = already_built

def __str__(self):
return f"{self.benchmark} {self.path} {self.exemode}"
Expand Down Expand Up @@ -178,25 +204,30 @@ def execute_command(self, cmd, **kwargs):

def clean(self):
os.chdir(self.path)
cmd = "make clean"
self.execute_command(cmd)
if self.clean_command is not None:
self.execute_command(self.clean_command)

def build(self, do_jit):
os.chdir(self.path)
cmd = "make"
env = os.environ.copy()
env["ENABLE_PROTEUS"] = "yes" if do_jit else "no"
env["PROTEUS_PATH"] = self.proteus_path
env["CC"] = self.cc
if self.build_once and self.already_built:
print(self.benchmark)
return 0
t1 = time.perf_counter()
print(
"Build command",
cmd,
self.build_command,
"CC=" + env["CC"],
"PROTEUS_PATH=" + env["PROTEUS_PATH"],
"ENABLE_PROTEUS=" + env["ENABLE_PROTEUS"],
)
self.execute_command(cmd, env=env)
if not isinstance(self.build_command, list):
self.build_command = [self.build_command]
for cmd in self.build_command:
self.execute_command(cmd, env=env)
t2 = time.perf_counter()
return t2 - t1

Expand All @@ -211,12 +242,10 @@ def build_and_run(self, reps, profiler=None):
or self.exemode == "jitify"
), "Expected aot or proteus or jitify for exemode"

exe = f"{self.benchmark}-{self.exemode}.x"
self.clean()
print("BUILD", self.path, "type", self.exemode)

ctime = self.build(self.exemode != "aot")
exe_size = Path(f"{self.path}/{exe}").stat().st_size
exe_size = Path(f"{self.path}/{self.executable_name}").stat().st_size
print("=> BUILT")

for repeat in range(0, reps):
Expand All @@ -225,7 +254,7 @@ def build_and_run(self, reps, profiler=None):
cmd_env = os.environ.copy()
for k, v in env.items():
cmd_env[k] = v
cmd = f"./{exe} {args}"
cmd = f"{self.executable_name} {args} {self.extra_args}"

set_launch_bounds = (
False if env["ENV_PROTEUS_SET_LAUNCH_BOUNDS"] == "0" else True
Expand Down Expand Up @@ -461,21 +490,49 @@ def main():
env_configs = JitifyConfig().get_env_configs()
else:
raise Exception(f"Invalid exemode {args.exemode}")

proteus_install = args.proteus_path
assert os.path.exists(proteus_install), f"Error: Proteus install path '{proteus_install}' does not exist!"
for env in env_configs:
env["PROTEUS_INSTALL_PATH"] = proteus_install
Comment on lines +495 to +496
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where is that used?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

build instructions for RAJA perf

experiments = []
build_command = None
build_once = False
already_built = False
if "build" in benchmark_configs and "build_once" in benchmark_configs["build"]:
build_once = True

# custom toml wide level build command specified
if "build" in benchmark_configs and "command" in benchmark_configs["build"][args.machine]:
build_command = benchmark_configs["build"][args.machine]["command"]
else:
raise Exception(
"Build instructions must be supplied on a toml-wide level"
)

for benchmark in args.bench if args.bench else benchmark_configs:
if benchmark == "build":
continue
config = benchmark_configs[benchmark]
extra_args = config[args.machine][args.exemode]["args"] if "args" in config[args.machine][args.exemode] else ""
clean_command = benchmark_configs["build"]["clean"] if "build" in benchmark_configs and "clean" in benchmark_configs["build"] else None
experiments.append(
Executor(
benchmark,
Path.cwd() / Path(config[args.machine][args.exemode]),
Path.cwd() / Path(config[args.machine][args.exemode]["path"]),
Path(config[args.machine][args.exemode]["exe"]),
extra_args,
args.exemode,
build_command,
clean_command,
config["inputs"],
args.compiler,
args.proteus_path,
env_configs,
build_once,
already_built
)
)
already_built = True

def gather_profiler_results(metrics):
if args.machine == "amd":
Expand Down
Loading