Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add hipblaslt to the project. #24

Merged
merged 2 commits into from
Jan 23, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ message(STATUS "ROCM_GIT_DIR is set to: ${ROCM_GIT_DIR}")

# Library specific enable flags.
option(THEROCK_ENABLE_RCCL "Enable the comm_libs/rccl sub-project" ON)
option(THEROCK_ENABLE_MATH_LIBS "Enable building of math libraries" ON)

# Initialize the install directory.
if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
Expand Down Expand Up @@ -78,7 +79,9 @@ add_subdirectory(base)
add_subdirectory(compiler)
add_subdirectory(core)
add_subdirectory(comm-libs)

if(THEROCK_ENABLE_MATH_LIBS)
add_subdirectory(math-libs)
endif()

# ################################################################################
# # Testing
Expand Down
3 changes: 3 additions & 0 deletions build_tools/fetch_sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,9 @@ def main(argv):
"rocminfo",
"rocprofiler-register",
"ROCR-Runtime",
# Math Libraries
"hipBLAS-common",
"hipBLASLt",
],
)
args = parser.parse_args(argv)
Expand Down
7 changes: 7 additions & 0 deletions cmake/therock_subproject.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -484,6 +484,13 @@ function(therock_cmake_subproject_glob_c_sources target_name)
target_sources("${target_name}" PRIVATE ${_files})
endfunction()

# Gets the dist/ directory for a sub-project.
function(therock_cmake_subproject_dist_dir out_var target_name)
_therock_assert_is_cmake_subproject("${target_name}")
get_target_property(_dir "${target_name}" THEROCK_DIST_DIR)
set("${out_var}" "${_dir}" PARENT_SCOPE)
endfunction()

# Merges all compile_commands.json files and exports them.
function(therock_subproject_merge_compile_commands)
if(NOT CMAKE_EXPORT_COMPILE_COMMANDS)
Expand Down
2 changes: 1 addition & 1 deletion cmake/therock_subproject_dep_provider.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ function(therock_reparse_super_project_find_package superproject_path package_na
endif()

set(_rewritten ${UNUSED_UNPARSED_ARGUMENTS})
list(APPEND _rewritten CONFIG BYPASS_PROVIDER NO_DEFAULT_PATH PATHS ${superproject_path})
list(APPEND _rewritten BYPASS_PROVIDER NO_DEFAULT_PATH PATHS ${superproject_path})
list(JOIN _rewritten " " _rewritten_pretty)
message(STATUS "Resolving super-project find_package(${_rewritten_pretty})")
set(_therock_rewritten_superproject_find_package_sig ${_rewritten} PARENT_SCOPE)
Expand Down
64 changes: 64 additions & 0 deletions math-libs/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Many things need to be configured with paths to additional LLVM tools.
# If a project is using an amd-hip or amd-llvm toolchain, then it will already
# have an implicit dep on the toolchain, so it is safe to reference binaries
# here without worrying about build deps.
therock_cmake_subproject_dist_dir(_toolchain_dir amd-llvm)
cmake_path(APPEND _toolchain_dir lib/llvm)

#################################################################################
# hipBLAS-common
#################################################################################

therock_cmake_subproject_declare(hipBLAS-common
EXTERNAL_SOURCE_DIR "hipBLAS-common"
CMAKE_ARGS
-DROCM_PATH=
-DROCM_DIR=
COMPILER_TOOLCHAIN
amd-hip
IGNORE_PACKAGES
# The current version of rccl needs to download a 2y old version of rocm-cmake
# to work and it will only do so if the system resolver reports it not found
# without any other error (which due to the signature, our resolver will
# hard fail). Once fixed, `rocm-core` should be added to the BUILD_DEPS
# and this removed: https://github.com/nod-ai/TheRock/issues/18
ROCM
RUNTIME_DEPS
hip-clr
)
therock_cmake_subproject_glob_c_sources(hipBLAS-common
SUBDIRS
library
)
therock_cmake_subproject_provide_package(hipBLAS-common hipblas-common lib/cmake/hipblas-common)
therock_cmake_subproject_activate(hipBLAS-common)

#################################################################################
# hipBLASLt
#################################################################################

therock_cmake_subproject_declare(hipBLASLt
EXTERNAL_SOURCE_DIR "hipBLASLt"
CMAKE_ARGS
-DHIP_PLATFORM=amd
-DROCM_PATH=
-DROCM_DIR=
-DTensile_LOGIC=
-DTensile_CODE_OBJECT_VERSION=default
-DTensile_CPU_THREADS=
-DTensile_LIBRARY_FORMAT=msgpack
"-DTensile_ROCM_ASSEMBLER_PATH=${_toolchain_dir}/bin/clang++"
"-DTensile_ROCM_OFFLOAD_BUNDLER_PATH=${_toolchain_dir}/bin/clang-offload-bundler"
# TODO: Enable clients.
COMPILER_TOOLCHAIN
amd-hip
BUILD_DEPS
hipBLAS-common
RUNTIME_DEPS
hip-clr
)
therock_cmake_subproject_glob_c_sources(hipBLASLt
SUBDIRS
.
)
therock_cmake_subproject_activate(hipBLASLt)
1 change: 1 addition & 0 deletions math-libs/hipBLAS-common
1 change: 1 addition & 0 deletions math-libs/hipBLASLt
9 changes: 9 additions & 0 deletions math-libs/post_hook_hipBLASLt.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
list(APPEND CMAKE_MODULE_PATH "${THEROCK_SOURCE_DIR}/cmake")
include(therock_rpath)

therock_set_install_rpath(
TARGETS
hipblaslt
PATHS
.
)
3 changes: 3 additions & 0 deletions math-libs/pre_hook_hipBLASLt.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Tensile needs to be explicitly configured with boilerplate flags needed to
# make the toolchain work. These come from our toolchain init.
set(Tensile_TOOLCHAIN_FLAGS "${CMAKE_CXX_FLAGS_INIT}")
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
From 2deed2807ef34fda8e713f1fcc38bfbcec1f759e Mon Sep 17 00:00:00 2001
From: Stella Laurenzo <[email protected]>
Date: Thu, 23 Jan 2025 14:07:26 -0800
Subject: [PATCH] Pin toolchain environment variables from configure->build
time.

Tensile uses a number of environment variables for controlling how it finds/uses its toolchain. However, there is no reliable way to pass these to hipBLASLt configure and have them stick at build time. This leads to all kinds of fallback paths on /opt/rocm and other issues.

This patch:

* Fixes the Tensile add_custom_commands to launch commands in an `env` with critical variables set.
* Uses project-consistent environment variables to find clang/assembler vs in two scripts vs hardcoding to /opt/rocm or requiring a ROCM_PATH.
* Adds CMake cache variables `Tensile_ROCM_OFFLOAD_BUNDLER_PATH` and `Tensile_ROCM_ASSEMBLER_PATH` to explicitly allow controlling these paths via configuration.
* Adds a `Tensile_TOOLCHAIN_FLAGS` env var and CMake setting. These flags will be prepended to any C/C++ compiler invocations and are used for explicit control of hip and bitcode library toolchain options (i.e. avoids spurious fallback to search heuristics that are less precise).
---
CMakeLists.txt | 3 ++
.../src/kernels/CompileSourceKernel.cmake | 12 +++--
.../src/kernels/compile_code_object.sh | 5 +-
tensilelite/Tensile/Ops/gen_assembly.sh | 2 +-
tensilelite/Tensile/TensileCreateLibrary.py | 5 +-
tensilelite/Tensile/cmake/TensileConfig.cmake | 48 ++++++++++++++++++-
6 files changed, 65 insertions(+), 10 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 68880a9b..f5c4532f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -196,6 +196,9 @@ else()
set( Tensile_COMPILER "amdclang++" CACHE STRING "Tensile compiler")
set( Tensile_LIBRARY_FORMAT "msgpack" CACHE STRING "Tensile library format")
set( Tensile_CPU_THREADS "" CACHE STRING "Number of threads for Tensile parallel build")
+ set( Tensile_ROCM_OFFLOAD_BUNDLER_PATH "" CACHE STRING "Path to clang-offload-bundler (or auto-detect)")
+ set( Tensile_ROCM_ASSEMBLER_PATH "" CACHE STRING "Path to a rocm assembler driver (or auto-detect)")
+ set( Tensile_TOOLCHAIN_FLAGS "" CACHE STRING "Flags that must be passed to tensile-invoked compilers/assemblers")

option( Tensile_MERGE_FILES "Tensile to merge kernels and solutions files?" ON )
option( Tensile_SHORT_FILENAMES "Tensile to use short file names? Use if compiler complains they're too long." OFF )
diff --git a/library/src/amd_detail/rocblaslt/src/kernels/CompileSourceKernel.cmake b/library/src/amd_detail/rocblaslt/src/kernels/CompileSourceKernel.cmake
index 13cf5bd2..83467d51 100644
--- a/library/src/amd_detail/rocblaslt/src/kernels/CompileSourceKernel.cmake
+++ b/library/src/amd_detail/rocblaslt/src/kernels/CompileSourceKernel.cmake
@@ -30,7 +30,13 @@ function(CompileSourceKernel source archs buildIdKind outputFolder)
add_custom_target(MatrixTransformKernels ALL
DEPENDS ${outputFolder}/hipblasltTransform.hsaco
VERBATIM)
- add_custom_command(OUTPUT ${outputFolder}/hipblasltTransform.hsaco
- COMMAND bash ${CMAKE_CURRENT_SOURCE_DIR}/src/amd_detail/rocblaslt/src/kernels/compile_code_object.sh ${source} ${archs} ${CMAKE_BUILD_TYPE} ${buildIdKind} ${outputFolder}/hipblasltTransform.hsaco
- COMMENT "Compiling source kernels")
+ add_custom_command(
+ OUTPUT ${outputFolder}/hipblasltTransform.hsaco
+ COMMAND
+ # See script for environment variables it uses.
+ ${CMAKE_COMMAND} -E env
+ "CMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}"
+ "Tensile_TOOLCHAIN_FLAGS=${Tensile_TOOLCHAIN_FLAGS}"
+ bash ${CMAKE_CURRENT_SOURCE_DIR}/src/amd_detail/rocblaslt/src/kernels/compile_code_object.sh ${source} ${archs} ${CMAKE_BUILD_TYPE} ${buildIdKind} ${outputFolder}/hipblasltTransform.hsaco
+ COMMENT "Compiling source kernels")
endfunction()
\ No newline at end of file
diff --git a/library/src/amd_detail/rocblaslt/src/kernels/compile_code_object.sh b/library/src/amd_detail/rocblaslt/src/kernels/compile_code_object.sh
index 4ff71ea0..ecd65b01 100644
--- a/library/src/amd_detail/rocblaslt/src/kernels/compile_code_object.sh
+++ b/library/src/amd_detail/rocblaslt/src/kernels/compile_code_object.sh
@@ -34,5 +34,6 @@ elif [ "$build_type" = "Debug" ]; then
fi

rocm_path="${ROCM_PATH:-/opt/rocm}"
-clang_path="${rocm_path}/bin/amdclang++"
-$clang_path -x hip "$sources" --offload-arch="${archs}" -c --offload-device-only -Xoffload-linker --build-id=$build_id_kind $additional_options -o "$dest"
\ No newline at end of file
+clang_path="${CMAKE_CXX_COMPILER:-${rocm_path}/bin/amdclang++}"
+clang_flags="${Tensile_TOOLCHAIN_FLAGS:-}"
+$clang_path ${clang_flags} -x hip "$sources" --offload-arch="${archs}" -c --offload-device-only -Xoffload-linker --build-id=$build_id_kind $additional_options -o "$dest"
\ No newline at end of file
diff --git a/tensilelite/Tensile/Ops/gen_assembly.sh b/tensilelite/Tensile/Ops/gen_assembly.sh
index 0b21b6c6..cded03b8 100755
--- a/tensilelite/Tensile/Ops/gen_assembly.sh
+++ b/tensilelite/Tensile/Ops/gen_assembly.sh
@@ -33,7 +33,7 @@ if ! [ -z ${ROCM_PATH+x} ]; then
rocm_path=${ROCM_PATH}
fi

-toolchain=${rocm_path}/llvm/bin/clang++
+toolchain="${TENSILE_ROCM_ASSEMBLER_PATH:-${rocm_path}/llvm/bin/clang++}"

. ${venv}/bin/activate

diff --git a/tensilelite/Tensile/TensileCreateLibrary.py b/tensilelite/Tensile/TensileCreateLibrary.py
index 331aed27..afd817cf 100644
--- a/tensilelite/Tensile/TensileCreateLibrary.py
+++ b/tensilelite/Tensile/TensileCreateLibrary.py
@@ -241,6 +241,7 @@ def buildSourceCodeObjectFile(CxxCompiler, outputPath, kernelFile):
if supportedCompiler(CxxCompiler):
archs, cmdlineArchs = splitArchs()

+ toolchain_flags = shlex.split(os.environ.get('Tensile_TOOLCHAIN_FLAGS', ''))
archFlags = ['--offload-arch=' + arch for arch in cmdlineArchs]

# needs to be fixed when Maneesh's change is made available
@@ -261,9 +262,9 @@ def buildSourceCodeObjectFile(CxxCompiler, outputPath, kernelFile):

if os.name == "nt":
hipFlags += ['-fms-extensions', '-fms-compatibility', '-fPIC', '-Wno-deprecated-declarations']
- compileArgs = launcher + [which(CxxCompiler)] + hipFlags + archFlags + [kernelFile, '-c', '-o', os.path.join(buildPath, objectFilename)]
+ compileArgs = launcher + [which(CxxCompiler)] + toolchain_flags + hipFlags + archFlags + [kernelFile, '-c', '-o', os.path.join(buildPath, objectFilename)]
else:
- compileArgs = launcher + [which(CxxCompiler)] + hipFlags + archFlags + [kernelFile, '-c', '-o', os.path.join(buildPath, objectFilename)]
+ compileArgs = launcher + [which(CxxCompiler)] + toolchain_flags + hipFlags + archFlags + [kernelFile, '-c', '-o', os.path.join(buildPath, objectFilename)]

if globalParameters["PrintCodeCommands"]:
print(CxxCompiler + ':' + ' '.join(compileArgs))
diff --git a/tensilelite/Tensile/cmake/TensileConfig.cmake b/tensilelite/Tensile/cmake/TensileConfig.cmake
index 49c151fe..063aa17c 100644
--- a/tensilelite/Tensile/cmake/TensileConfig.cmake
+++ b/tensilelite/Tensile/cmake/TensileConfig.cmake
@@ -72,6 +72,48 @@ endif()
add_subdirectory("${Tensile_ROOT}/Source" "Tensile")
include("${Tensile_ROOT}/Source/TensileCreateLibrary.cmake")

+# Gets a command line fragment that can be prepended to a command in order to
+# preserve toolchain options and environment variables into a child process.
+function(TensileGetEnvCommand out_var)
+ # Tensile uses a lot of environment variables for invoking the toolchain.
+ # Since any variables we have set here are configure-time, we have to arrange
+ # to include them in any build-time commands.
+ set(CommandEnv ${CMAKE_COMMAND} -E env)
+ list(APPEND CommandEnv "CMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}")
+ list(APPEND CommandEnv "CMAKE_C_COMPILER=${CMAKE_C_COMPILER}")
+ if(CMAKE_CXX_COMPILER_LAUNCHER)
+ list(APPEND "Tensile_CMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER}")
+ endif()
+
+ # For environment variables that Tensile uses, accept them either as a CMake
+ # cache option (for better ergonomics) or from the configure time environment.
+ # Note that cache options spell "Tensile" in mixed case for consistency whereas
+ # environment vars of a certain age are all caps. New environment variables
+ # try to be consistent.
+ if(NOT Tensile_ROCM_OFFLOAD_BUNDLER_PATH)
+ set(Tensile_ROCM_OFFLOAD_BUNDLER_PATH $ENV{TENSILE_ROCM_OFFLOAD_BUNDLER_PATH})
+ endif()
+ if(Tensile_ROCM_OFFLOAD_BUNDLER_PATH)
+ list(APPEND CommandEnv "TENSILE_ROCM_OFFLOAD_BUNDLER_PATH=${Tensile_ROCM_OFFLOAD_BUNDLER_PATH}")
+ endif()
+
+ if(NOT Tensile_ROCM_ASSEMBLER_PATH)
+ set(Tensile_ROCM_ASSEMBLER_PATH $ENV{TENSILE_ROCM_ASSEMBLER_PATH})
+ endif()
+ if(Tensile_ROCM_ASSEMBLER_PATH)
+ list(APPEND CommandEnv "TENSILE_ROCM_ASSEMBLER_PATH=${Tensile_ROCM_ASSEMBLER_PATH}")
+ endif()
+
+ if(NOT Tensile_TOOLCHAIN_FLAGS)
+ set(Tensile_TOOLCHAIN_FLAGS $ENV{Tensile_TOOLCHAIN_FLAGS})
+ endif()
+ if(Tensile_TOOLCHAIN_FLAGS)
+ list(APPEND CommandEnv "Tensile_TOOLCHAIN_FLAGS=${Tensile_TOOLCHAIN_FLAGS}")
+ endif()
+ list(APPEND CommandEnv "--")
+ set("${out_var}" "${CommandEnv}" PARENT_SCOPE)
+endfunction()
+
# Output target: ${Tensile_VAR_PREFIX}_LIBRARY_TARGET. Ensures that the libs get built in Tensile_OUTPUT_PATH/library.
function(TensileCreateLibraryFiles
Tensile_LOGIC_PATH
@@ -210,7 +252,8 @@ function(TensileCreateLibraryFiles
set(Options ${Options} "--build-id=${Tensile_BUILD_ID}")
endif()

- set(CommandLine ${VIRTUALENV_BIN_DIR}/${VIRTUALENV_PYTHON_EXENAME} ${Script} ${Options} ${Tensile_LOGIC_PATH} ${Tensile_OUTPUT_PATH} HIP)
+ TensileGetEnvCommand(CommandEnv)
+ set(CommandLine ${CommandEnv} ${VIRTUALENV_BIN_DIR}/${VIRTUALENV_PYTHON_EXENAME} ${Script} ${Options} ${Tensile_LOGIC_PATH} ${Tensile_OUTPUT_PATH} HIP)
message(STATUS "Tensile_CREATE_COMMAND: ${CommandLine}")

if(Tensile_EMBED_LIBRARY)
@@ -272,6 +315,7 @@ function(TensileCreateExtOpLibraries OutputFolder ArchStr)
set(ext_op_library_path ${build_tmp_dir}/hipblasltExtOpLibrary.dat)
file(REMOVE ${ext_op_library_path})

+ TensileGetEnvCommand(CommandEnv)
add_custom_command(
OUTPUT ${OutputFolder}/hipblasltExtOpLibrary.dat
WORKING_DIRECTORY "${cwd}"
@@ -279,7 +323,7 @@ function(TensileCreateExtOpLibraries OutputFolder ArchStr)
COMMAND ${CMAKE_COMMAND} -E rm -rf ${build_tmp_dir}
COMMAND ${CMAKE_COMMAND} -E make_directory ${build_tmp_dir}
COMMAND ${CMAKE_COMMAND} -E make_directory ${OutputFolder}
- COMMAND bash "${script}" "\"${Archs}\"" "${build_tmp_dir}" "${VIRTUALENV_HOME_DIR}" "${Tensile_BUILD_ID}"
+ COMMAND ${CommandEnv} bash "${script}" "\"${Archs}\"" "${build_tmp_dir}" "${VIRTUALENV_HOME_DIR}" "${Tensile_BUILD_ID}"
COMMAND ${CMAKE_COMMAND} -E copy ${ext_op_library_path} ${build_tmp_dir}/extop_*.co ${OutputFolder}
)

--
2.43.0