Skip to content
This repository has been archived by the owner on Feb 15, 2024. It is now read-only.

Commit

Permalink
ATMI Release for ROCm v4.5
Browse files Browse the repository at this point in the history
  • Loading branch information
ashwinma committed Oct 28, 2021
1 parent 50b7143 commit 9e4b077
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 96 deletions.
73 changes: 43 additions & 30 deletions src/runtime/core/data.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -505,10 +505,10 @@ atmi_status_t DataTaskImpl::dispatch() {
hsa_amd_pointer_info_t dest_ptr_info;
src_ptr_info.size = sizeof(hsa_amd_pointer_info_t);
dest_ptr_info.size = sizeof(hsa_amd_pointer_info_t);
err = hsa_amd_pointer_info(reinterpret_cast<void *>(src), &src_ptr_info,
NULL, /* alloc fn ptr */
NULL, /* num_agents_accessible */
NULL); /* accessible agents */
err = hsa_amd_pointer_info(reinterpret_cast<void *>(const_cast<void *>(src)),
&src_ptr_info, NULL, /* alloc fn ptr */
NULL, /* num_agents_accessible */
NULL); /* accessible agents */
ErrorCheck(Checking src pointer info, err);
err = hsa_amd_pointer_info(reinterpret_cast<void *>(dest), &dest_ptr_info,
NULL, /* alloc fn ptr */
Expand Down Expand Up @@ -541,13 +541,17 @@ atmi_status_t DataTaskImpl::dispatch() {
} else if (src_data && !dest_data) {
type = Direction::ATMI_D2H;
src_agent = get_mem_agent(src_data->place());
dest_agent = src_agent;
dest_agent = cpu_agent;
// TODO(ashwin): can the two agents be the GPU agent itself? ROCr team: no
// dest_agent = src_agent;
src_ptr = src;
dest_ptr = dest;
} else if (!src_data && dest_data) {
type = Direction::ATMI_H2D;
dest_agent = get_mem_agent(dest_data->place());
src_agent = dest_agent;
src_agent = cpu_agent;
// TODO(ashwin): can the two agents be the GPU agent itself? ROCr team: no
// src_agent = dest_agent;
src_ptr = src;
dest_ptr = dest;
} else {
Expand All @@ -573,9 +577,10 @@ atmi_status_t DataTaskImpl::dispatch() {
// signal count = 2 (one for the actual host-device copy and another
// for the H2H copy to set up the device copy).
std::thread(
[](void *dst, const void *src, size_t size, hsa_agent_t agent,
Direction type, atmi_mem_place_t cpu, hsa_signal_t signal,
std::vector<hsa_signal_t> dep_signals, TaskImpl *task) {
[](void *dst, const void *src, size_t size, hsa_agent_t src_agent,
hsa_agent_t dest_agent, Direction type, atmi_mem_place_t cpu,
hsa_signal_t signal, std::vector<hsa_signal_t> dep_signals,
TaskImpl *task) {
atmi_status_t ret;
hsa_status_t err;
atl_dep_sync_t dep_sync_type =
Expand All @@ -584,6 +589,7 @@ atmi_status_t DataTaskImpl::dispatch() {
const void *src_ptr = src;
void *dest_ptr = dst;
ret = atmi_malloc(&temp_host_ptr, size, cpu);
assert(ret == ATMI_STATUS_SUCCESS && "temp atmi_malloc");
if (type == Direction::ATMI_H2D) {
memcpy(temp_host_ptr, src, size);
src_ptr = (const void *)temp_host_ptr;
Expand All @@ -596,27 +602,30 @@ atmi_status_t DataTaskImpl::dispatch() {
if (dep_sync_type == ATL_SYNC_BARRIER_PKT && !dep_signals.empty()) {
DEBUG_PRINT("SDMA-host for %p (%lu) with %lu dependencies\n", task,
task->id_, dep_signals.size());
err = hsa_amd_memory_async_copy(dest_ptr, agent, src_ptr, agent,
size, dep_signals.size(),
&(dep_signals[0]), signal);
err = hsa_amd_memory_async_copy(dest_ptr, dest_agent, src_ptr,
src_agent, size, dep_signals.size(),
dep_signals.data(), signal);
ErrorCheck(Copy async between memory pools, err);
} else {
DEBUG_PRINT("SDMA-host for %p (%lu)\n", task, task->id_);
err = hsa_amd_memory_async_copy(dest_ptr, agent, src_ptr, agent,
size, 0, NULL, signal);
err = hsa_amd_memory_async_copy(dest_ptr, dest_agent, src_ptr,
src_agent, size, 0, NULL, signal);
ErrorCheck(Copy async between memory pools, err);
}
task->set_state(ATMI_DISPATCHED);
hsa_signal_wait_acquire(signal, HSA_SIGNAL_CONDITION_EQ, 1,
UINT64_MAX, ATMI_WAIT_STATE);

// cleanup for D2H and H2D
if (type == Direction::ATMI_D2H) {
memcpy(dst, temp_host_ptr, size);
}
atmi_free(temp_host_ptr);
ret = atmi_free(temp_host_ptr);
assert(ret == ATMI_STATUS_SUCCESS && "temp atmi_free");
hsa_signal_subtract_acq_rel(signal, 1);
},
dest, src, size, src_agent, type, cpu, signal_, dep_signals, this)
dest, src, size, src_agent, dest_agent, type, cpu, signal_, dep_signals,
this)
.detach();
} else {
if (groupable_ == ATMI_TRUE) {
Expand All @@ -635,7 +644,7 @@ atmi_status_t DataTaskImpl::dispatch() {
dep_signals.size());
err = hsa_amd_memory_async_copy(dest_ptr, dest_agent, src_ptr, src_agent,
size, dep_signals.size(),
&(dep_signals[0]), signal_);
dep_signals.data(), signal_);
ErrorCheck(Copy async between memory pools, err);
} else {
DEBUG_PRINT("SDMA for %p (%lu)\n", this, id_);
Expand All @@ -648,7 +657,7 @@ atmi_status_t DataTaskImpl::dispatch() {
}

atmi_status_t Runtime::Memcpy(void *dest, const void *src, size_t size) {
atmi_status_t ret;
atmi_status_t ret = ATMI_STATUS_SUCCESS;
hsa_status_t err;

#ifndef USE_ROCR_PTR_INFO
Expand All @@ -659,10 +668,10 @@ atmi_status_t Runtime::Memcpy(void *dest, const void *src, size_t size) {
hsa_amd_pointer_info_t dest_ptr_info;
src_ptr_info.size = sizeof(hsa_amd_pointer_info_t);
dest_ptr_info.size = sizeof(hsa_amd_pointer_info_t);
err = hsa_amd_pointer_info(reinterpret_cast<void *>(src), &src_ptr_info,
NULL, /* alloc fn ptr */
NULL, /* num_agents_accessible */
NULL); /* accessible agents */
err = hsa_amd_pointer_info(reinterpret_cast<void *>(const_cast<void *>(src)),
&src_ptr_info, NULL, /* alloc fn ptr */
NULL, /* num_agents_accessible */
NULL); /* accessible agents */
ErrorCheck(Checking src pointer info, err);
err = hsa_amd_pointer_info(reinterpret_cast<void *>(dest), &dest_ptr_info,
NULL, /* alloc fn ptr */
Expand All @@ -685,21 +694,23 @@ atmi_status_t Runtime::Memcpy(void *dest, const void *src, size_t size) {
if (src_data && !dest_data) {
type = Direction::ATMI_D2H;
src_agent = get_mem_agent(src_data->place());
dest_agent = src_agent;
// dest_agent = cpu_agent; // FIXME: can the two agents be the GPU agent
// itself?
dest_agent = cpu_agent;
// TODO(ashwin): can the two agents be the GPU agent itself? ROCr team: no
// dest_agent = src_agent;
ret = atmi_malloc(&temp_host_ptr, size, cpu);
assert(ret == ATMI_STATUS_SUCCESS && "temp atmi_malloc");
// err = hsa_amd_agents_allow_access(1, &src_agent, NULL, temp_host_ptr);
// ErrorCheck(Allow access to ptr, err);
src_ptr = src;
dest_ptr = temp_host_ptr;
} else if (!src_data && dest_data) {
type = Direction::ATMI_H2D;
dest_agent = get_mem_agent(dest_data->place());
// src_agent = cpu_agent; // FIXME: can the two agents be the GPU agent
// itself?
src_agent = dest_agent;
src_agent = cpu_agent;
// TODO(ashwin): can the two agents be the GPU agent itself? ROCr team: no
// src_agent = dest_agent;
ret = atmi_malloc(&temp_host_ptr, size, cpu);
assert(ret == ATMI_STATUS_SUCCESS && "temp atmi_malloc");
memcpy(temp_host_ptr, src, size);
// FIXME: ideally lock would be the better approach, but we need to try to
// understand why the h2d copy segfaults if we don't have the below lines
Expand All @@ -722,20 +733,22 @@ atmi_status_t Runtime::Memcpy(void *dest, const void *src, size_t size) {
}
DEBUG_PRINT("Memcpy source agent: %lu\n", src_agent.handle);
DEBUG_PRINT("Memcpy dest agent: %lu\n", dest_agent.handle);
hsa_signal_store_release(IdentityCopySignal, 1);
hsa_signal_store_screlease(IdentityCopySignal, 1);
// hsa_signal_add_acq_rel(IdentityCopySignal, 1);
err = hsa_amd_memory_async_copy(dest_ptr, dest_agent, src_ptr, src_agent,
size, 0, NULL, IdentityCopySignal);
ErrorCheck(Copy async between memory pools, err);
hsa_signal_wait_acquire(IdentityCopySignal, HSA_SIGNAL_CONDITION_EQ, 0,
hsa_signal_wait_relaxed(IdentityCopySignal, HSA_SIGNAL_CONDITION_EQ, 0,
UINT64_MAX, ATMI_WAIT_STATE);

// cleanup for D2H and H2D
if (type == Direction::ATMI_D2H) {
memcpy(dest, temp_host_ptr, size);
ret = atmi_free(temp_host_ptr);
assert(ret == ATMI_STATUS_SUCCESS && "temp atmi_free");
} else if (type == Direction::ATMI_H2D) {
ret = atmi_free(temp_host_ptr);
assert(ret == ATMI_STATUS_SUCCESS && "temp atmi_free");
}
if (err != HSA_STATUS_SUCCESS || ret != ATMI_STATUS_SUCCESS)
ret = ATMI_STATUS_ERROR;
Expand Down
100 changes: 34 additions & 66 deletions src/runtime/core/system.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ void allow_access_to_all_gpu_agents(void *ptr) {
for (int i = 0; i < gpu_procs.size(); i++) {
agents.push_back(gpu_procs[i].agent());
}
err = hsa_amd_agents_allow_access(agents.size(), &agents[0], NULL, ptr);
err = hsa_amd_agents_allow_access(agents.size(), agents.data(), NULL, ptr);
ErrorCheck(Allow agents ptr access, err);
}

Expand Down Expand Up @@ -960,42 +960,6 @@ bool isImplicit(KernelArgMD::ValueKind value_kind) {
}
}

/// Symbol-iteration callback that rejects duplicate variable names.
///
/// Queries the kind of `symbol`; for variable symbols it reads the symbol
/// name and records it in the caller-supplied set. Symbols of any other kind
/// are accepted without being recorded.
///
/// @param code_object Code object being iterated (not used by this callback).
/// @param symbol      Code symbol to inspect.
/// @param data        Must point to a std::set<std::string> holding the
///                    variable names seen so far; it is updated in place.
/// @return HSA_STATUS_SUCCESS when the symbol is not a variable or its name
///         was newly recorded; HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED when
///         a variable with the same name is already in the set.
hsa_status_t validate_code_object(hsa_code_object_t code_object,
                                  hsa_code_symbol_t symbol, void *data) {
  std::set<std::string> *SymbolSet = static_cast<std::set<std::string> *>(data);

  hsa_symbol_kind_t type;
  hsa_status_t err;
  err = hsa_code_symbol_get_info(symbol, HSA_CODE_SYMBOL_INFO_TYPE, &type);
  ErrorCheck(Symbol info extraction, err);
  DEBUG_PRINT("Exec Symbol type: %d\n", type);

  if (type != HSA_SYMBOL_KIND_VARIABLE) {
    DEBUG_PRINT("Symbol is an indirect function\n");
    return HSA_STATUS_SUCCESS;
  }

  // Zero-initialised so a failed query can never size the buffer from garbage.
  uint32_t name_length = 0;
  err = hsa_code_symbol_get_info(symbol, HSA_CODE_SYMBOL_INFO_NAME_LENGTH,
                                 &name_length);
  ErrorCheck(Symbol info extraction, err);

  // A std::string owns the storage, so there is no unchecked malloc and no
  // leak if ErrorCheck leaves this function early.
  // NOTE(review): ErrorCheck is defined elsewhere; its exit behavior is
  // assumed, not visible here.
  std::string buffer(name_length, '\0');
  err = hsa_code_symbol_get_info(symbol, HSA_CODE_SYMBOL_INFO_NAME, &buffer[0]);
  ErrorCheck(Symbol info extraction, err);

  // Cut at the first NUL, matching the C-string semantics of the lookup key
  // used previously.
  const std::string name(buffer.c_str());

  // insert() reports whether the key was new, so one lookup is enough.
  if (!SymbolSet->insert(name).second) {
    // Symbol already found. Return Error
    DEBUG_PRINT("Symbol %s already found!\n", name.c_str());
    return HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED;
  }
  return HSA_STATUS_SUCCESS;
}

static amd_comgr_status_t getMetaBuf(const amd_comgr_metadata_node_t meta,
std::string *str) {
size_t size = 0;
Expand Down Expand Up @@ -1613,7 +1577,7 @@ hsa_status_t get_code_object_custom_metadata(atmi_platform_type_t platform,
HSA_STATUS_ERROR_INVALID_CODE_OBJECT);
}

hsa_status_t populate_InfoTables(hsa_executable_t executable,
hsa_status_t populate_InfoTables(hsa_executable_t executable, hsa_agent_t agent,
hsa_executable_symbol_t symbol, void *data) {
int gpu = *static_cast<int *>(data);
hsa_symbol_kind_t type;
Expand Down Expand Up @@ -1685,6 +1649,14 @@ hsa_status_t populate_InfoTables(hsa_executable_t executable,
ErrorCheck(Symbol info extraction, err);
name[name_length] = 0;

if (SymbolInfoTable[gpu].find(std::string(name)) !=
SymbolInfoTable[gpu].end()) {
// Symbol already found. Return Error
DEBUG_PRINT("Symbol %s already found!\n", name);
ErrorCheck(Symbol variable already defined check,
HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED);
}

atl_symbol_info_t info;

err = hsa_executable_symbol_get_info(
Expand Down Expand Up @@ -1735,13 +1707,11 @@ atmi_status_t Runtime::RegisterModuleFromMemory(void **modules,
// GCN with base profile
agent_profile = HSA_PROFILE_FULL;
/* Create the empty executable. */
err = hsa_executable_create(agent_profile, HSA_EXECUTABLE_STATE_UNFROZEN, "",
&executable);
err = hsa_executable_create_alt(agent_profile,
HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, NULL,
&executable);
ErrorCheck(Create the executable, err);

// initially empty symbol set for every executable
std::set<std::string> SymbolSet;

bool module_load_success = false;
for (int i = 0; i < num_modules; i++) {
void *module_bytes = modules[i];
Expand All @@ -1756,24 +1726,19 @@ atmi_status_t Runtime::RegisterModuleFromMemory(void **modules,
ErrorCheckAndContinue(Getting custom code object metadata, err);
free(tmp_module);

// Deserialize code object.
hsa_code_object_t code_object = {0};
err = hsa_code_object_deserialize(module_bytes, module_size, NULL,
&code_object);
ErrorCheckAndContinue(Code Object Deserialization, err);
assert(0 != code_object.handle);

err = hsa_code_object_iterate_symbols(code_object, validate_code_object,
static_cast<void *>(&SymbolSet));
ErrorCheckAndContinue(Iterating over symbols for execuatable, err);
// Read code object.
hsa_code_object_reader_t code_obj_reader = {0};
err = hsa_code_object_reader_create_from_memory(module_bytes, module_size,
&code_obj_reader);
ErrorCheck(Create the code object reader, err);
assert(0 != code_obj_reader.handle);

/* Load the code object. */
err =
hsa_executable_load_code_object(executable, agent, code_object, NULL);
err = hsa_executable_load_agent_code_object(executable, agent,
code_obj_reader, NULL, NULL);
ErrorCheckAndContinue(Loading the code object, err);

// cannot iterate over symbols until executable is frozen

} else {
ErrorCheckAndContinue(Loading non - AMDGCN code object,
HSA_STATUS_ERROR_INVALID_CODE_OBJECT);
Expand All @@ -1783,20 +1748,23 @@ atmi_status_t Runtime::RegisterModuleFromMemory(void **modules,
DEBUG_PRINT("Modules loaded successful? %d\n", module_load_success);
if (module_load_success) {
/* Freeze the executable; it can now be queried for symbols. */
err = hsa_executable_freeze(executable, "");
err = hsa_executable_freeze(executable, NULL);
ErrorCheck(Freeze the executable, err);

err = hsa_executable_iterate_symbols(executable, populate_InfoTables,
static_cast<void *>(&gpu));
ErrorCheck(Iterating over symbols for execuatable, err);
// DEPRECATED API
// err = hsa_executable_iterate_symbols(executable, populate_InfoTables,
// static_cast<void *>(&gpu));
// ErrorCheck(Iterating over symbols for execuatable, err);

// err = hsa_executable_iterate_program_symbols(executable,
// iterate_program_symbols, &gpu);
// ErrorCheckAndContinue(Iterating over symbols for execuatable, err);
// TODO(ashwin): find out the difference between the below two iterator
// APIs. err = hsa_executable_iterate_program_symbols(executable,
// populate_InfoTables,
// static_cast<void *>(&gpu));
// ErrorCheck(Iterating over symbols for execuatable, err);

// err = hsa_executable_iterate_agent_symbols(executable,
// iterate_agent_symbols, &gpu);
// ErrorCheckAndContinue(Iterating over symbols for execuatable, err);
err = hsa_executable_iterate_agent_symbols(
executable, agent, populate_InfoTables, static_cast<void *>(&gpu));
ErrorCheck(Iterating over symbols for execuatable, err);

// save the executable and destroy during finalize
g_executables.push_back(executable);
Expand Down

0 comments on commit 9e4b077

Please sign in to comment.