Skip to content

Commit

Permalink
add HAVE_CUDA macros & cmake definitions; add gpu_init to wan_init
Browse files Browse the repository at this point in the history
  • Loading branch information
bozhang-hpc committed May 21, 2024
1 parent 26704f5 commit f81fb03
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 39 deletions.
40 changes: 2 additions & 38 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,25 +11,19 @@ if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.17)
if(CUDAToolkit_FOUND)
enable_language(CUDA)
include_directories(${CUDAToolkit_INCLUDE_DIRS})
add_definitions(-DHAVE_CUDA)
set(HAVE_CUDA TRUE)
endif()
else()
find_package(CUDA)
if(CUDA_FOUND)
enable_language(CUDA)
include_directories(${CUDA_INCLUDE_DIRS})
add_definitions(-DHAVE_CUDA)
set(HAVE_CUDA TRUE)
endif()
endif()

# Optional GDRCOPY support; it is only probed when HAVE_CUDA was set above.
if(HAVE_CUDA)
find_package(GDRCOPY)
# NOTE(review): HAVE_GDRCOPY and GDRCOPY_INCLUDE_DIRS are presumably set by the
# project's GDRCOPY find module -- confirm, it is not visible here.
if(HAVE_GDRCOPY)
include_directories(${GDRCOPY_INCLUDE_DIRS})
# Expose the feature to the C/CUDA sources as a preprocessor macro.
add_definitions(-DHAVE_GDRCOPY)
endif()
endif()

# list of source files
if(HAVE_CUDA)
set(dspaces-src util.c bbox.c ssd_copy.cu ss_data_cuda.c ss_data.c dspaces-client.c dspaces-ops.c)
Expand Down Expand Up @@ -71,38 +65,8 @@ if(HAVE_CUDA)
target_link_libraries (dspaces cuda ${CUDA_LIBRARIES})
target_include_directories (dspaces PUBLIC ${CUDA_INCLUDE_DIRS})
endif()

if(HAVE_GDRCOPY)
target_link_libraries (dspaces ${GDRCOPY_LIBRARIES})
target_include_directories (dspaces PUBLIC ${GDRCOPY_INCLUDE_DIRS})
endif()
endif()



# if(HAVE_DRC)
# target_link_libraries (dspaces margo m pthread ${DRC_LIBRARIES})
# target_include_directories (dspaces PUBLIC $<INSTALL_INTERFACE:include> ${DRC_INCLUDE_DIRS})
# else()
# if(HAVE_GDRCOPY)
# if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.17)
# target_link_libraries (dspaces margo m pthread CUDA::cudart CUDA::cuda_driver ${GDRCOPY_LIBRARIES})
# target_include_directories (dspaces PUBLIC $<INSTALL_INTERFACE:include> ${CUDAToolkit_INCLUDE_DIRS} ${GDRCOPY_INCLUDE_DIRS})
# else()
# target_link_libraries (dspaces margo m pthread cuda ${CUDA_LIBRARIES} ${GDRCOPY_LIBRARIES})
# target_include_directories (dspaces PUBLIC $<INSTALL_INTERFACE:include> ${CUDA_INCLUDE_DIRS} ${GDRCOPY_INCLUDE_DIRS})
# endif()
# else()
# if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.17)
# target_link_libraries (dspaces margo m pthread CUDA::cudart CUDA::cuda_driver)
# target_include_directories (dspaces PUBLIC $<INSTALL_INTERFACE:include> ${CUDAToolkit_INCLUDE_DIRS})
# else()
# target_link_libraries (dspaces margo m pthread cuda ${CUDA_LIBRARIES})
# target_include_directories (dspaces PUBLIC $<INSTALL_INTERFACE:include> ${CUDA_INCLUDE_DIRS})
# endif()
# endif()
# endif()

# local includes BEFORE, in case old incompatible .h files are in prefix/include
target_include_directories (dspaces BEFORE PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include>)
Expand Down
60 changes: 59 additions & 1 deletion src/dspaces-client.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@
/* Number of bytes in one mebibyte (1024 * 1024). */
#define MB (1024 * 1024)
/* 128 MiB. NOTE(review): by its name, a cap on a single bulk transfer -- the
 * use sites are not visible here, confirm. */
#define BULK_TRANSFER_MAX (128 * MB)

#ifdef HAVE_CUDA
#define CUDA_ASSERT(x) \
do \
{ \
Expand Down Expand Up @@ -145,6 +146,7 @@

#define CUDA_MAX_CONCURRENT_KERNELS 128
#define DSPACES_CUDA_DEFAULT_CONCURRENT_KERNELS 128
#endif /* HAVE_CUDA */

static int g_is_initialized = 0;

Expand All @@ -166,6 +168,7 @@ struct sub_list_node {
int id;
};

#ifdef HAVE_CUDA
struct dspaces_cuda_dev_info {
int gdr_support;
int concurrency_enabled;
Expand All @@ -182,6 +185,7 @@ struct dspaces_cuda_info {
int num_concurrent_kernels;
struct dspaces_cuda_dev_info *dev_list;
};
#endif /* HAVE_CUDA */


struct dspaces_put_req {
Expand Down Expand Up @@ -224,7 +228,11 @@ struct dspaces_client {
int local_put_count; // used during finalize
int f_debug;
int f_final;

#ifdef HAVE_CUDA
struct dspaces_cuda_info cuda_info;
#endif /* HAVE_CUDA*/

int listener_init;
struct dspaces_put_req *put_reqs;
struct dspaces_put_req *put_reqs_end;
Expand Down Expand Up @@ -579,6 +587,7 @@ static int dspaces_init_internal(int rank, dspaces_client_t *c)
return dspaces_SUCCESS;
}

#ifdef HAVE_CUDA
static int cuda_max_concurrent_kernels_num(int dev_rank)
{
/*
Expand Down Expand Up @@ -692,6 +701,7 @@ static int dspaces_init_gpu(dspaces_client_t client, const char* listen_addr_str

return dspaces_SUCCESS;
}
#endif /* HAVE_CUDA */

static int dspaces_init_margo(dspaces_client_t client,
const char *listen_addr_str)
Expand Down Expand Up @@ -885,6 +895,7 @@ static int dspaces_post_init(dspaces_client_t client)
client->local_put_count = 0;
client->f_final = 0;

#ifdef HAVE_CUDA
int device, totdevice;
CUDA_ASSERT_RT_CLIENT(cudaGetDevice(&device));
CUDA_ASSERT_RT_CLIENT(cudaGetDeviceCount(&totdevice));
Expand All @@ -893,7 +904,7 @@ static int dspaces_post_init(dspaces_client_t client)
CUDA_ASSERT_RT_CLIENT(cudaMemGetInfo(&d_free, &d_total));
DEBUG_OUT("Rank %d: Device = %d/%d, Host Free Memory = %lld, Device Free Memory = %zu \n",
client->rank, device, totdevice, meminfo.MemAvailableMiB, d_free);

#endif /* HAVE_CUDA */

return (dspaces_SUCCESS);
}
Expand All @@ -914,10 +925,12 @@ int dspaces_init(int rank, dspaces_client_t *c)
return (ret);
}

#ifdef HAVE_CUDA
ret = dspaces_init_gpu(client, listen_addr_str);
if(ret != dspaces_SUCCESS) {
return (ret);
}
#endif /* HAVE_CUDA */

ret = dspaces_init_margo(client, listen_addr_str);

Expand Down Expand Up @@ -954,10 +967,12 @@ int dspaces_init_mpi(MPI_Comm comm, dspaces_client_t *c)
return (ret);
}

#ifdef HAVE_CUDA
ret = dspaces_init_gpu(client, listen_addr_str);
if(ret != dspaces_SUCCESS) {
return (ret);
}
#endif /* HAVE_CUDA */

ret = dspaces_init_margo(client, listen_addr_str);
free(listen_addr_str);
Expand Down Expand Up @@ -1000,6 +1015,14 @@ int dspaces_init_wan(const char *listen_addr_str, const char *conn_str,
if(ret != 0) {
return (ret);
}

#ifdef HAVE_CUDA
ret = dspaces_init_gpu(client, listen_addr_str);
if(ret != dspaces_SUCCESS) {
return (ret);
}
#endif /* HAVE_CUDA */

dspaces_init_margo(client, listen_addr_str);
if(ret != 0) {
return (ret);
Expand Down Expand Up @@ -1032,6 +1055,14 @@ int dspaces_init_wan_mpi(const char *listen_addr_str, const char *conn_str,
if(ret != 0) {
return (ret);
}

#ifdef HAVE_CUDA
ret = dspaces_init_gpu(client, listen_addr_str);
if(ret != dspaces_SUCCESS) {
return (ret);
}
#endif /* HAVE_CUDA */

ret = dspaces_init_margo(client, listen_addr_str);
if(ret != 0) {
return (ret);
Expand Down Expand Up @@ -1097,7 +1128,10 @@ int dspaces_fini(dspaces_client_t client)
free(client->server_address);
ls_free(client->dcg->ls);
free(client->dcg);

#ifdef HAVE_CUDA
free(client->cuda_info.dev_list);
#endif /* HAVE_CUDA */

margo_finalize(client->mid);

Expand Down Expand Up @@ -1316,6 +1350,7 @@ static int dspaces_cpu_put(dspaces_client_t client, const char *var_name, unsign
return (dspaces_cpu_put_tag(client, var_name, ver, elem_size, 0, ndim, lb, ub, data));
}

#ifdef HAVE_CUDA
static int cuda_put_tag_host_opt(dspaces_client_t client, const char *var_name,
unsigned int ver, int elem_size, int tag, int ndim,
uint64_t *lb, uint64_t *ub, const void *data)
Expand Down Expand Up @@ -1563,19 +1598,26 @@ static int dspaces_cuda_put(dspaces_client_t client, const char *var_name, unsig
}
return ret;
}
#endif /* HAVE_CUDA */

/*
 * Public put entry point: routes the request to the CUDA or the CPU
 * implementation depending on where `data` lives.
 *
 * All arguments are forwarded unchanged to the selected implementation and
 * its return value is passed straight back to the caller.
 *
 * With HAVE_CUDA defined, `data` is classified with
 * cudaPointerGetAttributes(); only cudaMemoryTypeDevice pointers take the
 * dspaces_cuda_put() path, every other memory type goes to dspaces_cpu_put().
 * Without HAVE_CUDA the CPU path is the only one compiled in.
 *
 * NOTE(review): some CUDA runtimes report an error from
 * cudaPointerGetAttributes() for plain (unregistered) host pointers; what
 * CUDA_ASSERT_RT_CLIENT does in that case is not visible here -- confirm.
 */
int dspaces_put(dspaces_client_t client, const char *var_name, unsigned int ver,
                int elem_size, int ndim, uint64_t *lb, uint64_t *ub,
                const void *data)
{
    int ret;

#ifdef HAVE_CUDA
    struct cudaPointerAttributes data_attr;

    CUDA_ASSERT_RT_CLIENT(cudaPointerGetAttributes(&data_attr, data));

    /* Device-resident buffer: hand off to the GPU-aware implementation. */
    if(data_attr.type == cudaMemoryTypeDevice) {
        ret = dspaces_cuda_put(client, var_name, ver, elem_size, ndim, lb, ub,
                               data);
        return ret;
    }
#endif /* HAVE_CUDA */

    /* Host-accessible buffer, or a build without CUDA support. */
    ret = dspaces_cpu_put(client, var_name, ver, elem_size, ndim, lb, ub, data);
    return ret;
}

Expand All @@ -1584,13 +1626,19 @@ int dspaces_put_tag(dspaces_client_t client, const char *var_name, unsigned int
const void *data)
{
int ret;

#ifdef HAVE_CUDA
struct cudaPointerAttributes ptr_attr;
CUDA_ASSERT_RT_CLIENT(cudaPointerGetAttributes(&ptr_attr, data));
if(ptr_attr.type == cudaMemoryTypeDevice) {
ret = dspaces_cuda_put_tag(client, var_name, ver, elem_size, tag, ndim, lb, ub, data);
} else {
ret = dspaces_cpu_put_tag(client, var_name, ver, elem_size, tag, ndim, lb, ub, data);
}
#else
ret = dspaces_cpu_put_tag(client, var_name, ver, elem_size, tag, ndim, lb, ub, data);
#endif /* HAVE_CUDA */

return ret;
}

Expand Down Expand Up @@ -1918,6 +1966,7 @@ static int get_data(dspaces_client_t client, int num_odscs,
return 0;
}

#ifdef HAVE_CUDA
static int cuda_get_data_baseline(dspaces_client_t client, int num_odscs,
obj_descriptor req_obj, obj_descriptor *odsc_tab, void *data)
{
Expand Down Expand Up @@ -2412,6 +2461,7 @@ static int cuda_get_data_hybrid(dspaces_client_t client, int num_odscs,

return ret;
}
#endif /* HAVE_CUDA */

static int dspaces_init_listener(dspaces_client_t client)
{
Expand Down Expand Up @@ -2766,6 +2816,7 @@ int dspaces_cpu_get(dspaces_client_t client, const char *var_name, unsigned int
return (ret);
}

#ifdef HAVE_CUDA
static int dspaces_cuda_get(dspaces_client_t client, const char *var_name, unsigned int ver,
int elem_size, int ndim, uint64_t *lb, uint64_t *ub, void *data, int timeout)
{
Expand Down Expand Up @@ -2841,19 +2892,26 @@ static int dspaces_cuda_get(dspaces_client_t client, const char *var_name, unsig
}
return (ret);
}
#endif /* HAVE_CUDA */

/*
 * Public get entry point: routes the request to the CUDA or the CPU
 * implementation depending on where the destination buffer `data` lives.
 *
 * All arguments (including `timeout`) are forwarded unchanged to the selected
 * implementation and its return value is passed straight back to the caller.
 *
 * With HAVE_CUDA defined, `data` is classified with
 * cudaPointerGetAttributes(); only cudaMemoryTypeDevice pointers take the
 * dspaces_cuda_get() path, every other memory type goes to dspaces_cpu_get().
 * Without HAVE_CUDA the CPU path is the only one compiled in.
 *
 * NOTE(review): some CUDA runtimes report an error from
 * cudaPointerGetAttributes() for plain (unregistered) host pointers; what
 * CUDA_ASSERT_RT_CLIENT does in that case is not visible here -- confirm.
 */
int dspaces_get(dspaces_client_t client, const char *var_name, unsigned int ver,
                int elem_size, int ndim, uint64_t *lb, uint64_t *ub, void *data,
                int timeout)
{
    int ret;

#ifdef HAVE_CUDA
    struct cudaPointerAttributes data_attr;

    CUDA_ASSERT_RT_CLIENT(cudaPointerGetAttributes(&data_attr, data));

    /* Device-resident destination: use the GPU-aware implementation. */
    if(data_attr.type == cudaMemoryTypeDevice) {
        ret = dspaces_cuda_get(client, var_name, ver, elem_size, ndim, lb, ub,
                               data, timeout);
        return ret;
    }
#endif /* HAVE_CUDA */

    /* Host-accessible destination, or a build without CUDA support. */
    ret = dspaces_cpu_get(client, var_name, ver, elem_size, ndim, lb, ub, data,
                          timeout);
    return ret;
}

Expand Down

0 comments on commit f81fb03

Please sign in to comment.