Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding cuda device synchronizes frequently and peeking at cuda errors #59

Open
wants to merge 1 commit into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ if (WITH_CALIPER)
endif()

if (WITH_AMS_DEBUG)
list(APPEND AMS_APP_DEFINES "-DLIBAMS_VERBOSE")
list(APPEND AMS_APP_DEFINES "-DAMS_DEBUG")
endif()

# ------------------------------------------------------------------------------
Expand Down
16 changes: 2 additions & 14 deletions src/AMSlib/wf/cuda/utilities.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <iostream>

#include "wf/resource_manager.hpp"
#include "wf/debug.h"

//#include <stdio.h>
//#include <stdlib.h>
Expand All @@ -35,23 +36,10 @@ __device__ __inline__ int pow2i(int e) { return 1 << e; }

inline void __cudaSafeCall(cudaError err, const char* file, const int line)
{
#ifdef CUDA_ERROR_CHECK
if (cudaSuccess != err) {
fprintf(stderr,
"cudaSafeCall() failed at %s:%i : %s\n",
CFATAL(CUDA, (cudaSuccess != err), "cudaSafeCall() failed at %s:%i : %s\n",
file,
line,
cudaGetErrorString(err));

fprintf(stdout,
"cudaSafeCall() failed at %s:%i : %s\n",
file,
line,
cudaGetErrorString(err));
exit(-1);
}
#endif

return;
}

Expand Down
36 changes: 29 additions & 7 deletions src/AMSlib/wf/debug.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,12 @@ inline uint32_t getVerbosityLevel()

#define FATAL(id, ...) CFATAL(id, true, __VA_ARGS__)

#ifdef LIBAMS_VERBOSE
#define THROW(exception, msg) \
throw exception(std::string(__FILE__) + ":" + std::to_string(__LINE__) + \
" " + msg)


#ifdef AMS_DEBUG

#define CWARNING(id, condition, ...) \
AMSPRINT(id, condition, AMSVerbosity::AMSWARNING, YEL, __VA_ARGS__)
Expand Down Expand Up @@ -122,10 +127,23 @@ inline uint32_t getVerbosityLevel()
} \
} while (0);

#define THROW(exception, msg) \
throw exception(std::string(__FILE__) + ":" + std::to_string(__LINE__) + \
" " + msg)
#else // LIBAMS_VERBOSE is disabled
#ifdef __ENABLE_CUDA__
// NOTE: Regardless of condition we synchronize. We only emit a message based on condition.
#define _CAMSDebugDeviceSync(id, condition, fn, ln, ...) \
do{ \
AMSDeviceSync(fn, ln); \
CDEBUG(id, condition, __VA_ARGS__) \
}while(0);

#define CAMSDebugDeviceSync(id, condition, ...) _CAMSDebugDeviceSync(id, condition, __FILE__, __LINE__, __VA_ARGS__)
#define AMSDebugDeviceSync(id, ...) _CAMSDebugDeviceSync(id, true, __FILE__, __LINE__, __VA_ARGS__)
#else
#define CAMSDebugDeviceSync(id, condition, ...)
#define AMSDebugDeviceSync(id, ...)
#endif


#else // LIBAMS_DEBUG is disabled
#define CWARNING(id, condition, ...)

#define WARNING(id, ...)
Expand All @@ -138,7 +156,11 @@ inline uint32_t getVerbosityLevel()

#define DBG(id, ...)

#define REPORT_MEM_USAGE(id, phase) \

#define CAMSDebugDeviceSync(id, condition, ...)
#define AMSDebugDeviceSync(id, ...)

#endif // LIBAMS_VERBOSE
#endif // AMS_DEBUG

#endif // _OMPTARGET_DEBUG_H
#endif // __AMS_DEBUG__
9 changes: 9 additions & 0 deletions src/AMSlib/wf/device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,11 @@ inline void DtoHMemcpy(void *dest, void *src, size_t nBytes)
{
cudaMemcpy(dest, src, nBytes, cudaMemcpyDeviceToHost);
}

inline void AMSDeviceSync(const char* file, const int line){
__cudaSafeCall(cudaDeviceSynchronize(), file, line);
}

#else
PERFFASPECT()
inline void DtoDMemcpy(void *dest, void *src, size_t nBytes)
Expand Down Expand Up @@ -236,6 +241,10 @@ inline void DtoHMemcpy(void *dest, void *src, size_t nBytes)
std::cerr << "DtoH Memcpy Not Enabled" << std::endl;
exit(-1);
}

inline void AMSDeviceSync(const char* file, const int line){
std::cerr << "GPU Not enabled" << std::endl;
}
#endif

#endif
13 changes: 12 additions & 1 deletion src/AMSlib/wf/workflow.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,6 @@ class AMSWorkflow
DB->store(actualElems, hInputs, hOutputs);
}
rm.deallocate(pPtr, AMSResourceType::PINNED);

return;
}

Expand Down Expand Up @@ -251,6 +250,7 @@ class AMSWorkflow
{
CALIPER(CALI_MARK_BEGIN("AMSEvaluate");)

CAMSDebugDeviceSync(Workflow, rId == 0, "DeviceSyncrhonize, No-External Errors")
CDEBUG(Workflow,
rId == 0,
"Entering Evaluate "
Expand All @@ -276,13 +276,17 @@ class AMSWorkflow
totalElements,
reinterpret_cast<const void **>(origInputs.data()),
reinterpret_cast<void **>(origOutputs.data()));

CAMSDebugDeviceSync(Workflow, rId == 0, "DeviceSyncrhonize, No-AppCall Errors")
CALIPER(CALI_MARK_END("PHYSICS MODULE");)
if (DB) {
CALIPER(CALI_MARK_BEGIN("DBSTORE");)
Store(totalElements, tmpIn, origOutputs);
CAMSDebugDeviceSync(Workflow, rId == 0, "DeviceSyncrhonize, No-Store Errors")
CALIPER(CALI_MARK_END("DBSTORE");)
}
CALIPER(CALI_MARK_END("AMSEvaluate");)
CAMSDebugDeviceSync(Workflow, rId == 0, "DeviceSyncrhonize, No-Evaluate Errors")
return;
}

Expand All @@ -300,6 +304,7 @@ class AMSWorkflow
CALIPER(CALI_MARK_BEGIN("UQ_MODULE");)
UQModel->evaluate(totalElements, origInputs, origOutputs, p_ml_acceptable);
CALIPER(CALI_MARK_END("UQ_MODULE");)
CAMSDebugDeviceSync(Workflow, rId == 0, "DeviceSyncrhonize, No-UQ/Surrogate Errors")

DBG(Workflow, "Computed Predicates")

Expand All @@ -324,6 +329,7 @@ class AMSWorkflow
const long packedElements = data_handler::pack(
appDataLoc, predicate, totalElements, origInputs, packedInputs);
CALIPER(CALI_MARK_END("PACK");)
CAMSDebugDeviceSync(Workflow, rId == 0, "DeviceSyncrhonize, No-Pack Errors")

// Pointer values which store output data values
// to be computed using the eos function.
Expand Down Expand Up @@ -351,12 +357,14 @@ class AMSWorkflow
CALIPER(CALI_MARK_END("LOAD BALANCE MODULE");)
#endif


// ---- 3b: call the physics module and store in the data base
if (packedElements > 0) {
CALIPER(CALI_MARK_BEGIN("PHYSICS MODULE");)
AppCall(probDescr, lbElements, iPtr, oPtr);
CALIPER(CALI_MARK_END("PHYSICS MODULE");)
}
CAMSDebugDeviceSync(Workflow, rId == 0, "DeviceSyncrhonize, No-AppCall Errors")

#ifdef __ENABLE_MPI__
CALIPER(CALI_MARK_BEGIN("LOAD BALANCE MODULE");)
Expand All @@ -372,6 +380,7 @@ class AMSWorkflow
data_handler::unpack(
appDataLoc, predicate, totalElements, packedOutputs, origOutputs);
CALIPER(CALI_MARK_END("UNPACK");)
CAMSDebugDeviceSync(Workflow, rId == 0, "DeviceSyncrhonize, No-UnPack Errors")

DBG(Workflow, "Finished physics evaluation")

Expand All @@ -383,6 +392,7 @@ class AMSWorkflow
Store(packedElements, packedInputs, packedOutputs);
CALIPER(CALI_MARK_END("DBSTORE");)
}
CAMSDebugDeviceSync(Workflow, rId == 0, "DeviceSyncrhonize, No-Store Errors")

// -----------------------------------------------------------------
// Deallocate temporal data
Expand All @@ -405,6 +415,7 @@ class AMSWorkflow

REPORT_MEM_USAGE(Workflow, "End")
CALIPER(CALI_MARK_END("AMSEvaluate");)
CAMSDebugDeviceSync(Workflow, rId == 0, "DeviceSyncrhonize, No-Evaluate Errors")
}
};

Expand Down
Loading