diff --git a/CMakeLists.txt b/CMakeLists.txt index ac996c12..1789c81c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -91,7 +91,7 @@ if (WITH_CALIPER) endif() if (WITH_AMS_DEBUG) - list(APPEND AMS_APP_DEFINES "-DLIBAMS_VERBOSE") + list(APPEND AMS_APP_DEFINES "-DAMS_DEBUG") endif() # ------------------------------------------------------------------------------ diff --git a/src/AMSlib/wf/cuda/utilities.cuh b/src/AMSlib/wf/cuda/utilities.cuh index 3969688a..90fe26d4 100644 --- a/src/AMSlib/wf/cuda/utilities.cuh +++ b/src/AMSlib/wf/cuda/utilities.cuh @@ -16,6 +16,7 @@ #include #include "wf/resource_manager.hpp" +#include "wf/debug.h" //#include //#include @@ -35,23 +36,10 @@ __device__ __inline__ int pow2i(int e) { return 1 << e; } inline void __cudaSafeCall(cudaError err, const char* file, const int line) { -#ifdef CUDA_ERROR_CHECK - if (cudaSuccess != err) { - fprintf(stderr, - "cudaSafeCall() failed at %s:%i : %s\n", + CFATAL(CUDA, (cudaSuccess != err), "cudaSafeCall() failed at %s:%i : %s\n", file, line, cudaGetErrorString(err)); - - fprintf(stdout, - "cudaSafeCall() failed at %s:%i : %s\n", - file, - line, - cudaGetErrorString(err)); - exit(-1); - } -#endif - return; } diff --git a/src/AMSlib/wf/debug.h b/src/AMSlib/wf/debug.h index 4927c32e..c00dd1ee 100644 --- a/src/AMSlib/wf/debug.h +++ b/src/AMSlib/wf/debug.h @@ -80,7 +80,12 @@ inline uint32_t getVerbosityLevel() #define FATAL(id, ...) CFATAL(id, true, __VA_ARGS__) -#ifdef LIBAMS_VERBOSE +#define THROW(exception, msg) \ + throw exception(std::string(__FILE__) + ":" + std::to_string(__LINE__) + \ + " " + msg) + + +#ifdef AMS_DEBUG #define CWARNING(id, condition, ...) \ AMSPRINT(id, condition, AMSVerbosity::AMSWARNING, YEL, __VA_ARGS__) @@ -122,10 +127,23 @@ inline uint32_t getVerbosityLevel() } \ } while (0); -#define THROW(exception, msg) \ - throw exception(std::string(__FILE__) + ":" + std::to_string(__LINE__) + \ - " " + msg) -#else // LIBAMS_VERBOSE is disabled +#ifdef __ENABLE_CUDA__ +// NOTE: Regardless of condition we synchronize. We only emit a message based on condition. +#define _CAMSDebugDeviceSync(id, condition, fn, ln, ...) \ + do{ \ + AMSDeviceSync(fn, ln); \ + CDEBUG(id, condition, __VA_ARGS__) \ + }while(0); + +#define CAMSDebugDeviceSync(id, condition, ...) _CAMSDebugDeviceSync(id, condition, __FILE__, __LINE__, __VA_ARGS__) +#define AMSDebugDeviceSync(id, ...) _CAMSDebugDeviceSync(id, true, __FILE__, __LINE__, __VA_ARGS__) +#else +#define CAMSDebugDeviceSync(id, condition, ...) +#define AMSDebugDeviceSync(id, ...) +#endif + + +#else // LIBAMS_DEBUG is disabled #define CWARNING(id, condition, ...) #define WARNING(id, ...) @@ -138,7 +156,11 @@ inline uint32_t getVerbosityLevel() #define DBG(id, ...) +#define REPORT_MEM_USAGE(id, phase) \ + +#define CAMSDebugDeviceSync(id, condition, ...) +#define AMSDebugDeviceSync(id, ...) -#endif // LIBAMS_VERBOSE +#endif // AMS_DEBUG -#endif // _OMPTARGET_DEBUG_H +#endif // __AMS_DEBUG__ diff --git a/src/AMSlib/wf/device.hpp b/src/AMSlib/wf/device.hpp index 73b784c0..e57c13aa 100644 --- a/src/AMSlib/wf/device.hpp +++ b/src/AMSlib/wf/device.hpp @@ -209,6 +209,11 @@ inline void DtoHMemcpy(void *dest, void *src, size_t nBytes) { cudaMemcpy(dest, src, nBytes, cudaMemcpyDeviceToHost); } + +inline void AMSDeviceSync(const char* file, const int line){ + __cudaSafeCall(cudaDeviceSynchronize(), file, line); +} + #else PERFFASPECT() inline void DtoDMemcpy(void *dest, void *src, size_t nBytes) @@ -236,6 +241,10 @@ inline void DtoHMemcpy(void *dest, void *src, size_t nBytes) std::cerr << "DtoH Memcpy Not Enabled" << std::endl; exit(-1); } + +inline void AMSDeviceSync(const char* file, const int line){ + std::cerr << "GPU Not enabled" << std::endl; +} #endif #endif diff --git a/src/AMSlib/wf/workflow.hpp b/src/AMSlib/wf/workflow.hpp index 2f1c8656..3f7a6684 100644 --- a/src/AMSlib/wf/workflow.hpp +++ b/src/AMSlib/wf/workflow.hpp @@ -141,7 +141,6 @@ class AMSWorkflow DB->store(actualElems, hInputs, hOutputs); } rm.deallocate(pPtr, AMSResourceType::PINNED); - return; } @@ -251,6 +250,7 @@ class AMSWorkflow { CALIPER(CALI_MARK_BEGIN("AMSEvaluate");) + CAMSDebugDeviceSync(Workflow, rId == 0, "DeviceSyncrhonize, No-External Errors") CDEBUG(Workflow, rId == 0, "Entering Evaluate " @@ -276,13 +276,17 @@ class AMSWorkflow totalElements, reinterpret_cast(origInputs.data()), reinterpret_cast(origOutputs.data())); + + CAMSDebugDeviceSync(Workflow, rId == 0, "DeviceSyncrhonize, No-AppCall Errors") CALIPER(CALI_MARK_END("PHYSICS MODULE");) if (DB) { CALIPER(CALI_MARK_BEGIN("DBSTORE");) Store(totalElements, tmpIn, origOutputs); + CAMSDebugDeviceSync(Workflow, rId == 0, "DeviceSyncrhonize, No-Store Errors") CALIPER(CALI_MARK_END("DBSTORE");) } CALIPER(CALI_MARK_END("AMSEvaluate");) + CAMSDebugDeviceSync(Workflow, rId == 0, "DeviceSyncrhonize, No-Evaluate Errors") return; } @@ -300,6 +304,7 @@ class AMSWorkflow CALIPER(CALI_MARK_BEGIN("UQ_MODULE");) UQModel->evaluate(totalElements, origInputs, origOutputs, p_ml_acceptable); CALIPER(CALI_MARK_END("UQ_MODULE");) + CAMSDebugDeviceSync(Workflow, rId == 0, "DeviceSyncrhonize, No-UQ/Surrogate Errors") DBG(Workflow, "Computed Predicates") @@ -324,6 +329,7 @@ class AMSWorkflow const long packedElements = data_handler::pack( appDataLoc, predicate, totalElements, origInputs, packedInputs); CALIPER(CALI_MARK_END("PACK");) + CAMSDebugDeviceSync(Workflow, rId == 0, "DeviceSyncrhonize, No-Pack Errors") // Pointer values which store output data values // to be computed using the eos function. @@ -351,12 +357,14 @@ class AMSWorkflow CALIPER(CALI_MARK_END("LOAD BALANCE MODULE");) #endif + // ---- 3b: call the physics module and store in the data base if (packedElements > 0) { CALIPER(CALI_MARK_BEGIN("PHYSICS MODULE");) AppCall(probDescr, lbElements, iPtr, oPtr); CALIPER(CALI_MARK_END("PHYSICS MODULE");) } + CAMSDebugDeviceSync(Workflow, rId == 0, "DeviceSyncrhonize, No-AppCall Errors") #ifdef __ENABLE_MPI__ CALIPER(CALI_MARK_BEGIN("LOAD BALANCE MODULE");) @@ -372,6 +380,7 @@ class AMSWorkflow data_handler::unpack( appDataLoc, predicate, totalElements, packedOutputs, origOutputs); CALIPER(CALI_MARK_END("UNPACK");) + CAMSDebugDeviceSync(Workflow, rId == 0, "DeviceSyncrhonize, No-UnPack Errors") DBG(Workflow, "Finished physics evaluation") @@ -383,6 +392,7 @@ class AMSWorkflow Store(packedElements, packedInputs, packedOutputs); CALIPER(CALI_MARK_END("DBSTORE");) } + CAMSDebugDeviceSync(Workflow, rId == 0, "DeviceSyncrhonize, No-Store Errors") // ----------------------------------------------------------------- // Deallocate temporal data @@ -405,6 +415,7 @@ class AMSWorkflow REPORT_MEM_USAGE(Workflow, "End") CALIPER(CALI_MARK_END("AMSEvaluate");) + CAMSDebugDeviceSync(Workflow, rId == 0, "DeviceSyncrhonize, No-Evaluate Errors") } };