Skip to content

Commit

Permalink
Merge branch 'fp16-staging' into fp16_bruteforce_staging
Browse files Browse the repository at this point in the history
  • Loading branch information
shajder committed Dec 18, 2023
2 parents 2944911 + 0fa6f23 commit ac328eb
Show file tree
Hide file tree
Showing 27 changed files with 1,634 additions and 1,520 deletions.
15 changes: 15 additions & 0 deletions test_common/harness/errorHelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,21 @@ static int vlog_win32(const char *format, ...);
log_error(msg, ##__VA_ARGS__); \
return TEST_FAIL; \
}
// Logs `msg` (printf-style, with any additional variadic arguments) through
// log_error, assigns TEST_FAIL to `errRet`, and then jumps to the `cleanup`
// label. `errRet` must be an assignable variable and `cleanup` a label that
// exists in the function where the macro is expanded.
#define test_fail_and_cleanup(errRet, cleanup, msg, ...) \
{ \
log_error(msg, ##__VA_ARGS__); \
errRet = TEST_FAIL; \
goto cleanup; \
}
// Evaluates `errCode` exactly once; if the result differs from CL_SUCCESS it
// is reported with print_error(result, msg) and control jumps to the `cleanup`
// label. Otherwise execution simply continues after the macro.
// Note: the variadic arguments are accepted but not forwarded to print_error,
// and this macro does not assign any return/status variable itself.
#define test_error_and_cleanup(errCode, cleanup, msg, ...) \
{ \
auto errCodeResult = errCode; \
if (errCodeResult != CL_SUCCESS) \
{ \
print_error(errCodeResult, msg); \
goto cleanup; \
} \
}
#define test_error(errCode, msg) test_error_ret(errCode, msg, errCode)
#define test_error_fail(errCode, msg) test_error_ret(errCode, msg, TEST_FAIL)
#define test_error_ret(errCode, msg, retValue) \
Expand Down
8 changes: 8 additions & 0 deletions test_common/harness/fpcontrol.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ typedef int64_t FPU_mode_type;
#elif defined(__PPC__)
#include <fpu_control.h>
extern __thread fpu_control_t fpu_control;
#elif defined(__mips__)
#include "mips/m32c1.h"
#endif
// Set the reference hardware floating point unit to FTZ mode
inline void ForceFTZ(FPU_mode_type *mode)
Expand All @@ -65,6 +67,8 @@ inline void ForceFTZ(FPU_mode_type *mode)
__asm__ volatile("mrs %0, fpcr" : "=r"(fpscr));
*mode = fpscr;
__asm__ volatile("msr fpcr, %0" ::"r"(fpscr | (1U << 24)));
#elif defined(__mips__)
fpa_bissr(FPA_CSR_FS);
#else
#error ForceFTZ needs an implentation
#endif
Expand All @@ -91,6 +95,8 @@ inline void DisableFTZ(FPU_mode_type *mode)
__asm__ volatile("mrs %0, fpcr" : "=r"(fpscr));
*mode = fpscr;
__asm__ volatile("msr fpcr, %0" ::"r"(fpscr & ~(1U << 24)));
#elif defined(__mips__)
fpa_bicsr(FPA_CSR_FS);
#else
#error DisableFTZ needs an implentation
#endif
Expand All @@ -109,6 +115,8 @@ inline void RestoreFPState(FPU_mode_type *mode)
// Add 64 bit support
#elif defined(__aarch64__)
__asm__ volatile("msr fpcr, %0" ::"r"(*mode));
#elif defined(__mips__)
// Mips runs by default with DAZ=1 FTZ=1
#else
#error RestoreFPState needs an implementation
#endif
Expand Down
7 changes: 7 additions & 0 deletions test_common/harness/rounding_mode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,8 @@ RoundingMode get_round(void)
#include <xmmintrin.h>
#elif defined(__PPC__)
#include <fpu_control.h>
#elif defined(__mips__)
#include "mips/m32c1.h"
#endif
void *FlushToZero(void)
{
Expand All @@ -219,6 +221,9 @@ void *FlushToZero(void)
flags |= _FPU_MASK_NI;
_FPU_SETCW(flags);
return NULL;
#elif defined(__mips__)
fpa_bissr(FPA_CSR_FS);
return NULL;
#else
#error Unknown arch
#endif
Expand Down Expand Up @@ -247,6 +252,8 @@ void UnFlushToZero(void *p)
_FPU_GETCW(flags);
flags &= ~_FPU_MASK_NI;
_FPU_SETCW(flags);
#elif defined(__mips__)
fpa_bicsr(FPA_CSR_FS);
#else
#error Unknown arch
#endif
Expand Down
2 changes: 2 additions & 0 deletions test_common/harness/testHarness.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1298,6 +1298,8 @@ void PrintArch(void)
vlog("ARCH:\taarch64\n");
#elif defined(_WIN32)
vlog("ARCH:\tWindows\n");
#elif defined(__mips__)
vlog("ARCH:\tmips\n");
#else
#error unknown arch
#endif
Expand Down
2 changes: 1 addition & 1 deletion test_conformance/allocations/allocation_execute.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ int check_image(cl_command_queue queue, cl_mem mem) {
}


#define NUM_OF_WORK_ITEMS 8192*2
#define NUM_OF_WORK_ITEMS (8192 * 32)

int execute_kernel(cl_context context, cl_command_queue *queue, cl_device_id device_id, int test, cl_mem mems[], int number_of_mems_used, int verify_checksum) {

Expand Down
4 changes: 2 additions & 2 deletions test_conformance/c11_atomics/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -644,8 +644,8 @@ class CBasicTestMemOrderScope
}

private:
TExplicitMemoryOrderType _memoryOrder;
TExplicitMemoryScopeType _memoryScope;
TExplicitMemoryOrderType _memoryOrder = MEMORY_ORDER_EMPTY;
TExplicitMemoryScopeType _memoryScope = MEMORY_SCOPE_EMPTY;
};

template <typename HostAtomicType, typename HostDataType>
Expand Down
6 changes: 3 additions & 3 deletions test_conformance/c11_atomics/host_atomics.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,12 @@ enum TExplicitMemoryOrderType
#endif

#define HOST_ATOMIC_INTPTR_T32 HOST_ATOMIC_INT
#define HOST_ATOMIC_UINTPTR_T32 HOST_ATOMIC_INT
#define HOST_ATOMIC_UINTPTR_T32 HOST_ATOMIC_UINT
#define HOST_ATOMIC_SIZE_T32 HOST_ATOMIC_UINT
#define HOST_ATOMIC_PTRDIFF_T32 HOST_ATOMIC_INT

#define HOST_ATOMIC_INTPTR_T64 HOST_ATOMIC_LONG
#define HOST_ATOMIC_UINTPTR_T64 HOST_ATOMIC_LONG
#define HOST_ATOMIC_UINTPTR_T64 HOST_ATOMIC_ULONG
#define HOST_ATOMIC_SIZE_T64 HOST_ATOMIC_ULONG
#define HOST_ATOMIC_PTRDIFF_T64 HOST_ATOMIC_LONG

Expand All @@ -82,7 +82,7 @@ enum TExplicitMemoryOrderType
#define HOST_SIZE_T64 cl_ulong
#define HOST_PTRDIFF_T64 cl_long

#define HOST_FLAG cl_uint
#define HOST_FLAG cl_int

// host atomic functions
void host_atomic_thread_fence(TExplicitMemoryOrderType order);
Expand Down
170 changes: 113 additions & 57 deletions test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ pfnclEnqueueAcquireExternalMemObjectsKHR
pfnclEnqueueReleaseExternalMemObjectsKHR
clEnqueueReleaseExternalMemObjectsKHRptr;
pfnclReleaseSemaphoreKHR clReleaseSemaphoreKHRptr;
pfnclGetSemaphoreHandleForTypeKHR clGetSemaphoreHandleForTypeKHRptr;

void init_cl_vk_ext(cl_platform_id opencl_platform)
{
Expand Down Expand Up @@ -69,6 +70,15 @@ void init_cl_vk_ext(cl_platform_id opencl_platform)
throw std::runtime_error("Failed to get the function pointer of "
"clCreateSemaphoreWithPropertiesKHRptr!");
}

clGetSemaphoreHandleForTypeKHRptr = (pfnclGetSemaphoreHandleForTypeKHR)
clGetExtensionFunctionAddressForPlatform(
opencl_platform, "clGetSemaphoreHandleForTypeKHR");
if (NULL == clGetSemaphoreHandleForTypeKHRptr)
{
throw std::runtime_error("Failed to get the function pointer of "
"clGetSemaphoreHandleForTypeKHRptr!");
}
}

cl_int setMaxImageDimensions(cl_device_id deviceID, size_t &max_width,
Expand Down Expand Up @@ -522,8 +532,8 @@ clExternalMemory::clExternalMemory(const clExternalMemory &externalMemory)

clExternalMemory::clExternalMemory(
const VulkanDeviceMemory *deviceMemory,
VulkanExternalMemoryHandleType externalMemoryHandleType, uint64_t offset,
uint64_t size, cl_context context, cl_device_id deviceId)
VulkanExternalMemoryHandleType externalMemoryHandleType, uint64_t size,
cl_context context, cl_device_id deviceId)
{
int err = 0;
m_externalMemory = NULL;
Expand All @@ -548,9 +558,9 @@ clExternalMemory::clExternalMemory(
{
case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD:
#ifdef _WIN32
log_info("Opaque file descriptors are not supported on Windows\n");
ASSERT(0);
#endif
log_info("Opaque file descriptors are not supported on Windows\n");
fd = (int)deviceMemory->getHandle(externalMemoryHandleType);
err = check_external_memory_handle_type(
devList[0], CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR);
Expand Down Expand Up @@ -595,10 +605,11 @@ clExternalMemory::clExternalMemory(
throw std::runtime_error("Unsupported external memory type\n ");
}

extMemProperties.push_back((cl_mem_properties)CL_DEVICE_HANDLE_LIST_KHR);
extMemProperties.push_back(
(cl_mem_properties)CL_MEM_DEVICE_HANDLE_LIST_KHR);
extMemProperties.push_back((cl_mem_properties)devList[0]);
extMemProperties.push_back(
(cl_mem_properties)CL_DEVICE_HANDLE_LIST_END_KHR);
(cl_mem_properties)CL_MEM_DEVICE_HANDLE_LIST_END_KHR);
extMemProperties.push_back(0);

m_externalMemory = clCreateBufferWithProperties(
Expand Down Expand Up @@ -691,10 +702,11 @@ clExternalMemoryImage::clExternalMemoryImage(
throw std::runtime_error("getCLImageInfoFromVkImageInfo failed!!!");
}

extMemProperties1.push_back((cl_mem_properties)CL_DEVICE_HANDLE_LIST_KHR);
extMemProperties1.push_back(
(cl_mem_properties)CL_MEM_DEVICE_HANDLE_LIST_KHR);
extMemProperties1.push_back((cl_mem_properties)devList[0]);
extMemProperties1.push_back(
(cl_mem_properties)CL_DEVICE_HANDLE_LIST_END_KHR);
(cl_mem_properties)CL_MEM_DEVICE_HANDLE_LIST_END_KHR);
extMemProperties1.push_back(0);
m_externalMemory = clCreateImageWithProperties(
context, extMemProperties1.data(), CL_MEM_READ_WRITE, &img_format,
Expand Down Expand Up @@ -726,56 +738,19 @@ clExternalMemoryImage::clExternalMemoryImage() {}
// clExternalSemaphore implementation //
//////////////////////////////////////////

// Copy constructor: initializes m_externalSemaphore from the source object's
// m_externalSemaphore. No other member is explicitly initialized here and the
// body is empty.
// NOTE(review): the CL semaphore handle is copied as-is with no retain call
// visible in this constructor - confirm ownership/release semantics against
// the destructor before relying on copies of this class.
clExternalSemaphore::clExternalSemaphore(
const clExternalSemaphore &externalSemaphore)
: m_externalSemaphore(externalSemaphore.m_externalSemaphore)
{}

clExternalSemaphore::clExternalSemaphore(
const VulkanSemaphore &semaphore, cl_context context,
VulkanExternalSemaphoreHandleType externalSemaphoreHandleType,
cl_device_id deviceId)
: m_deviceSemaphore(semaphore)
{

cl_int err = 0;
cl_device_id devList[] = { deviceId, NULL };

switch (externalSemaphoreHandleType)
{
case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD:
if (!is_extension_available(devList[0],
"cl_khr_external_semaphore_opaque_fd"))
{
throw std::runtime_error("Device does not support "
"cl_khr_external_semaphore_opaque_fd "
"extension \n");
}
break;
case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_NT:
case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT:
case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_NT_KMT:
if (!is_extension_available(devList[0],
"cl_khr_external_semaphore_win32"))
{
throw std::runtime_error(
"Device does not support "
"cl_khr_external_semaphore_win32 extension\n");
}
break;
case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD:
if (!is_extension_available(devList[0],
"cl_khr_external_semaphore_sync_fd"))
{
throw std::runtime_error(
"Device does not support cl_khr_external_semaphore_sync_fd "
"extension \n");
}
break;
default:
throw std::runtime_error(
"Unsupported external semaphore handle type\n");
break;
}
m_externalHandleType = externalSemaphoreHandleType;
m_externalSemaphore = nullptr;
m_device = deviceId;
m_context = context;

std::vector<cl_semaphore_properties_khr> sema_props{
(cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_KHR,
Expand Down Expand Up @@ -846,10 +821,10 @@ clExternalSemaphore::clExternalSemaphore(
}

sema_props.push_back(
(cl_semaphore_properties_khr)CL_DEVICE_HANDLE_LIST_KHR);
(cl_semaphore_properties_khr)CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR);
sema_props.push_back((cl_semaphore_properties_khr)devList[0]);
sema_props.push_back(
(cl_semaphore_properties_khr)CL_DEVICE_HANDLE_LIST_END_KHR);
(cl_semaphore_properties_khr)CL_SEMAPHORE_DEVICE_HANDLE_LIST_END_KHR);
sema_props.push_back(0);
m_externalSemaphore =
clCreateSemaphoreWithPropertiesKHRptr(context, sema_props.data(), &err);
Expand All @@ -871,16 +846,97 @@ clExternalSemaphore::~clExternalSemaphore() noexcept(false)
}
}

/// Enqueues a signal of the wrapped CL semaphore on `cmd_queue`.
///
/// When the semaphore was created with the SYNC_FD handle type, the fd is
/// additionally exported from the CL semaphore with
/// clGetSemaphoreHandleForTypeKHR and imported into the Vulkan semaphore
/// (m_deviceSemaphore) with vkImportSemaphoreFdKHR.
///
/// @param cmd_queue Command queue on which the signal is enqueued.
/// @return CL_SUCCESS on success; the CL error code of the failing call
///         otherwise, or CL_INVALID_OPERATION if the Vulkan import fails.
int clExternalSemaphore::signal(cl_command_queue cmd_queue)
{
    // Enqueue the signal exactly once and propagate any failure to the
    // caller instead of silently discarding the return code.
    int err = clEnqueueSignalSemaphoresKHRptr(
        cmd_queue, 1, &m_externalSemaphore, NULL, 0, NULL, nullptr);
    if (err != CL_SUCCESS)
    {
        return err;
    }

    if (m_externalHandleType == VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD)
    {
        // Export the sync fd from the CL semaphore into the `fd` member.
        err = clGetSemaphoreHandleForTypeKHRptr(m_externalSemaphore, m_device,
                                                CL_SEMAPHORE_HANDLE_SYNC_FD_KHR,
                                                sizeof(int), &fd, nullptr);
        if (err != CL_SUCCESS)
        {
            log_error("Failed to export fd from semaphore\n");
            return err;
        }

        // Hand the exported fd over to the Vulkan semaphore.
        VkImportSemaphoreFdInfoKHR import = {};
        import.sType = VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR;
        import.semaphore = m_deviceSemaphore;
        import.fd = fd;
        import.pNext = nullptr;
        import.handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
        import.flags = 0;

        VkResult res =
            vkImportSemaphoreFdKHR(m_deviceSemaphore.getDevice(), &import);
        ASSERT(res == VK_SUCCESS);
        if (res != VK_SUCCESS)
        {
            // Map the Vulkan failure onto a CL error code for the caller.
            err = CL_INVALID_OPERATION;
        }
    }

    return err;
}

/// Enqueues a wait on the wrapped CL semaphore on `cmd_queue`.
///
/// When the semaphore uses the SYNC_FD handle type, the handle is first
/// fetched from the Vulkan semaphore (m_deviceSemaphore), any existing CL
/// semaphore is released, and a new binary CL semaphore is created from that
/// fd before the wait is enqueued.
///
/// @param cmd_queue Command queue on which the wait is enqueued.
/// @return CL_SUCCESS on success; otherwise the CL error code of the first
///         call that failed.
int clExternalSemaphore::wait(cl_command_queue cmd_queue)
{
    // Single status variable for the whole function (no shadowed copy in the
    // SYNC_FD branch).
    cl_int err = CL_SUCCESS;

    if (m_externalHandleType == VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD)
    {
        std::vector<cl_semaphore_properties_khr> sema_props{
            (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_KHR,
            (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_BINARY_KHR,
        };

        // Fetch the handle from the Vulkan semaphore into the `fd` member.
        fd = (int)m_deviceSemaphore.getHandle(m_externalHandleType);

        err = check_external_semaphore_handle_type(
            m_device, CL_SEMAPHORE_HANDLE_SYNC_FD_KHR);
        if (CL_SUCCESS != err)
        {
            log_error("CL_SEMAPHORE_HANDLE_SYNC_FD_KHR not supported\n");
            return err;
        }

        sema_props.push_back(
            (cl_semaphore_properties_khr)CL_SEMAPHORE_HANDLE_SYNC_FD_KHR);
        sema_props.push_back((cl_semaphore_properties_khr)fd);
        // Zero terminates the property list.
        sema_props.push_back(0);

        // Drop the previous CL semaphore before replacing it.
        if (m_externalSemaphore)
        {
            err = clReleaseSemaphoreKHRptr(m_externalSemaphore);
            if (err != CL_SUCCESS)
            {
                log_error("Failed to release CL external semaphore\n");
                return err;
            }
            m_externalSemaphore = nullptr;
        }

        m_externalSemaphore = clCreateSemaphoreWithPropertiesKHRptr(
            m_context, sema_props.data(), &err);
        if (CL_SUCCESS != err)
        {
            log_error("clCreateSemaphoreWithPropertiesKHRptr failed with %d\n",
                      err);
            return err;
        }
    }

    // Enqueue the wait exactly once and return its status to the caller.
    err = clEnqueueWaitSemaphoresKHRptr(cmd_queue, 1, &m_externalSemaphore,
                                        NULL, 0, NULL, NULL);
    return err;
}

cl_semaphore_khr &clExternalSemaphore::getCLSemaphore()
Expand Down
Loading

0 comments on commit ac328eb

Please sign in to comment.