diff --git a/test_common/harness/rounding_mode.h b/test_common/harness/rounding_mode.h index 6f52f0a00b..bdc6bb98e1 100644 --- a/test_common/harness/rounding_mode.h +++ b/test_common/harness/rounding_mode.h @@ -42,10 +42,11 @@ typedef enum kshort = 3, kuint = 4, kint = 5, - kfloat = 6, - kdouble = 7, - kulong = 8, - klong = 9, + khalf = 6, + kfloat = 7, + kdouble = 8, + kulong = 9, + klong = 10, // This goes last kTypeCount diff --git a/test_conformance/conversions/basic_test_conversions.cpp b/test_conformance/conversions/basic_test_conversions.cpp index 1020638af9..b5f59deab8 100644 --- a/test_conformance/conversions/basic_test_conversions.cpp +++ b/test_conformance/conversions/basic_test_conversions.cpp @@ -48,6 +48,7 @@ #include #include +#include #include "basic_test_conversions.h" @@ -86,9 +87,13 @@ int gWimpyReductionFactor = 128; int gSkipTesting = 0; int gForceFTZ = 0; int gIsRTZ = 0; +int gForceHalfFTZ = 0; +int gIsHalfRTZ = 0; uint32_t gSimdSize = 1; int gHasDouble = 0; int gTestDouble = 1; +int gHasHalfs = 0; +int gTestHalfs = 1; const char *sizeNames[] = { "", "", "2", "3", "4", "8", "16" }; int vectorSizes[] = { 1, 1, 2, 3, 4, 8, 16 }; int gMinVectorSize = 0; @@ -100,6 +105,8 @@ int argCount = 0; double SubtractTime(uint64_t endTime, uint64_t startTime); +cl_half_rounding_mode DataInitInfo::halfRoundingMode = CL_HALF_RTE; +cl_half_rounding_mode ConversionsTest::defaultHalfRoundingMode = CL_HALF_RTE; // clang-format off // for readability sake keep this section unformatted @@ -256,8 +263,30 @@ std::vector DataInitInfo::specialValuesDouble = { MAKE_HEX_DOUBLE(0x1.fffffffefffffp62, 0x1fffffffefffffLL, 10), MAKE_HEX_DOUBLE(0x1.ffffffffp62, 0x1ffffffffLL, 30), MAKE_HEX_DOUBLE(0x1.ffffffff00001p62, 0x1ffffffff00001LL, 10), }; -// clang-format on +// A table of more difficult cases to get right +std::vector DataInitInfo::specialValuesHalf = { + 0xffff, + 0x0000, + 0x0001, + 0x7c00, /*INFINITY*/ + 0xfc00, /*-INFINITY*/ + 0x8000, /*-0*/ + 0x7bff, /*HALF_MAX*/ + 0x0400, /*HALF_MIN*/ + 0x03ff, /* Largest denormal */ + 0x3c00, /* 1 */ + 0xbc00, /* -1 */ + 0x3555, /*nearest value to 1/3*/ + 0x3bff, /*largest number less than one*/ + 0xc000, /* -2 */ + 0xfbff, /* -HALF_MAX */ + 0x8400, /* -HALF_MIN */ + 0x4248, /* M_PI_H */ + 0xc248, /* -M_PI_H */ + 0xbbff, /* Largest negative fraction */ +}; +// clang-format on // Windows (since long double got deprecated) sets the x87 to 53-bit precision // (that's x87 default state). This causes problems with the tests that @@ -282,15 +311,32 @@ static inline void Force64BitFPUPrecision(void) #endif } - -template -int CalcRefValsPat::check_result(void *test, uint32_t count, - int vectorSize) +template +int CalcRefValsPat::check_result(void *test, + uint32_t count, + int vectorSize) { const cl_uchar *a = (const cl_uchar *)gAllowZ; - if (std::is_integral::value) - { // char/uchar/short/ushort/int/uint/long/ulong + if (is_half()) + { + const cl_half *t = (const cl_half *)test; + const cl_half *c = (const cl_half *)gRef; + + for (uint32_t i = 0; i < count; i++) + if (t[i] != c[i] && + // Allow nan's to be binary different + !((t[i] & 0x7fff) > 0x7C00 && (c[i] & 0x7fff) > 0x7C00) + && !(a[i] != (cl_uchar)0 && t[i] == (c[i] & 0x8000))) + { + vlog( + "\nError for vector size %d found at 0x%8.8x: *%a vs %a\n", + vectorSize, i, HTF(c[i]), HTF(t[i])); + return i + 1; + } + } + else if (std::is_integral::value) + { // char/uchar/short/ushort/half/int/uint/long/ulong const OutType *t = (const OutType *)test; const OutType *c = (const OutType *)gRef; for (uint32_t i = 0; i < count; i++) @@ -388,6 +434,20 @@ cl_int CustomConversionsTest::Run() continue; } + // skip half if we don't have it + if (!gTestHalfs && (inType == khalf || outType == khalf)) + { + if (gHasHalfs) + { + vlog_error("\t *** convert_%sn%s%s( %sn ) FAILED ** \n", + gTypeNames[outType], gSaturationNames[sat], + gRoundingModeNames[round], gTypeNames[inType]); + vlog("\t\tcl_khr_fp16 enabled, but half testing turned " + "off.\n"); + } + continue; + } + // skip longs on embedded if (!gHasLong && (inType == klong || outType == klong || inType == kulong @@ -427,8 +487,8 @@ ConversionsTest::ConversionsTest(cl_device_id device, cl_context context, cl_command_queue queue) : context(context), device(device), queue(queue), num_elements(0), typeIterator({ cl_uchar(0), cl_char(0), cl_ushort(0), cl_short(0), - cl_uint(0), cl_int(0), cl_float(0), cl_double(0), - cl_ulong(0), cl_long(0) }) + cl_uint(0), cl_int(0), cl_half(0), cl_float(0), + cl_double(0), cl_ulong(0), cl_long(0) }) {} @@ -445,11 +505,31 @@ cl_int ConversionsTest::Run() cl_int ConversionsTest::SetUp(int elements) { num_elements = elements; + if (is_extension_available(device, "cl_khr_fp16")) + { + const cl_device_fp_config fpConfigHalf = + get_default_rounding_mode(device, CL_DEVICE_HALF_FP_CONFIG); + if ((fpConfigHalf & CL_FP_ROUND_TO_NEAREST) != 0) + { + DataInitInfo::halfRoundingMode = CL_HALF_RTE; + ConversionsTest::defaultHalfRoundingMode = CL_HALF_RTE; + } + else if ((fpConfigHalf & CL_FP_ROUND_TO_ZERO) != 0) + { + DataInitInfo::halfRoundingMode = CL_HALF_RTZ; + ConversionsTest::defaultHalfRoundingMode = CL_HALF_RTZ; + } + else + { + log_error("Error while acquiring half rounding mode"); + return TEST_FAIL; + } + } + return CL_SUCCESS; } - -template +template void ConversionsTest::TestTypesConversion(const Type &inType, const Type &outType, int &testNumber, int startMinVectorSize) @@ -470,7 +550,8 @@ void ConversionsTest::TestTypesConversion(const Type &inType, sat = (SaturationMode)(sat + 1)) { // skip illegal saturated conversions to float type - if (kSaturated == sat && (outType == kfloat || outType == kdouble)) + if (kSaturated == sat + && (outType == kfloat || outType == kdouble || outType == khalf)) { continue; } @@ -507,6 +588,20 @@ void ConversionsTest::TestTypesConversion(const Type &inType, continue; } + // skip half if we don't have it + if (!gTestHalfs && (inType == khalf || outType == khalf)) + { + if (gHasHalfs) + { + vlog_error("\t *** convert_%sn%s%s( %sn ) FAILED ** \n", + gTypeNames[outType], gSaturationNames[sat], + gRoundingModeNames[round], gTypeNames[inType]); + vlog("\t\tcl_khr_fp16 enabled, but half testing turned " + "off.\n"); + } + continue; + } + // Skip the implicit converts if the rounding mode is // not default or test is saturated if (0 == startMinVectorSize) @@ -517,7 +612,8 @@ void ConversionsTest::TestTypesConversion(const Type &inType, gMinVectorSize = 0; } - if ((error = DoTest(outType, inType, sat, round))) + if ((error = DoTest(outType, inType, + sat, round))) { vlog_error("\t *** %d) convert_%sn%s%s( %sn ) " "FAILED ** \n", @@ -529,8 +625,7 @@ void ConversionsTest::TestTypesConversion(const Type &inType, } } - -template +template int ConversionsTest::DoTest(Type outType, Type inType, SaturationMode sat, RoundingMode round) { @@ -541,7 +636,7 @@ int ConversionsTest::DoTest(Type outType, Type inType, SaturationMode sat, cl_uint threads = GetThreadCount(); DataInitInfo info = { 0, 0, outType, inType, sat, round, threads }; - DataInfoSpec init_info(info); + DataInfoSpec init_info(info); WriteInputBufferInfo writeInputBufferInfo; int vectorSize; int error = 0; @@ -564,7 +659,7 @@ int ConversionsTest::DoTest(Type outType, Type inType, SaturationMode sat, for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++) { writeInputBufferInfo.calcInfo[vectorSize].reset( - new CalcRefValsPat()); + new CalcRefValsPat()); writeInputBufferInfo.calcInfo[vectorSize]->program = conv_test::MakeProgram( outType, inType, sat, round, vectorSize, @@ -597,6 +692,11 @@ int ConversionsTest::DoTest(Type outType, Type inType, SaturationMode sat, if (round == kDefaultRoundingMode && gIsRTZ) init_info.round = round = kRoundTowardZero; } + else if (std::is_same::value && OutFP) + { + if (round == kDefaultRoundingMode && gIsHalfRTZ) + init_info.round = round = kRoundTowardZero; + } // Figure out how many elements are in a work block // we handle 64-bit types a bit differently. @@ -764,6 +864,10 @@ int ConversionsTest::DoTest(Type outType, Type inType, SaturationMode sat, vlog("Input value: 0x%8.8x ", ((unsigned int *)gIn)[error - 1]); break; + case khalf: + vlog("Input value: %a ", + HTF(((cl_half *)gIn)[error - 1])); + break; case kfloat: vlog("Input value: %a ", ((float *)gIn)[error - 1]); break; @@ -901,8 +1005,6 @@ double SubtractTime(uint64_t endTime, uint64_t startTime) } #endif -//////////////////////////////////////////////////////////////////////////////// - static void setAllowZ(uint8_t *allow, uint32_t *x, cl_uint count) { cl_uint i; @@ -951,6 +1053,112 @@ void MapResultValuesComplete(const std::unique_ptr &info) // destroyed automatically soon after we exit. } +template static bool isnan_fp(const T &v) +{ + if (std::is_same::value) + { + uint16_t h_exp = (((cl_half)v) >> (CL_HALF_MANT_DIG - 1)) & 0x1F; + uint16_t h_mant = ((cl_half)v) & 0x3FF; + return (h_exp == 0x1F && h_mant != 0); + } + else + { +#if !defined(_WIN32) + return std::isnan(v); +#else + return _isnan(v); +#endif + } +} + +template +void ZeroNanToIntCases(cl_uint count, void *mapped, Type outType) +{ + InType *inp = (InType *)gIn; + for (auto j = 0; j < count; j++) + { + if (isnan_fp(inp[j])) + memset((char *)mapped + j * gTypeSizes[outType], 0, + gTypeSizes[outType]); + } +} + +template +void FixNanToFltConversions(InType *inp, OutType *outp, cl_uint count) +{ + if (std::is_same::value) + { + for (auto j = 0; j < count; j++) + if (isnan_fp(inp[j]) && isnan_fp(outp[j])) + outp[j] = 0x7e00; // HALF_NAN + } + else + { + for (auto j = 0; j < count; j++) + if (isnan_fp(inp[j]) && isnan_fp(outp[j])) outp[j] = NAN; + } +} + +void FixNanConversions(Type outType, Type inType, void *d, cl_uint count) +{ + if (outType != kfloat && outType != kdouble && outType != khalf) + { + if (inType == kfloat) + ZeroNanToIntCases(count, d, outType); + else if (inType == kdouble) + ZeroNanToIntCases(count, d, outType); + else if (inType == khalf) + ZeroNanToIntCases(count, d, outType); + } + else if (inType == kfloat || inType == kdouble || inType == khalf) + { + // outtype and intype is float or double or half. NaN conversions for + // float/double/half could be any NaN + if (inType == kfloat) + { + float *inp = (float *)gIn; + if (outType == kdouble) + { + double *outp = (double *)d; + FixNanToFltConversions(inp, outp, count); + } + else if (outType == khalf) + { + cl_half *outp = (cl_half *)d; + FixNanToFltConversions(inp, outp, count); + } + } + else if (inType == kdouble) + { + double *inp = (double *)gIn; + if (outType == kfloat) + { + float *outp = (float *)d; + FixNanToFltConversions(inp, outp, count); + } + else if (outType == khalf) + { + cl_half *outp = (cl_half *)d; + FixNanToFltConversions(inp, outp, count); + } + } + else if (inType == khalf) + { + cl_half *inp = (cl_half *)gIn; + if (outType == kfloat) + { + float *outp = (float *)d; + FixNanToFltConversions(inp, outp, count); + } + else if (outType == kdouble) + { + double *outp = (double *)d; + FixNanToFltConversions(inp, outp, count); + } + } + } +} + void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status, void *data) @@ -963,7 +1171,6 @@ void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status, Type outType = info->parent->outType; // the data type of the conversion result Type inType = info->parent->inType; // the data type of the conversion input - size_t j; cl_int error; cl_event doneBarrier = info->parent->doneBarrier; @@ -985,51 +1192,7 @@ void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status, // Patch up NaNs conversions to integer to zero -- these can be converted to // any integer - if (outType != kfloat && outType != kdouble) - { - if (inType == kfloat) - { - float *inp = (float *)gIn; - for (j = 0; j < count; j++) - { - if (isnan(inp[j])) - memset((char *)mapped + j * gTypeSizes[outType], 0, - gTypeSizes[outType]); - } - } - if (inType == kdouble) - { - double *inp = (double *)gIn; - for (j = 0; j < count; j++) - { - if (isnan(inp[j])) - memset((char *)mapped + j * gTypeSizes[outType], 0, - gTypeSizes[outType]); - } - } - } - else if (inType == kfloat || inType == kdouble) - { // outtype and intype is float or double. NaN conversions for float <-> - // double can be any NaN - if (inType == kfloat && outType == kdouble) - { - float *inp = (float *)gIn; - double *outp = (double *)mapped; - for (j = 0; j < count; j++) - { - if (isnan(inp[j]) && isnan(outp[j])) outp[j] = NAN; - } - } - if (inType == kdouble && outType == kfloat) - { - double *inp = (double *)gIn; - float *outp = (float *)mapped; - for (j = 0; j < count; j++) - { - if (isnan(inp[j]) && isnan(outp[j])) outp[j] = NAN; - } - } - } + FixNanConversions(outType, inType, mapped, count); if (memcmp(mapped, gRef, count * gTypeSizes[outType])) info->result = @@ -1077,12 +1240,8 @@ void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status, // CalcReferenceValuesComplete exit. } -// - namespace conv_test { -//////////////////////////////////////////////////////////////////////////////// - cl_int InitData(cl_uint job_id, cl_uint thread_id, void *p) { DataInitBase *info = (DataInitBase *)p; @@ -1092,8 +1251,6 @@ cl_int InitData(cl_uint job_id, cl_uint thread_id, void *p) return CL_SUCCESS; } -//////////////////////////////////////////////////////////////////////////////// - cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p) { DataInitBase *info = (DataInitBase *)p; @@ -1102,7 +1259,6 @@ cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p) Type inType = info->inType; Type outType = info->outType; RoundingMode round = info->round; - size_t j; Force64BitFPUPrecision(); @@ -1110,7 +1266,6 @@ cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p) void *a = (cl_uchar *)gAllowZ + job_id * count; void *d = (cl_uchar *)gRef + job_id * count * gTypeSizes[info->outType]; - if (outType != inType) { // create the reference while we wait @@ -1144,7 +1299,33 @@ cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p) qcom_sat = info->sat; #endif - RoundingMode oldRound = set_round(round, outType); + RoundingMode oldRound; + if (outType == khalf) + { + oldRound = set_round(kRoundToNearestEven, kfloat); + switch (round) + { + default: + case kDefaultRoundingMode: + DataInitInfo::halfRoundingMode = + ConversionsTest::defaultHalfRoundingMode; + break; + case kRoundToNearestEven: + DataInitInfo::halfRoundingMode = CL_HALF_RTE; + break; + case kRoundUp: + DataInitInfo::halfRoundingMode = CL_HALF_RTP; + break; + case kRoundDown: + DataInitInfo::halfRoundingMode = CL_HALF_RTN; + break; + case kRoundTowardZero: + DataInitInfo::halfRoundingMode = CL_HALF_RTZ; + break; + } + } + else + oldRound = set_round(round, outType); if (info->sat) info->conv_array_sat(d, s, count); @@ -1161,6 +1342,11 @@ cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p) if (inType == kfloat || outType == kfloat) setAllowZ((uint8_t *)a, (uint32_t *)s, count); } + if (gForceHalfFTZ) + { + if (inType == khalf || outType == khalf) + setAllowZ((uint8_t *)a, (uint32_t *)s, count); + } } else { @@ -1170,55 +1356,11 @@ cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p) // Patch up NaNs conversions to integer to zero -- these can be converted to // any integer - if (info->outType != kfloat && info->outType != kdouble) - { - if (inType == kfloat) - { - float *inp = (float *)s; - for (j = 0; j < count; j++) - { - if (isnan(inp[j])) - memset((char *)d + j * gTypeSizes[outType], 0, - gTypeSizes[outType]); - } - } - if (inType == kdouble) - { - double *inp = (double *)s; - for (j = 0; j < count; j++) - { - if (isnan(inp[j])) - memset((char *)d + j * gTypeSizes[outType], 0, - gTypeSizes[outType]); - } - } - } - else if (inType == kfloat || inType == kdouble) - { // outtype and intype is float or double. NaN conversions for float <-> - // double can be any NaN - if (inType == kfloat && outType == kdouble) - { - float *inp = (float *)s; - for (j = 0; j < count; j++) - { - if (isnan(inp[j])) ((double *)d)[j] = NAN; - } - } - if (inType == kdouble && outType == kfloat) - { - double *inp = (double *)s; - for (j = 0; j < count; j++) - { - if (isnan(inp[j])) ((float *)d)[j] = NAN; - } - } - } + FixNanConversions(outType, inType, d, count); return CL_SUCCESS; } -//////////////////////////////////////////////////////////////////////////////// - uint64_t GetTime(void) { #if defined(__APPLE__) @@ -1233,8 +1375,6 @@ uint64_t GetTime(void) #endif } -//////////////////////////////////////////////////////////////////////////////// - // Note: not called reentrantly void WriteInputBufferComplete(void *data) { @@ -1295,8 +1435,6 @@ void WriteInputBufferComplete(void *data) // automatically soon after we exit. } -//////////////////////////////////////////////////////////////////////////////// - cl_program MakeProgram(Type outType, Type inType, SaturationMode sat, RoundingMode round, int vectorSize, cl_kernel *outKernel) { @@ -1308,6 +1446,9 @@ cl_program MakeProgram(Type outType, Type inType, SaturationMode sat, if (outType == kdouble || inType == kdouble) source << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; + if (outType == khalf || inType == khalf) + source << "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; + // Create the program. This is a bit complicated because we are trying to // avoid byte and short stores. if (0 == vectorSize) @@ -1408,7 +1549,7 @@ cl_program MakeProgram(Type outType, Type inType, SaturationMode sat, *outKernel = NULL; const char *flags = NULL; - if (gForceFTZ) flags = "-cl-denorms-are-zero"; + if (gForceFTZ || gForceHalfFTZ) flags = "-cl-denorms-are-zero"; // build it std::string sourceString = source.str(); diff --git a/test_conformance/conversions/basic_test_conversions.h b/test_conformance/conversions/basic_test_conversions.h index 2314ee748b..6fe88461db 100644 --- a/test_conformance/conversions/basic_test_conversions.h +++ b/test_conformance/conversions/basic_test_conversions.h @@ -30,6 +30,8 @@ #include #endif +#include + #include "harness/mt19937.h" #include "harness/testHarness.h" #include "harness/typeWrappers.h" @@ -76,6 +78,8 @@ extern cl_mem gInBuffer; extern cl_mem gOutBuffers[]; extern int gHasDouble; extern int gTestDouble; +extern int gHasHalfs; +extern int gTestHalfs; extern int gWimpyMode; extern int gWimpyReductionFactor; extern int gSkipTesting; @@ -87,6 +91,8 @@ extern int gReportAverageTimes; extern int gStartTestNumber; extern int gEndTestNumber; extern int gIsRTZ; +extern int gForceHalfFTZ; +extern int gIsHalfRTZ; extern void *gIn; extern void *gRef; extern void *gAllowZ; @@ -135,7 +141,7 @@ struct CalcRefValsBase cl_int result; }; -template +template struct CalcRefValsPat : CalcRefValsBase { int check_result(void *, uint32_t, int) override; @@ -162,8 +168,12 @@ struct WriteInputBufferInfo }; // Must be aligned with Type enums! -using TypeIter = std::tuple; +using TypeIter = + std::tuple; + +// hardcoded solution needed due to typeid confusing cl_ushort/cl_half +constexpr bool isTypeFp[] = { 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0 }; // Helper test fixture for constructing OpenCL objects used in testing // a variety of simple command-buffer enqueue scenarios. @@ -179,13 +189,13 @@ struct ConversionsTest // Test body returning an OpenCL error code cl_int Run(); - template + template int DoTest(Type outType, Type inType, SaturationMode sat, RoundingMode round); - template + template void TestTypesConversion(const Type &inType, const Type &outType, int &tn, - const int smvs); + int startMinVectorSize); protected: cl_context context; @@ -195,6 +205,9 @@ struct ConversionsTest size_t num_elements; TypeIter typeIterator; + +public: + static cl_half_rounding_mode defaultHalfRoundingMode; }; struct CustomConversionsTest : ConversionsTest @@ -221,17 +234,18 @@ int MakeAndRunTest(cl_device_id device, cl_context context, struct TestType { - template bool testType(Type in) + template bool testType(Type in) { switch (in) { default: return false; case kuchar: return std::is_same::value; case kchar: return std::is_same::value; - case kushort: return std::is_same::value; + case kushort: return std::is_same::value && !FP; case kshort: return std::is_same::value; case kuint: return std::is_same::value; case kint: return std::is_same::value; + case khalf: return std::is_same::value && FP; case kfloat: return std::is_same::value; case kdouble: return std::is_same::value; case kulong: return std::is_same::value; @@ -263,13 +277,15 @@ struct IterOverTypes : public TestType typename InType> void iterate_in_type(const InType &t) { - if (!testType(inType)) vlog_error("Unexpected data type!\n"); + if (!testType(inType)) + vlog_error("Unexpected data type!\n"); - if (!testType(outType)) vlog_error("Unexpected data type!\n"); + if (!testType(outType)) + vlog_error("Unexpected data type!\n"); // run the conversions - test.TestTypesConversion(inType, outType, testNumber, - startMinVectorSize); + test.TestTypesConversion( + inType, outType, testNumber, startMinVectorSize); inType = (Type)(inType + 1); } @@ -337,11 +353,13 @@ struct IterOverSelectedTypes : public TestType typename InType> void iterate_in_type(const InType &t) { - if (testType(inType) && testType(outType)) + if (testType(inType) + && testType(outType)) { // run selected conversion // testing of the result will happen afterwards - test.DoTest(outType, inType, saturation, rounding); + test.DoTest( + outType, inType, saturation, rounding); } } diff --git a/test_conformance/conversions/conversions_data_info.h b/test_conformance/conversions/conversions_data_info.h index c62d11ae75..043c509d1f 100644 --- a/test_conformance/conversions/conversions_data_info.h +++ b/test_conformance/conversions/conversions_data_info.h @@ -28,8 +28,11 @@ extern bool qcom_sat; extern roundingMode qcom_rm; #endif +#include + #include "harness/mt19937.h" #include "harness/rounding_mode.h" +#include "harness/typeWrappers.h" #include @@ -60,11 +63,17 @@ struct DataInitInfo RoundingMode round; cl_uint threads; + static cl_half_rounding_mode halfRoundingMode; static std::vector specialValuesUInt; static std::vector specialValuesFloat; static std::vector specialValuesDouble; + static std::vector specialValuesHalf; }; +#define HFF(num) cl_half_from_float(num, DataInitInfo::halfRoundingMode) +#define HTF(num) cl_half_to_float(num) +#define HFD(num) cl_half_from_double(num, DataInitInfo::halfRoundingMode) + struct DataInitBase : public DataInitInfo { virtual ~DataInitBase() = default; @@ -75,7 +84,7 @@ struct DataInitBase : public DataInitInfo virtual void init(const cl_uint &, const cl_uint &) {} }; -template +template struct DataInfoSpec : public DataInitBase { explicit DataInfoSpec(const DataInitInfo &agg); @@ -98,6 +107,16 @@ struct DataInfoSpec : public DataInitBase std::vector mdv; + constexpr bool is_in_half() const + { + return (std::is_same::value && InFP); + } + + constexpr bool is_out_half() const + { + return (std::is_same::value && OutFP); + } + void conv_array(void *out, void *in, size_t n) override { for (size_t i = 0; i < n; i++) @@ -125,19 +144,22 @@ struct DataInfoSpec : public DataInitBase } }; -template -DataInfoSpec::DataInfoSpec(const DataInitInfo &agg) +template +DataInfoSpec::DataInfoSpec( + const DataInitInfo &agg) : DataInitBase(agg), mdv(0) { if (std::is_same::value) ranges = std::make_pair(CL_FLT_MIN, CL_FLT_MAX); else if (std::is_same::value) ranges = std::make_pair(CL_DBL_MIN, CL_DBL_MAX); + else if (std::is_same::value && OutFP) + ranges = std::make_pair(HFF(CL_HALF_MIN), HFF(CL_HALF_MAX)); else if (std::is_same::value) ranges = std::make_pair(0, CL_UCHAR_MAX); else if (std::is_same::value) ranges = std::make_pair(CL_CHAR_MIN, CL_CHAR_MAX); - else if (std::is_same::value) + else if (std::is_same::value && !OutFP) ranges = std::make_pair(0, CL_USHRT_MAX); else if (std::is_same::value) ranges = std::make_pair(CL_SHRT_MIN, CL_SHRT_MAX); @@ -158,12 +180,12 @@ DataInfoSpec::DataInfoSpec(const DataInitInfo &agg) InType outMax = static_cast(ranges.second); InType eps = std::is_same::value ? (InType) FLT_EPSILON : (InType) DBL_EPSILON; - if (std::is_integral::value) + if (std::is_integral::value && !OutFP) { // to char/uchar/short/ushort/int/uint/long/ulong if (sizeof(OutType)<=sizeof(cl_short)) { // to char/uchar/short/ushort clamp_ranges= - {{outMin-0.5f, outMax + 0.5f - outMax * 0.5f * eps}, + {{outMin-0.5f, outMax + 0.5f - outMax * 0.5f * eps}, {outMin-0.5f, outMax + 0.5f - outMax * 0.5f * eps}, {outMin-1.0f+(std::is_signed::value?outMax:0.5f)*eps, outMax-1.f}, {outMin-0.0f, outMax - outMax * 0.5f * eps }, @@ -249,11 +271,55 @@ DataInfoSpec::DataInfoSpec(const DataInitInfo &agg) } } } + else if (is_in_half()) + { + float outMin = static_cast(ranges.first); + float outMax = static_cast(ranges.second); + float eps = CL_HALF_EPSILON; + cl_half_rounding_mode prev_half_round = DataInitInfo::halfRoundingMode; + DataInitInfo::halfRoundingMode = CL_HALF_RTZ; + + if (std::is_integral::value) + { // to char/uchar/short/ushort/int/uint/long/ulong + if (sizeof(OutType)<=sizeof(cl_char) || std::is_same::value) + { // to char/uchar + clamp_ranges= + {{HFF(outMin-0.5f), HFF(outMax + 0.5f - outMax * 0.5f * eps)}, + {HFF(outMin-0.5f), HFF(outMax + 0.5f - outMax * 0.5f * eps)}, + {HFF(outMin-1.0f+(std::is_signed::value?outMax:0.5f)*eps), HFF(outMax-1.f)}, + {HFF(outMin-0.0f), HFF(outMax - outMax * 0.5f * eps) }, + {HFF(outMin-1.0f+(std::is_signed::value?outMax:0.5f)*eps), HFF(outMax - outMax * 0.5f * eps)}}; + } + else + { // to ushort/int/uint/long/ulong + if (std::is_signed::value) + { + clamp_ranges= + { {HFF(-CL_HALF_MAX), HFF(CL_HALF_MAX)}, + {HFF(-CL_HALF_MAX), HFF(CL_HALF_MAX)}, + {HFF(-CL_HALF_MAX), HFF(CL_HALF_MAX)}, + {HFF(-CL_HALF_MAX), HFF(CL_HALF_MAX)}, + {HFF(-CL_HALF_MAX), HFF(CL_HALF_MAX)}}; + } + else + { + clamp_ranges= + { {HFF(outMin), HFF(CL_HALF_MAX)}, + {HFF(outMin), HFF(CL_HALF_MAX)}, + {HFF(outMin), HFF(CL_HALF_MAX)}, + {HFF(outMin), HFF(CL_HALF_MAX)}, + {HFF(outMin), HFF(CL_HALF_MAX)}}; + } + } + } + + DataInitInfo::halfRoundingMode = prev_half_round; + } // clang-format on } -template -float DataInfoSpec::round_to_int(float f) +template +float DataInfoSpec::round_to_int(float f) { static const float magic[2] = { MAKE_HEX_FLOAT(0x1.0p23f, 0x1, 23), -MAKE_HEX_FLOAT(0x1.0p23f, 0x1, 23) }; @@ -281,8 +347,9 @@ float DataInfoSpec::round_to_int(float f) return f; } -template -long long DataInfoSpec::round_to_int_and_clamp(double f) +template +long long +DataInfoSpec::round_to_int_and_clamp(double f) { static const double magic[2] = { MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52), MAKE_HEX_DOUBLE(-0x1.0p52, -0x1LL, 52) }; @@ -313,8 +380,8 @@ long long DataInfoSpec::round_to_int_and_clamp(double f) return (long long)f; } -template -OutType DataInfoSpec::absolute(const OutType &x) +template +OutType DataInfoSpec::absolute(const OutType &x) { union { cl_uint u; @@ -331,17 +398,30 @@ OutType DataInfoSpec::absolute(const OutType &x) return u.f; } -template -void DataInfoSpec::conv(OutType *out, InType *in) +template constexpr bool is_half() +{ + return (std::is_same::value && fp); +} + +template +void DataInfoSpec::conv(OutType *out, InType *in) { - if (std::is_same::value) + if (std::is_same::value || is_in_half()) { cl_float inVal = *in; + if (std::is_same::value) + { + inVal = HTF(*in); + } if (std::is_floating_point::value) { *out = (OutType)inVal; } + else if (is_out_half()) + { + *out = HFF(*in); + } else if (std::is_same::value) { #if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) @@ -376,6 +456,8 @@ void DataInfoSpec::conv(OutType *out, InType *in) { if (std::is_same::value) *out = (OutType)*in; + else if (is_out_half()) + *out = static_cast(HFD(*in)); else *out = rint(*in); } @@ -417,7 +499,7 @@ void DataInfoSpec::conv(OutType *out, InType *in) *out = (vi == 0 ? 0.0 : static_cast(vi)); #endif } - else if (std::is_same::value) + else if (std::is_same::value || is_out_half()) { cl_float outVal = 0.f; @@ -463,7 +545,9 @@ void DataInfoSpec::conv(OutType *out, InType *in) #endif #endif - *out = outVal; + *out = std::is_same::value + ? static_cast(HFF(outVal)) + : outVal; } else { @@ -484,6 +568,8 @@ void DataInfoSpec::conv(OutType *out, InType *in) // Per IEEE-754-2008 5.4.1, 0 always converts to +0.0 *out = (*in == 0 ? 0.0 : *in); } + else if (is_out_half()) + *out = static_cast(HFF(*in == 0 ? 0.f : *in)); else { *out = (OutType)*in; @@ -494,19 +580,26 @@ void DataInfoSpec::conv(OutType *out, InType *in) #define CLAMP(_lo, _x, _hi) \ ((_x) < (_lo) ? (_lo) : ((_x) > (_hi) ? (_hi) : (_x))) -template -void DataInfoSpec::conv_sat(OutType *out, InType *in) +template +void DataInfoSpec::conv_sat(OutType *out, + InType *in) { - if (std::is_floating_point::value) + if (std::is_floating_point::value || is_in_half()) { - if (std::is_floating_point::value) - { // in float/double, out float/double - *out = (OutType)(*in); + cl_float inVal = *in; + if (is_in_half()) inVal = HTF(*in); + + if (std::is_floating_point::value || is_out_half()) + { // in half/float/double, out half/float/double + if (is_out_half()) + *out = static_cast(HFF(inVal)); + else + *out = (OutType)(is_in_half() ? inVal : *in); } - else if ((std::is_same::value) + else if ((std::is_same::value || is_in_half()) && std::is_same::value) { - cl_float x = round_to_int(*in); + cl_float x = round_to_int(is_in_half() ? HTF(*in) : *in); #if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) // VS2005 (at least) on x86 uses fistp to store the float as a @@ -534,47 +627,57 @@ void DataInfoSpec::conv_sat(OutType *out, InType *in) } #else *out = x >= MAKE_HEX_DOUBLE(0x1.0p64, 0x1LL, 64) - ? 0xFFFFFFFFFFFFFFFFULL - : x < 0 ? 0 : (OutType)x; + ? (OutType)0xFFFFFFFFFFFFFFFFULL + : x < 0 ? 0 + : (OutType)x; #endif } - else if ((std::is_same::value) + else if ((std::is_same::value || is_in_half()) && std::is_same::value) { - cl_float f = round_to_int(*in); + cl_float f = round_to_int(is_in_half() ? HTF(*in) : *in); *out = f >= MAKE_HEX_DOUBLE(0x1.0p63, 0x1LL, 63) - ? 0x7FFFFFFFFFFFFFFFULL + ? (OutType)0x7FFFFFFFFFFFFFFFULL : f < MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63) - ? 0x8000000000000000LL - : (OutType)f; + ? (OutType)0x8000000000000000LL + : (OutType)f; } else if (std::is_same::value && std::is_same::value) { InType f = rint(*in); *out = f >= MAKE_HEX_DOUBLE(0x1.0p64, 0x1LL, 64) - ? 0xFFFFFFFFFFFFFFFFULL - : f < 0 ? 0 : (OutType)f; + ? (OutType)0xFFFFFFFFFFFFFFFFULL + : f < 0 ? 0 + : (OutType)f; } else if (std::is_same::value && std::is_same::value) { InType f = rint(*in); *out = f >= MAKE_HEX_DOUBLE(0x1.0p63, 0x1LL, 63) - ? 0x7FFFFFFFFFFFFFFFULL + ? (OutType)0x7FFFFFFFFFFFFFFFULL : f < MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63) - ? 0x8000000000000000LL - : (OutType)f; + ? (OutType)0x8000000000000000LL + : (OutType)f; } else - { // in float/double, out char/uchar/short/ushort/int/uint - *out = - CLAMP(ranges.first, round_to_int_and_clamp(*in), ranges.second); + { // in half/float/double, out char/uchar/short/ushort/int/uint + *out = CLAMP(ranges.first, + round_to_int_and_clamp(is_in_half() ? inVal : *in), + ranges.second); } } else if (std::is_integral::value && std::is_integral::value) { + if (is_out_half()) + { + *out = std::is_signed::value + ? static_cast(HFF((cl_float)*in)) + : absolute(static_cast(HFF((cl_float)*in))); + } + else { if ((std::is_signed::value && std::is_signed::value) @@ -612,14 +715,40 @@ void DataInfoSpec::conv_sat(OutType *out, InType *in) } } -template -void DataInfoSpec::init(const cl_uint &job_id, - const cl_uint &thread_id) +template +void DataInfoSpec::init(const cl_uint &job_id, + const cl_uint &thread_id) { uint64_t ulStart = start; void *pIn = (char *)gIn + job_id * size * gTypeSizes[inType]; - if (std::is_integral::value) + if (is_in_half()) + { + cl_half *o = (cl_half *)pIn; + int i; + + if (gIsEmbedded) + for (i = 0; i < size; i++) + o[i] = (cl_half)genrand_int32(mdv[thread_id]); + else + for (i = 0; i < size; i++) o[i] = (cl_half)((i + ulStart) % 0xffff); + + if (0 == ulStart) + { + size_t tableSize = specialValuesHalf.size() + * sizeof(decltype(specialValuesHalf)::value_type); + if (sizeof(InType) * size < tableSize) + tableSize = sizeof(InType) * size; + memcpy((char *)(o + i) - tableSize, &specialValuesHalf.front(), + tableSize); + } + + if (kUnsaturated == sat) + { + for (i = 0; i < size; i++) o[i] = clamp(o[i]); + } + } + else if (std::is_integral::value) { InType *o = (InType *)pIn; if (sizeof(InType) <= sizeof(cl_short)) @@ -774,10 +903,10 @@ void DataInfoSpec::init(const cl_uint &job_id, } } -template -InType DataInfoSpec::clamp(const InType &in) +template +InType DataInfoSpec::clamp(const InType &in) { - if (std::is_integral::value) + if (std::is_integral::value && !OutFP) { if (std::is_same::value) { @@ -789,6 +918,11 @@ InType DataInfoSpec::clamp(const InType &in) return dclamp(clamp_ranges[round].first, in, clamp_ranges[round].second); } + else if (std::is_same::value && InFP) + { + return HFF(fclamp(HTF(clamp_ranges[round].first), HTF(in), + HTF(clamp_ranges[round].second))); + } } return in; } diff --git a/test_conformance/conversions/test_conversions.cpp b/test_conformance/conversions/test_conversions.cpp index dab61dc500..b7d6b07156 100644 --- a/test_conformance/conversions/test_conversions.cpp +++ b/test_conformance/conversions/test_conversions.cpp @@ -73,9 +73,9 @@ static void PrintUsage(void); test_status InitCL(cl_device_id device); -const char *gTypeNames[kTypeCount] = { "uchar", "char", "ushort", "short", - "uint", "int", "float", "double", - "ulong", "long" }; +const char *gTypeNames[kTypeCount] = { "uchar", "char", "ushort", "short", + "uint", "int", "half", "float", + "double", "ulong", "long" }; const char *gRoundingModeNames[kRoundingModeCount] = { "", "_rte", "_rtp", "_rtn", "_rtz" }; @@ -83,9 +83,9 @@ const char *gRoundingModeNames[kRoundingModeCount] = { "", "_rte", "_rtp", const char *gSaturationNames[2] = { "", "_sat" }; size_t gTypeSizes[kTypeCount] = { - sizeof(cl_uchar), sizeof(cl_char), sizeof(cl_ushort), sizeof(cl_short), - sizeof(cl_uint), sizeof(cl_int), sizeof(cl_float), sizeof(cl_double), - sizeof(cl_ulong), sizeof(cl_long), + sizeof(cl_uchar), sizeof(cl_char), sizeof(cl_ushort), sizeof(cl_short), + sizeof(cl_uint), sizeof(cl_int), sizeof(cl_half), sizeof(cl_float), + sizeof(cl_double), sizeof(cl_ulong), sizeof(cl_long), }; char appName[64] = "ctest"; @@ -221,13 +221,17 @@ static int ParseArgs(int argc, const char **argv) switch (*arg) { case 'd': gTestDouble ^= 1; break; + case 'h': gTestHalfs ^= 1; break; case 'l': gSkipTesting ^= 1; break; case 'm': gMultithread ^= 1; break; case 'w': gWimpyMode ^= 1; break; case '[': parseWimpyReductionFactor(arg, gWimpyReductionFactor); break; - case 'z': gForceFTZ ^= 1; break; + case 'z': + gForceFTZ ^= 1; + gForceHalfFTZ ^= 1; + break; case 't': gTimeResults ^= 1; break; case 'a': gReportAverageTimes ^= 1; break; case '1': @@ -355,7 +359,6 @@ static void PrintUsage(void) } - test_status InitCL(cl_device_id device) { int error, i; @@ -412,6 +415,50 @@ test_status InitCL(cl_device_id device) } gTestDouble &= gHasDouble; + if (is_extension_available(device, "cl_khr_fp16")) + { + gHasHalfs = 1; + + cl_device_fp_config floatCapabilities = 0; + if ((error = clGetDeviceInfo(device, CL_DEVICE_HALF_FP_CONFIG, + sizeof(floatCapabilities), + &floatCapabilities, NULL))) + floatCapabilities = 0; + + if (0 == (CL_FP_DENORM & floatCapabilities)) gForceHalfFTZ ^= 1; + + if (0 == (floatCapabilities & CL_FP_ROUND_TO_NEAREST)) + { + char profileStr[128] = ""; + // Verify that we are an embedded profile device + if ((error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, + sizeof(profileStr), profileStr, NULL))) + { + vlog_error("FAILURE: Could not get device profile: error %d\n", + error); + return TEST_FAIL; + } + + if (strcmp(profileStr, "EMBEDDED_PROFILE")) + { + vlog_error( + "FAILURE: non-embedded profile device does not support " + "CL_FP_ROUND_TO_NEAREST\n"); + return TEST_FAIL; + } + + if (0 == (floatCapabilities & CL_FP_ROUND_TO_ZERO)) + { + vlog_error("FAILURE: embedded profile device supports neither " + "CL_FP_ROUND_TO_NEAREST or CL_FP_ROUND_TO_ZERO\n"); + return TEST_FAIL; + } + + gIsHalfRTZ = 1; + } + } + gTestHalfs &= gHasHalfs; + // detect whether profile of the device is embedded char profile[1024] = ""; if ((error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), @@ -492,8 +539,12 @@ test_status InitCL(cl_device_id device) vlog("\tSubnormal values supported for floats? %s\n", no_yes[0 != (CL_FP_DENORM & floatCapabilities)]); vlog("\tTesting with FTZ mode ON for floats? %s\n", no_yes[0 != gForceFTZ]); + vlog("\tTesting with FTZ mode ON for halfs? %s\n", + no_yes[0 != gForceHalfFTZ]); vlog("\tTesting with default RTZ mode for floats? %s\n", no_yes[0 != gIsRTZ]); + vlog("\tTesting with default RTZ mode for halfs? %s\n", + no_yes[0 != gIsHalfRTZ]); vlog("\tHas Double? %s\n", no_yes[0 != gHasDouble]); if (gHasDouble) vlog("\tTest Double? %s\n", no_yes[0 != gTestDouble]); vlog("\tHas Long? %s\n", no_yes[0 != gHasLong]); @@ -503,5 +554,3 @@ test_status InitCL(cl_device_id device) vlog("\n"); return TEST_PASS; } - -