Skip to content

Commit

Permalink
[SYCL][libclc] Add generic addrspace overloads of math builtins
Browse files Browse the repository at this point in the history
The generic implementations of the math builtins which take pointer
arguments were using unqualified address spaces. This could either
resolve to the generic address space or the private address space,
depending on whether the target supports the generic address space or
not.

The newer unified OpenCL C specification is clearer in mandating that
all targets must provide overloads on the explicitly qualified
'private' address space, as well as optionally defining ones on the
(unqualified) generic address space. This meant that most of these math
builtins were lacking one overload: either the private or generic one,
depending on which target was compiling the builtins.

One notable exception here is NVIDIA, which maps the private and generic
address spaces to the same target address space. Thus declaring builtins
overloaded on these two address spaces results in a mangling clash,
which we can't have. Therefore we now say that NVIDIA targets don't
support the generic address space for the purposes of these builtins. In
reality, the builtins with the private address space are functionally
equivalent to the generic ones, so users won't notice.

For the sake of code clarity, although the 'generic' keyword is
technically reserved, we know that clang defines it to be the
corresponding unqualified generic address space, so we use that to be
explicit. We always compile with clang so it shouldn't be a problem with
portability.

With this we can also enable a LIT test for HIP, which was previously
failing as it couldn't find the generic address space overloads of fract
and lgamma_r.

There are other builtins to which this treatment may need to be applied,
such as the vload and vstore variants. Those will be handled in a
subsequent patch.
  • Loading branch information
frasercrmck committed Mar 19, 2024
1 parent feb80ba commit b6068f0
Show file tree
Hide file tree
Showing 16 changed files with 103 additions and 20 deletions.
15 changes: 13 additions & 2 deletions libclc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,7 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
endif()
message( " DEVICE: ${d} ( ${${d}_aliases} )" )

set ( supports_generic_addrspace TRUE )
if ( ${ARCH} STREQUAL "spirv" OR ${ARCH} STREQUAL "spirv64" )
if( ${ARCH} STREQUAL "spirv" )
set( t "spir--" )
Expand All @@ -416,6 +417,14 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
elseif( ${ARCH} STREQUAL "nvptx" OR ${ARCH} STREQUAL "nvptx64" )
set( build_flags )
set( opt_flags -O3 "--nvvm-reflect-enable=false" )
# Note: when declaring builtins, we don't consider NVIDIA as supporting
# the generic address space. This is because it maps to the same target
# address space as the private address space, resulting in a mangling
# clash.
# Since we can't declare builtins overloaded on both address spaces
# simultaneously, we choose to declare the builtins using the private space,
# which will also work for the generic address space.
set( supports_generic_addrspace FALSE )
elseif( ${ARCH} STREQUAL "clspv64" )
set( t "spir64--" )
set( build_flags "-Wno-unknown-assumption")
Expand All @@ -437,8 +446,10 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
"+cl_khr_fp16,"
"+__opencl_c_3d_image_writes,"
"+__opencl_c_images,"
"+cl_khr_3d_image_writes,"
"+__opencl_c_generic_address_space")
"+cl_khr_3d_image_writes")
# Advertise the generic address space feature only for targets that support
# it; supports_generic_addrspace defaults to TRUE and is set to FALSE for the
# nvptx/nvptx64 architectures earlier in this loop body.
if(supports_generic_addrspace)
string( APPEND CL_3_0_EXTENSIONS ",+__opencl_c_generic_address_space" )
endif()
list( APPEND flags ${CL_3_0_EXTENSIONS})

# Add platform specific flags
Expand Down
11 changes: 6 additions & 5 deletions libclc/generic/lib/math/fract.inc
Original file line number Diff line number Diff line change
Expand Up @@ -31,18 +31,19 @@
#define ZERO 0.0h
#endif

_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fract(__CLC_GENTYPE x, private __CLC_GENTYPE *iptr) {
return __spirv_ocl_fract(x, iptr);
}


// Defines the fract overload whose iptr argument points into `addrspace`.
// Every overload forwards both arguments unchanged to __spirv_ocl_fract.
#define FRACT_DEF(addrspace) \
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fract(__CLC_GENTYPE x, addrspace __CLC_GENTYPE *iptr) { \
return __spirv_ocl_fract(x, iptr); \
}

// Overloads for the explicitly named address spaces.
FRACT_DEF(private);
FRACT_DEF(local);
FRACT_DEF(global);
// The generic overload is emitted only for OpenCL C 2.0, or for OpenCL C 3.0
// and later when the __opencl_c_generic_address_space feature macro is defined.
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
defined(__opencl_c_generic_address_space))
FRACT_DEF(generic);
#endif

#undef MIN_CONSTANT
#undef ZERO
9 changes: 9 additions & 0 deletions libclc/generic/lib/math/frexp.cl
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,12 @@
#define __CLC_ADDRESS_SPACE local
#include <clc/math/gentype.inc>
#undef __CLC_ADDRESS_SPACE

// Instantiate frexp.inc once more with __CLC_ADDRESS_SPACE set to generic.
// This is compiled only for OpenCL C 2.0, or for OpenCL C 3.0 and later when
// the __opencl_c_generic_address_space feature macro is defined.
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
defined(__opencl_c_generic_address_space))
#define __CLC_BODY <frexp.inc>
#define __CLC_ADDRESS_SPACE generic
// NOTE(review): gentype.inc presumably expands __CLC_BODY per gentype and
// then undefines it, which is why it is redefined here -- confirm.
#include <clc/math/gentype.inc>
#undef __CLC_ADDRESS_SPACE
#endif
9 changes: 7 additions & 2 deletions libclc/generic/lib/math/modf.inc
Original file line number Diff line number Diff line change
Expand Up @@ -28,20 +28,25 @@
#define ZERO 0.0h
#endif

_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE modf(__CLC_GENTYPE x, __CLC_GENTYPE *iptr) {
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE modf(__CLC_GENTYPE x, private __CLC_GENTYPE *iptr) {
*iptr = trunc(x);
return copysign(isinf(x) ? ZERO : x - *iptr, x);
}

#define MODF_DEF(addrspace) \
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE modf(__CLC_GENTYPE x, addrspace __CLC_GENTYPE *iptr) { \
__CLC_GENTYPE private_iptr; \
private __CLC_GENTYPE private_iptr; \
__CLC_GENTYPE ret = modf(x, &private_iptr); \
*iptr = private_iptr; \
return ret; \
}

// Instantiate the MODF_DEF wrapper for the local and global address spaces.
MODF_DEF(local);
MODF_DEF(global);
// The generic wrapper is emitted only for OpenCL C 2.0, or for OpenCL C 3.0
// and later when the __opencl_c_generic_address_space feature macro is defined.
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
defined(__opencl_c_generic_address_space))
MODF_DEF(generic);
#endif

#undef ZERO
9 changes: 9 additions & 0 deletions libclc/generic/lib/math/remquo.cl
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,12 @@
#define __CLC_ADDRESS_SPACE private
#include <clc/math/gentype.inc>
#undef __CLC_ADDRESS_SPACE

// Instantiate remquo.inc once more with __CLC_ADDRESS_SPACE set to generic.
// This is compiled only for OpenCL C 2.0, or for OpenCL C 3.0 and later when
// the __opencl_c_generic_address_space feature macro is defined.
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
defined(__opencl_c_generic_address_space))
#define __CLC_BODY <remquo.inc>
#define __CLC_ADDRESS_SPACE generic
// NOTE(review): gentype.inc presumably expands __CLC_BODY per gentype and
// then undefines it, which is why it is redefined here -- confirm.
#include <clc/math/gentype.inc>
#undef __CLC_ADDRESS_SPACE
#endif
6 changes: 3 additions & 3 deletions libclc/generic/lib/math/remquo.inc
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
// TODO: Enable half precision when the sw routine is implemented
#if __CLC_FPSIZE > 16
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE remquo(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_ADDRESS_SPACE __CLC_INTN *q) {
__CLC_INTN local_q;
__CLC_GENTYPE ret = __clc_remquo(x, y, &local_q);
*q = local_q;
private __CLC_INTN private_q;
__CLC_GENTYPE ret = __clc_remquo(x, y, &private_q);
*q = private_q;
return ret;
}
#endif
5 changes: 5 additions & 0 deletions libclc/generic/lib/math/sincos.inc
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@
// Apply __CLC_DECLARE_SINCOS (defined earlier in this file, outside this
// view) to each explicitly named address space for the current gentype.
__CLC_DECLARE_SINCOS(global, __CLC_GENTYPE)
__CLC_DECLARE_SINCOS(local, __CLC_GENTYPE)
__CLC_DECLARE_SINCOS(private, __CLC_GENTYPE)
// The generic variant is emitted only for OpenCL C 2.0, or for OpenCL C 3.0
// and later when the __opencl_c_generic_address_space feature macro is defined.
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
defined(__opencl_c_generic_address_space))
__CLC_DECLARE_SINCOS(generic, __CLC_GENTYPE)
#endif

#undef __CLC_DECLARE_SINCOS
#endif
7 changes: 6 additions & 1 deletion libclc/generic/libspirv/math/fract.inc
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,19 @@ __spirv_ocl_fract(__CLC_GENTYPE x, private __CLC_GENTYPE *iptr) {
#define FRACT_DEF(addrspace) \
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __spirv_ocl_fract( \
__CLC_GENTYPE x, addrspace __CLC_GENTYPE *iptr) { \
__CLC_GENTYPE private_iptr; \
private __CLC_GENTYPE private_iptr; \
__CLC_GENTYPE ret = __spirv_ocl_fract(x, &private_iptr); \
*iptr = private_iptr; \
return ret; \
}

// Instantiate the FRACT_DEF wrapper for the local and global address spaces.
FRACT_DEF(local);
FRACT_DEF(global);
// The generic wrapper is emitted only for OpenCL C 2.0, or for OpenCL C 3.0
// and later when the __opencl_c_generic_address_space feature macro is defined.
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
defined(__opencl_c_generic_address_space))
FRACT_DEF(generic);
#endif

#undef MIN_CONSTANT
#undef ZERO
Expand Down
9 changes: 9 additions & 0 deletions libclc/generic/libspirv/math/frexp.cl
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,12 @@
#define __CLC_ADDRESS_SPACE local
#include <clc/math/gentype.inc>
#undef __CLC_ADDRESS_SPACE

// Instantiate frexp.inc once more with __CLC_ADDRESS_SPACE set to generic.
// This is compiled only for OpenCL C 2.0, or for OpenCL C 3.0 and later when
// the __opencl_c_generic_address_space feature macro is defined.
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
defined(__opencl_c_generic_address_space))
#define __CLC_BODY <frexp.inc>
#define __CLC_ADDRESS_SPACE generic
// NOTE(review): gentype.inc presumably expands __CLC_BODY per gentype and
// then undefines it, which is why it is redefined here -- confirm.
#include <clc/math/gentype.inc>
#undef __CLC_ADDRESS_SPACE
#endif
9 changes: 9 additions & 0 deletions libclc/generic/libspirv/math/lgamma_r.cl
Original file line number Diff line number Diff line change
Expand Up @@ -658,3 +658,12 @@ _CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __spirv_ocl_lgamma_r, half,
#define __CLC_BODY <lgamma_r.inc>
#include <clc/math/gentype.inc>
#undef __CLC_ADDRSPACE

// Instantiate lgamma_r.inc once more with __CLC_ADDRSPACE set to generic.
// This is compiled only for OpenCL C 2.0, or for OpenCL C 3.0 and later when
// the __opencl_c_generic_address_space feature macro is defined.
// Note that this file spells the macro __CLC_ADDRSPACE, matching lgamma_r.inc.
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
defined(__opencl_c_generic_address_space))
#define __CLC_ADDRSPACE generic
#define __CLC_BODY <lgamma_r.inc>
// NOTE(review): gentype.inc presumably expands __CLC_BODY per gentype and
// then undefines it, which is why it is redefined here -- confirm.
#include <clc/math/gentype.inc>
#undef __CLC_ADDRSPACE
#endif
2 changes: 1 addition & 1 deletion libclc/generic/libspirv/math/lgamma_r.inc
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
__spirv_ocl_lgamma_r(__CLC_GENTYPE x, __CLC_ADDRSPACE __CLC_INTN *iptr) {
__CLC_INTN private_iptr;
private __CLC_INTN private_iptr;
__CLC_GENTYPE ret = __spirv_ocl_lgamma_r(x, &private_iptr);
*iptr = private_iptr;
return ret;
Expand Down
10 changes: 8 additions & 2 deletions libclc/generic/libspirv/math/modf.inc
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
#endif

_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __spirv_ocl_modf(__CLC_GENTYPE x,
__CLC_GENTYPE *iptr) {
private __CLC_GENTYPE *iptr) {
*iptr = __spirv_ocl_trunc(x);
return __spirv_ocl_copysign(
__CLC_CONVERT_NATN(__spirv_IsInf(x)) ? ZERO : x - *iptr, x);
Expand All @@ -32,7 +32,7 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __spirv_ocl_modf(__CLC_GENTYPE x,
#define MODF_DEF(addrspace) \
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __spirv_ocl_modf( \
__CLC_GENTYPE x, addrspace __CLC_GENTYPE *iptr) { \
__CLC_GENTYPE private_iptr; \
private __CLC_GENTYPE private_iptr; \
__CLC_GENTYPE ret = __spirv_ocl_modf(x, &private_iptr); \
*iptr = private_iptr; \
return ret; \
Expand All @@ -41,5 +41,11 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __spirv_ocl_modf(__CLC_GENTYPE x,
// Instantiate the MODF_DEF wrapper for the local and global address spaces.
MODF_DEF(local);
MODF_DEF(global);

// The generic wrapper is emitted only for OpenCL C 2.0, or for OpenCL C 3.0
// and later when the __opencl_c_generic_address_space feature macro is defined.
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
defined(__opencl_c_generic_address_space))
MODF_DEF(generic);
#endif

#undef __CLC_CONVERT_NATN
#undef ZERO
9 changes: 9 additions & 0 deletions libclc/generic/libspirv/math/remquo.cl
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,12 @@
#define __CLC_ADDRESS_SPACE private
#include <clc/math/gentype.inc>
#undef __CLC_ADDRESS_SPACE

// Instantiate remquo.inc once more with __CLC_ADDRESS_SPACE set to generic.
// This is compiled only for OpenCL C 2.0, or for OpenCL C 3.0 and later when
// the __opencl_c_generic_address_space feature macro is defined.
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
defined(__opencl_c_generic_address_space))
#define __CLC_BODY <remquo.inc>
#define __CLC_ADDRESS_SPACE generic
// NOTE(review): gentype.inc presumably expands __CLC_BODY per gentype and
// then undefines it, which is why it is redefined here -- confirm.
#include <clc/math/gentype.inc>
#undef __CLC_ADDRESS_SPACE
#endif
6 changes: 3 additions & 3 deletions libclc/generic/libspirv/math/remquo.inc
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@

_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __spirv_ocl_remquo(
__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_ADDRESS_SPACE __CLC_INTN *q) {
__CLC_INTN local_q;
__CLC_GENTYPE ret = __clc_remquo(x, y, &local_q);
*q = local_q;
private __CLC_INTN private_q;
__CLC_GENTYPE ret = __clc_remquo(x, y, &private_q);
*q = private_q;
return ret;
}
6 changes: 6 additions & 0 deletions libclc/generic/libspirv/math/sincos.inc
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,10 @@ __CLC_DECLARE_SINCOS(global, __CLC_GENTYPE)
// Apply __CLC_DECLARE_SINCOS (defined earlier in this file, outside this
// view) to the local and private address spaces for the current gentype.
__CLC_DECLARE_SINCOS(local, __CLC_GENTYPE)
__CLC_DECLARE_SINCOS(private, __CLC_GENTYPE)

// The generic variant is emitted only for OpenCL C 2.0, or for OpenCL C 3.0
// and later when the __opencl_c_generic_address_space feature macro is defined.
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
defined(__opencl_c_generic_address_space))
__CLC_DECLARE_SINCOS(generic, __CLC_GENTYPE)
#endif

#undef __CLC_DECLARE_SINCOS
1 change: 0 additions & 1 deletion sycl/test-e2e/USM/math.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
// UNSUPPORTED: hip
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out

Expand Down

0 comments on commit b6068f0

Please sign in to comment.