Skip to content

Commit

Permalink
[libclc][hip] Fix half shuffles and reenable reduction test (#13016)
Browse files Browse the repository at this point in the history
- Fix broken half shuffles on amd.
- Reenable Reduction test.

The fix is to bitcast the half to its storage type (unsigned short), without
performing a value conversion, and then extend it to int for the shuffle.

---------

Signed-off-by: JackAKirk <[email protected]>
  • Loading branch information
JackAKirk authored Mar 19, 2024
1 parent feb80ba commit b13a3c4
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 7 deletions.
34 changes: 30 additions & 4 deletions libclc/amdgcn-amdhsa/libspirv/misc/sub_group_shuffle.cl
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,13 @@ __AMDGCN_CLC_SUBGROUP_SUB_I32(unsigned short, t);

#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
__AMDGCN_CLC_SUBGROUP_SUB_I32(half, DF16_);
// Subgroup shuffle for half, implemented on top of the int overload.
//
// The half is reinterpreted (bitcast, no value conversion) as its 16-bit
// storage type, widened to int for the int shuffle, and the low 16 bits of
// the shuffled result are reinterpreted back as a half.
//
// Data:         value contributed by the calling invocation.
// InvocationId: forwarded unchanged to the int shuffle.
_CLC_DEF half _Z28__spirv_SubgroupShuffleINTELIDF16_ET_S0_j(
    half Data, unsigned int InvocationId) {
  // Raw bit pattern of the half; as_ushort does not change the bits.
  const unsigned short bits = as_ushort(Data);
  // The unsigned short -> int cast preserves the value, so the bit pattern
  // survives the round trip through the int shuffle and the narrowing cast.
  return as_half((unsigned short)_Z28__spirv_SubgroupShuffleINTELIiET_S0_j(
      (int)bits, InvocationId));
}
#endif // cl_khr_fp16

#undef __AMDGCN_CLC_SUBGROUP_SUB_I32
Expand Down Expand Up @@ -170,7 +176,13 @@ __AMDGCN_CLC_SUBGROUP_XOR_SUB_I32(unsigned char, h);
__AMDGCN_CLC_SUBGROUP_XOR_SUB_I32(short, s);
__AMDGCN_CLC_SUBGROUP_XOR_SUB_I32(unsigned short, t);
#ifdef cl_khr_fp16
__AMDGCN_CLC_SUBGROUP_XOR_SUB_I32(half, DF16_);
// Subgroup XOR shuffle for half, implemented on top of the int overload.
//
// The half is reinterpreted (bitcast, no value conversion) as its 16-bit
// storage type, widened to int for the int XOR shuffle, and the low 16 bits
// of the shuffled result are reinterpreted back as a half.
//
// Data:         value contributed by the calling invocation.
// InvocationId: forwarded unchanged to the int XOR shuffle.
_CLC_DEF half _Z31__spirv_SubgroupShuffleXorINTELIDF16_ET_S0_j(
    half Data, unsigned int InvocationId) {
  // Raw bit pattern of the half; as_ushort does not change the bits.
  const unsigned short bits = as_ushort(Data);
  // The unsigned short -> int cast preserves the value, so the bit pattern
  // survives the round trip through the int shuffle and the narrowing cast.
  return as_half((unsigned short)_Z31__spirv_SubgroupShuffleXorINTELIiET_S0_j(
      (int)bits, InvocationId));
}
#endif // cl_khr_fp16
#undef __AMDGCN_CLC_SUBGROUP_XOR_SUB_I32

Expand Down Expand Up @@ -321,7 +333,14 @@ __AMDGCN_CLC_SUBGROUP_UP_SUB_I32(short, s);
__AMDGCN_CLC_SUBGROUP_UP_SUB_I32(unsigned short, t);
// half
#ifdef cl_khr_fp16
__AMDGCN_CLC_SUBGROUP_UP_SUB_I32(half, DF16_);
// Subgroup shuffle-up for half, implemented on top of the int overload.
//
// Both half operands are reinterpreted (bitcast, no value conversion) as
// their 16-bit storage type and widened to int for the int shuffle-up; the
// low 16 bits of the shuffled result are reinterpreted back as a half.
//
// previous, current: the two half operands, passed to the int shuffle-up in
//                    the same order.
// delta:             forwarded unchanged to the int shuffle-up.
_CLC_DEF half _Z30__spirv_SubgroupShuffleUpINTELIDF16_ET_S0_S0_j(
    half previous, half current, unsigned int delta) {
  // Raw bit patterns of the operands; as_ushort does not change the bits.
  const unsigned short previousBits = as_ushort(previous);
  const unsigned short currentBits = as_ushort(current);
  // The unsigned short -> int casts preserve the values, so the bit patterns
  // survive the round trip through the int shuffle and the narrowing cast.
  return as_half(
      (unsigned short)_Z30__spirv_SubgroupShuffleUpINTELIiET_S0_S0_j(
          (int)previousBits, (int)currentBits, delta));
}
#endif // cl_khr_fp16
#undef __AMDGCN_CLC_SUBGROUP_UP_SUB_I32

Expand Down Expand Up @@ -474,7 +493,14 @@ __AMDGCN_CLC_SUBGROUP_DOWN_TO_I32(short, s);
__AMDGCN_CLC_SUBGROUP_DOWN_TO_I32(unsigned short, t);
// half
#ifdef cl_khr_fp16
__AMDGCN_CLC_SUBGROUP_DOWN_TO_I32(half, DF16_);
// Subgroup shuffle-down for half, implemented on top of the int overload.
//
// Both half operands are reinterpreted (bitcast, no value conversion) as
// their 16-bit storage type and widened to int for the int shuffle-down; the
// low 16 bits of the shuffled result are reinterpreted back as a half.
//
// current, next: the two half operands, passed to the int shuffle-down in
//                the same order.
// delta:         forwarded unchanged to the int shuffle-down.
_CLC_DEF half _Z32__spirv_SubgroupShuffleDownINTELIDF16_ET_S0_S0_j(
    half current, half next, unsigned int delta) {
  // Raw bit patterns of the operands; as_ushort does not change the bits.
  const unsigned short currentBits = as_ushort(current);
  const unsigned short nextBits = as_ushort(next);
  // The unsigned short -> int casts preserve the values, so the bit patterns
  // survive the round trip through the int shuffle and the narrowing cast.
  return as_half(
      (unsigned short)_Z32__spirv_SubgroupShuffleDownINTELIiET_S0_S0_j(
          (int)currentBits, (int)nextBits, delta));
}
#endif // cl_khr_fp16
#undef __AMDGCN_CLC_SUBGROUP_DOWN_TO_I32

Expand Down
3 changes: 0 additions & 3 deletions sycl/test-e2e/Reduction/reduction_nd_ext_half.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,6 @@
// work group size not bigger than 1`.
// XFAIL: hip_nvidia

// Incorrect result on AMD.
// XFAIL: hip_amd

// Windows doesn't yet have full shutdown().
// UNSUPPORTED: ze_debug && windows

Expand Down

0 comments on commit b13a3c4

Please sign in to comment.