[AIE2] Replace bfloat16 class with typedef __bf16.

Xilinx · Jun 12, 2024 · d854d13 · d854d13
1 parent abae161
commit d854d13
Show file tree

Hide file tree

Showing 5 changed files with 196 additions and 472 deletions.
diff --git a/clang/lib/Headers/aiebase_typedefs.h b/clang/lib/Headers/aiebase_typedefs.h
@@ -36,19 +36,11 @@ struct cint16 {
 };
 #endif
 
-#if defined(__AIENGINE__) && !defined(__PTHREAD_API__)
-#define COMPLEX_TYPE(type)                                                     \
-  typedef struct {                                                             \
-    type chess_storage(% (sizeof(type) * 2)) real;                             \
-    type imag;                                                                 \
-  } c##type
-#else
 #define COMPLEX_TYPE(type)                                                     \
   typedef struct {                                                             \
     type real;                                                                 \
     type imag;                                                                 \
   } c##type
-#endif
 
 COMPLEX_TYPE(int8_t);
 COMPLEX_TYPE(int32_t);
@@ -220,69 +212,8 @@ typedef int32_t v2int32 __attribute__((__vector_size__(8)));
 
 #if __AIEARCH__ == 20
 typedef int32_t addr_t;
-#ifdef __cplusplus
 /* bfloat16 type */
-class bfloat16 {
-  __bf16 m0;
-
-public:
-  bfloat16() = default;
-  constexpr inline bfloat16(float a0);
-
-  inline operator float() const {
-    const uint16_t I16 = __builtin_bit_cast(const uint16_t, m0);
-    uint32_t I32 = int32_t(I16) << 16;
-    return __builtin_bit_cast(float, I32);
-  }
-  inline operator __bf16() const { return m0; }
-  inline explicit operator int() const { return __builtin_bfloat16_to_int(m0); }
-  inline explicit operator char() const {
-    return (char)(int)*this;
-  }
-  inline explicit operator signed char() const {
-    return (signed char)(int)*this;
-  }
-  inline explicit operator signed short() const {
-    return (signed short)(int)*this;
-  }
-  inline explicit operator unsigned() const { return (unsigned)(int)*this; }
-  inline explicit operator unsigned char() const {
-    return (unsigned char)(unsigned)*this;
-  }
-  inline explicit operator unsigned short() const {
-    return (unsigned short)(unsigned)*this;
-  }
-};
-/* arithmetic operation with bfloat16 operands */
-#define BFLOAT16_OPS(type)                                                     \
-  /* Implicit cast from __bf16 to bfloat16 */                                  \
-  inline bfloat16 operator/(bfloat16 a, type b) {                              \
-    return (float(a) / float(b));                                              \
-  }                                                                            \
-  inline bfloat16 operator*(bfloat16 a, type b) {                              \
-    return (float(a) * float(b));                                              \
-  }                                                                            \
-  inline bfloat16 operator+(bfloat16 a, type b) {                              \
-    return (float(a) + float(b));                                              \
-  }                                                                            \
-  inline bfloat16 operator-(bfloat16 a, type b) {                              \
-    return (float(a) - float(b));                                              \
-  }
-
-BFLOAT16_OPS(bfloat16)
-BFLOAT16_OPS(float)
-BFLOAT16_OPS(int)
-#undef BFLOAT16_OPS
-
-inline bfloat16 operator-(bfloat16 a) { return bfloat16(0) - a; }
-inline bfloat16 operator+(bfloat16 a) { return bfloat16(0) + a; }
-
-/* compare operation with bfloat16 operands */
-inline bool operator>(bfloat16 a, bfloat16 b) { return ((float)b < (float)a); }
-inline bool operator<=(bfloat16 a, bfloat16 b) { return !((float)a > (float)b); }
-inline bool operator!=(bfloat16 a, bfloat16 b) { return !((float)a == (float)b); }
-inline bool operator==(bfloat16 a, bfloat16 b) { return (float)a == (float)b; }
-#endif
+typedef __bf16 bfloat16;
 /* 8-bit types */
 typedef buint8_t v2uint4;
 typedef bint8_t v2int4;

diff --git a/clang/lib/Headers/aiev2intrin.h b/clang/lib/Headers/aiev2intrin.h
@@ -44,27 +44,6 @@
 #include "aiev2_core.h"
 #include "aiev2_vld_sparse.h"
 // clang-format on
-// float_to_bfloat16
-constexpr inline bfloat16::bfloat16(float a0) {
-  uint32_t I32 = __builtin_bit_cast(unsigned int, a0);
-
-  // This is a temporary implementation to make 'normal' constants
-  // work. Denormalized input and values rounding up near the extrema of
-  // the range might give strange effects
-
-  const bool Rnd = (I32 & 0x8000) != 0;
-  const bool Sticky = (I32 & 0x7FFF) != 0;
-  const bool Lsb = (I32 & 0x10000) != 0;
-  // The tie case (Rnd & !Sticky) should make the result even
-  if (Rnd && (Sticky || Lsb)) {
-    // FIXME: carry into exp kind of works out, carry into sign does not.
-    I32 += 0x10000;
-  }
-
-  const uint16_t IBF = I32 >> 16;
-  m0 = __builtin_bit_cast(__bf16, IBF);
-}
-
 #endif /* __cplusplus */
 
 // Locks

diff --git a/clang/test/CodeGen/aie/aie2/aie2-scl2vec-intrinsic.cpp b/clang/test/CodeGen/aie/aie2/aie2-scl2vec-intrinsic.cpp
@@ -556,9 +556,9 @@ v16float test_broadcast_float(float b) {
 
 // CHECK-LABEL: @_Z16test_shiftl_elemDv64_ai(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SEXT:%.*]] = shl i32 [[S:%.*]], 24
-// CHECK-NEXT:    [[CONV_I1:%.*]] = ashr exact i32 [[SEXT]], 24
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef <64 x i8> @llvm.aie2.vbroadcast8.I512(i32 [[CONV_I1]])
+// CHECK-NEXT:    [[SEXT_I:%.*]] = shl i32 [[S:%.*]], 24
+// CHECK-NEXT:    [[CONV_I_I:%.*]] = ashr exact i32 [[SEXT_I]], 24
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef <64 x i8> @llvm.aie2.vbroadcast8.I512(i32 [[CONV_I_I]])
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <64 x i8> [[V:%.*]] to <16 x i32>
 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <64 x i8> [[TMP0]] to <16 x i32>
 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <16 x i32> @llvm.aie2.vshift.I512.I512(<16 x i32> [[TMP1]], <16 x i32> [[TMP2]], i32 0, i32 1)
@@ -581,8 +581,8 @@ v16int32 test_shiftl_elem(v16int32 v, int s) {
 
 // CHECK-LABEL: @_Z16test_shiftl_elemDv32_tj(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[CONV_I1:%.*]] = and i32 [[S:%.*]], 65535
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef <32 x i16> @llvm.aie2.vbroadcast16.I512(i32 [[CONV_I1]])
+// CHECK-NEXT:    [[CONV_I_I:%.*]] = and i32 [[S:%.*]], 65535
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef <32 x i16> @llvm.aie2.vbroadcast16.I512(i32 [[CONV_I_I]])
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <32 x i16> [[V:%.*]] to <16 x i32>
 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <32 x i16> [[TMP0]] to <16 x i32>
 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <16 x i32> @llvm.aie2.vshift.I512.I512(<16 x i32> [[TMP1]], <16 x i32> [[TMP2]], i32 0, i32 2)
@@ -595,8 +595,8 @@ v32uint16 test_shiftl_elem(v32uint16 v, unsigned int s) {
 
 // CHECK-LABEL: @_Z16test_shiftr_elemDv32_si(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SEXT:%.*]] = shl i32 [[S:%.*]], 16
-// CHECK-NEXT:    [[CONV_I_I:%.*]] = ashr exact i32 [[SEXT]], 16
+// CHECK-NEXT:    [[SEXT_I:%.*]] = shl i32 [[S:%.*]], 16
+// CHECK-NEXT:    [[CONV_I_I:%.*]] = ashr exact i32 [[SEXT_I]], 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef <32 x i16> @llvm.aie2.vbroadcast16.I512(i32 [[CONV_I_I]])
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <32 x i16> [[TMP0]] to <16 x i32>
 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <32 x i16> [[V:%.*]] to <16 x i32>
@@ -611,8 +611,8 @@ v32int16 test_shiftr_elem(v32int16 v, int s) {
 //
 // CHECK-LABEL: @_Z16test_shiftr_elemDv64_hj(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[CONV_I1:%.*]] = and i32 [[S:%.*]], 255
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef <64 x i8> @llvm.aie2.vbroadcast8.I512(i32 [[CONV_I1]])
+// CHECK-NEXT:    [[CONV_I_I:%.*]] = and i32 [[S:%.*]], 255
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef <64 x i8> @llvm.aie2.vbroadcast8.I512(i32 [[CONV_I_I]])
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <64 x i8> [[TMP0]] to <16 x i32>
 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <64 x i8> [[V:%.*]] to <16 x i32>
 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <16 x i32> @llvm.aie2.vshift.I512.I512(<16 x i32> [[TMP1]], <16 x i32> [[TMP2]], i32 0, i32 63)
@@ -993,8 +993,8 @@ char test_ext_elem(v64int8 v, int idx) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <64 x i8> [[V:%.*]] to <32 x i16>
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.aie2.vextract.elem16.I512(<32 x i16> [[TMP0]], i32 [[IDX:%.*]], i32 1)
-// CHECK-NEXT:    [[CONV_I:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[CONV_I]] to <2 x i8>
+// CHECK-NEXT:    [[CONV_I_I:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[CONV_I_I]] to <2 x i8>
 // CHECK-NEXT:    ret <2 x i8> [[TMP2]]
 //
 v2int8 test_ext_v2int8(v64int8 v, int idx) {
@@ -1005,8 +1005,8 @@ v2int8 test_ext_v2int8(v64int8 v, int idx) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <64 x i8> [[V:%.*]] to <32 x i16>
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.aie2.vextract.elem16.I512(<32 x i16> [[TMP0]], i32 2, i32 1)
-// CHECK-NEXT:    [[CONV_I:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[CONV_I]] to <2 x i8>
+// CHECK-NEXT:    [[CONV_I_I:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[CONV_I_I]] to <2 x i8>
 // CHECK-NEXT:    ret <2 x i8> [[TMP2]]
 //
 v2int8 test_ext_v2int8_idx2(v64int8 v) {
@@ -1191,10 +1191,9 @@ v32bfloat16 test_shiftx(v32bfloat16 a, v32bfloat16 b, int step, int shift) {
     return shiftx(a, b, step, shift);
 }
 
-// CHECK-LABEL: @_Z11test_insertDv32_u6__bf16i8bfloat16(
+// CHECK-LABEL: @_Z11test_insertDv32_u6__bf16iu6__bf16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[B_COERCE_FCA_0_EXTRACT_I:%.*]] = extractvalue [[CLASS_BFLOAT16:%.*]] [[B_COERCE:%.*]], 0
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef <32 x bfloat> @llvm.aie2.vinsert16.bf512(<32 x bfloat> [[V:%.*]], i32 [[IDX:%.*]], bfloat [[B_COERCE_FCA_0_EXTRACT_I]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef <32 x bfloat> @llvm.aie2.vinsert16.bf512(<32 x bfloat> [[V:%.*]], i32 [[IDX:%.*]], bfloat [[B:%.*]])
 // CHECK-NEXT:    ret <32 x bfloat> [[TMP0]]
 //
 v32bfloat16 test_insert(v32bfloat16 v, int idx, bfloat16 b) {
@@ -1229,10 +1228,9 @@ v32bfloat16 test_insert(v32bfloat16 v, int idx, unsigned long long b) {
   return insert(v, idx,(v4bfloat16)b);
 }
 
-// CHECK-LABEL: @_Z29test_broadcast_to_v32bfloat168bfloat16(
+// CHECK-LABEL: @_Z29test_broadcast_to_v32bfloat16u6__bf16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[B_COERCE_FCA_0_EXTRACT_I:%.*]] = extractvalue [[CLASS_BFLOAT16:%.*]] [[B_COERCE:%.*]], 0
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef <32 x bfloat> @llvm.aie2.vbroadcast16.bf512(bfloat [[B_COERCE_FCA_0_EXTRACT_I]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef <32 x bfloat> @llvm.aie2.vbroadcast16.bf512(bfloat [[B:%.*]])
 // CHECK-NEXT:    ret <32 x bfloat> [[TMP0]]
 //
 v32bfloat16 test_broadcast_to_v32bfloat16 (bfloat16 b) { return broadcast_to_v32bfloat16(b); }
@@ -1251,10 +1249,9 @@ v32bfloat16 test_broadcast_to_v32bfloat16 (v2bfloat16 b) { return broadcast_to_v
 //
 v32bfloat16 test_broadcast_to_v32bfloat16 (v4bfloat16 b) { return broadcast_to_v32bfloat16(b); }
 
-// CHECK-LABEL: @_Z21test_shuffle_bfloat168bfloat16j(
+// CHECK-LABEL: @_Z21test_shuffle_bfloat16u6__bf16j(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[B_COERCE_FCA_0_EXTRACT_I:%.*]] = extractvalue [[CLASS_BFLOAT16:%.*]] [[B_COERCE:%.*]], 0
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef <32 x bfloat> @llvm.aie2.vbcst.shuffle.bf16(bfloat [[B_COERCE_FCA_0_EXTRACT_I]], i32 [[M:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef <32 x bfloat> @llvm.aie2.vbcst.shuffle.bf16(bfloat [[B:%.*]], i32 [[M:%.*]])
 // CHECK-NEXT:    ret <32 x bfloat> [[TMP0]]
 //
 v32bfloat16 test_shuffle_bfloat16(bfloat16 b, unsigned int m) {  return shuffle_bfloat16(b,m) ;}
@@ -1264,10 +1261,9 @@ v32bfloat16 test_shuffle_bfloat16(bfloat16 b, unsigned int m) {  return shuffle_
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <32 x bfloat> [[V:%.*]] to <32 x i16>
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.aie2.vextract.elem16.I512(<32 x i16> [[TMP0]], i32 [[IDX:%.*]], i32 [[SIGN:%.*]])
-// CHECK-NEXT:    [[ELEM_0_EXTRACT_TRUNC_I:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[ELEM_0_EXTRACT_TRUNC_I]] to bfloat
-// CHECK-NEXT:    [[DOTFCA_0_INSERT_I:%.*]] = insertvalue [[CLASS_BFLOAT16:%.*]] poison, bfloat [[TMP2]], 0
-// CHECK-NEXT:    ret [[CLASS_BFLOAT16]] [[DOTFCA_0_INSERT_I]]
+// CHECK-NEXT:    [[ELEM_SROA_0_0_EXTRACT_TRUNC_I:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[ELEM_SROA_0_0_EXTRACT_TRUNC_I]] to bfloat
+// CHECK-NEXT:    ret bfloat [[TMP2]]
 //
 bfloat16 test_ext_elem(v32bfloat16 v, int idx, int sign) {
   return ext_elem(v, idx, sign);