Skip to content

Commit

Permalink
[AIE2] Replace bfloat16 class with typedef __b16.
Browse files Browse the repository at this point in the history
  • Loading branch information
SagarMaheshwari99 committed Jun 12, 2024
1 parent abae161 commit d854d13
Show file tree
Hide file tree
Showing 5 changed files with 196 additions and 472 deletions.
71 changes: 1 addition & 70 deletions clang/lib/Headers/aiebase_typedefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,19 +36,11 @@ struct cint16 {
};
#endif

/* COMPLEX_TYPE(type) declares a struct type named c<type> with two members of
 * the given scalar type: `real` followed by `imag`. */
#if !defined(__AIENGINE__) || defined(__PTHREAD_API__)
/* Generic build (not AI Engine, or pthread API in use): plain aggregate. */
#define COMPLEX_TYPE(type)                                                     \
  typedef struct {                                                             \
    type real;                                                                 \
    type imag;                                                                 \
  } c##type
#else
/* AI Engine build without the pthread API: same layout, but the real member
 * carries a chess_storage annotation parameterised on twice the scalar size. */
#define COMPLEX_TYPE(type)                                                     \
  typedef struct {                                                             \
    type chess_storage(% (sizeof(type) * 2)) real;                             \
    type imag;                                                                 \
  } c##type
#endif

COMPLEX_TYPE(int8_t);
COMPLEX_TYPE(int32_t);
Expand Down Expand Up @@ -220,69 +212,8 @@ typedef int32_t v2int32 __attribute__((__vector_size__(8)));

#if __AIEARCH__ == 20
typedef int32_t addr_t;
#ifdef __cplusplus
/* bfloat16 type: a thin wrapper around the builtin __bf16 storage type.
 *
 * Construction from float is implicit; the constructor is only declared here
 * and must be defined out of line. Conversions to float and to __bf16 are
 * implicit, conversions to integer types are explicit. */
class bfloat16 {
  __bf16 m0; /* raw 16-bit storage */

public:
  bfloat16() = default;
  constexpr inline bfloat16(float a0);

  /* Widen to float by placing the 16 stored bits in the upper half of a
   * 32-bit word. The widening is done on an unsigned value so that a bit
   * pattern with the sign bit set is never shifted into the sign bit of a
   * signed integer. */
  inline operator float() const {
    const uint16_t I16 = __builtin_bit_cast(uint16_t, m0);
    const uint32_t I32 = static_cast<uint32_t>(I16) << 16;
    return __builtin_bit_cast(float, I32);
  }

  /* Access to the underlying builtin value. */
  inline operator __bf16() const { return m0; }

  /* Conversion to int is delegated to the target builtin. */
  inline explicit operator int() const { return __builtin_bfloat16_to_int(m0); }

  /* Signed narrow conversions go through int first. */
  inline explicit operator char() const {
    return static_cast<char>(static_cast<int>(*this));
  }
  inline explicit operator signed char() const {
    return static_cast<signed char>(static_cast<int>(*this));
  }
  inline explicit operator signed short() const {
    return static_cast<signed short>(static_cast<int>(*this));
  }

  /* Unsigned conversions: unsigned goes through int, the narrower unsigned
   * types go through unsigned. */
  inline explicit operator unsigned() const {
    return static_cast<unsigned>(static_cast<int>(*this));
  }
  inline explicit operator unsigned char() const {
    return static_cast<unsigned char>(static_cast<unsigned>(*this));
  }
  inline explicit operator unsigned short() const {
    return static_cast<unsigned short>(static_cast<unsigned>(*this));
  }
};
/* Binary arithmetic with a bfloat16 left-hand operand.
 *
 * Each operator converts both operands to float, evaluates the operation in
 * float, and relies on the implicit bfloat16(float) constructor to convert
 * the float result back to bfloat16. */
#define BFLOAT16_BINOP(op, type)                                               \
  inline bfloat16 operator op(bfloat16 a, type b) {                            \
    return (float(a) op float(b));                                             \
  }

/* Emits /, *, + and - for one right-hand operand type. */
#define BFLOAT16_OPS(type)                                                     \
  BFLOAT16_BINOP(/, type)                                                      \
  BFLOAT16_BINOP(*, type)                                                      \
  BFLOAT16_BINOP(+, type)                                                      \
  BFLOAT16_BINOP(-, type)

BFLOAT16_OPS(bfloat16)
BFLOAT16_OPS(float)
BFLOAT16_OPS(int)

/* The generator macros are implementation details; do not leak them. */
#undef BFLOAT16_OPS
#undef BFLOAT16_BINOP

/* Unary minus. Negation is done on the float value so that the sign of zero
 * is flipped as well; computing 0 - a would yield +0 for a == +0. */
inline bfloat16 operator-(bfloat16 a) { return bfloat16(-float(a)); }
/* Unary plus is the identity. Returning the operand unchanged keeps -0 as -0,
 * which computing 0 + a would not. */
inline bfloat16 operator+(bfloat16 a) { return a; }

/* compare operation with bfloat16 operands
 *
 * Every comparison is evaluated directly on the float values, so the usual
 * floating-point rules apply: with a NaN operand, >, <= and == are false and
 * != is true. (Expressing <= as the negation of > would report true for NaN
 * operands.) */
inline bool operator>(bfloat16 a, bfloat16 b) { return float(a) > float(b); }
inline bool operator<=(bfloat16 a, bfloat16 b) { return float(a) <= float(b); }
inline bool operator!=(bfloat16 a, bfloat16 b) { return float(a) != float(b); }
inline bool operator==(bfloat16 a, bfloat16 b) { return float(a) == float(b); }
#endif
typedef __bf16 bfloat16;
/* 8-bit types */
typedef buint8_t v2uint4;
typedef bint8_t v2int4;
Expand Down
21 changes: 0 additions & 21 deletions clang/lib/Headers/aiev2intrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,27 +44,6 @@
#include "aiev2_core.h"
#include "aiev2_vld_sparse.h"
// clang-format on
// float_to_bfloat16
//
// Converts a float to bfloat16 by keeping the upper 16 bits of its 32-bit
// encoding, rounding the discarded lower 16 bits to nearest with ties to even.
constexpr inline bfloat16::bfloat16(float a0) {
  uint32_t I32 = __builtin_bit_cast(unsigned int, a0);

  // NaN: exponent all ones and a non-zero mantissa. NaNs must bypass the
  // rounding step below: truncation could clear the whole mantissa (turning
  // the NaN into an infinity) and a round-up could carry through the exponent
  // into the sign bit.
  const bool IsNaN = (I32 & 0x7FFFFFFFu) > 0x7F800000u;
  if (IsNaN) {
    // Force the top mantissa bit so the truncated value is still a NaN.
    I32 |= 0x00400000u;
  } else {
    const bool Rnd = (I32 & 0x8000u) != 0;    // first discarded bit
    const bool Sticky = (I32 & 0x7FFFu) != 0; // any lower discarded bit
    const bool Lsb = (I32 & 0x10000u) != 0;   // lowest kept bit
    // The tie case (Rnd & !Sticky) should make the result even.
    if (Rnd && (Sticky || Lsb)) {
      // A carry out of the mantissa bumps the exponent, which is the correctly
      // rounded result (the largest finite values round to infinity). With
      // NaNs excluded above, the carry can never reach the sign bit.
      I32 += 0x10000u;
    }
  }

  const uint16_t IBF = static_cast<uint16_t>(I32 >> 16);
  m0 = __builtin_bit_cast(__bf16, IBF);
}

#endif /* __cplusplus */

// Locks
Expand Down
48 changes: 22 additions & 26 deletions clang/test/CodeGen/aie/aie2/aie2-scl2vec-intrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -556,9 +556,9 @@ v16float test_broadcast_float(float b) {

// CHECK-LABEL: @_Z16test_shiftl_elemDv64_ai(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[SEXT:%.*]] = shl i32 [[S:%.*]], 24
// CHECK-NEXT: [[CONV_I1:%.*]] = ashr exact i32 [[SEXT]], 24
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef <64 x i8> @llvm.aie2.vbroadcast8.I512(i32 [[CONV_I1]])
// CHECK-NEXT: [[SEXT_I:%.*]] = shl i32 [[S:%.*]], 24
// CHECK-NEXT: [[CONV_I_I:%.*]] = ashr exact i32 [[SEXT_I]], 24
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef <64 x i8> @llvm.aie2.vbroadcast8.I512(i32 [[CONV_I_I]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <64 x i8> [[V:%.*]] to <16 x i32>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <64 x i8> [[TMP0]] to <16 x i32>
// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i32> @llvm.aie2.vshift.I512.I512(<16 x i32> [[TMP1]], <16 x i32> [[TMP2]], i32 0, i32 1)
Expand All @@ -581,8 +581,8 @@ v16int32 test_shiftl_elem(v16int32 v, int s) {

// CHECK-LABEL: @_Z16test_shiftl_elemDv32_tj(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[CONV_I1:%.*]] = and i32 [[S:%.*]], 65535
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef <32 x i16> @llvm.aie2.vbroadcast16.I512(i32 [[CONV_I1]])
// CHECK-NEXT: [[CONV_I_I:%.*]] = and i32 [[S:%.*]], 65535
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef <32 x i16> @llvm.aie2.vbroadcast16.I512(i32 [[CONV_I_I]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <32 x i16> [[V:%.*]] to <16 x i32>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <32 x i16> [[TMP0]] to <16 x i32>
// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i32> @llvm.aie2.vshift.I512.I512(<16 x i32> [[TMP1]], <16 x i32> [[TMP2]], i32 0, i32 2)
Expand All @@ -595,8 +595,8 @@ v32uint16 test_shiftl_elem(v32uint16 v, unsigned int s) {

// CHECK-LABEL: @_Z16test_shiftr_elemDv32_si(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[SEXT:%.*]] = shl i32 [[S:%.*]], 16
// CHECK-NEXT: [[CONV_I_I:%.*]] = ashr exact i32 [[SEXT]], 16
// CHECK-NEXT: [[SEXT_I:%.*]] = shl i32 [[S:%.*]], 16
// CHECK-NEXT: [[CONV_I_I:%.*]] = ashr exact i32 [[SEXT_I]], 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef <32 x i16> @llvm.aie2.vbroadcast16.I512(i32 [[CONV_I_I]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <32 x i16> [[TMP0]] to <16 x i32>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <32 x i16> [[V:%.*]] to <16 x i32>
Expand All @@ -611,8 +611,8 @@ v32int16 test_shiftr_elem(v32int16 v, int s) {
//
// CHECK-LABEL: @_Z16test_shiftr_elemDv64_hj(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[CONV_I1:%.*]] = and i32 [[S:%.*]], 255
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef <64 x i8> @llvm.aie2.vbroadcast8.I512(i32 [[CONV_I1]])
// CHECK-NEXT: [[CONV_I_I:%.*]] = and i32 [[S:%.*]], 255
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef <64 x i8> @llvm.aie2.vbroadcast8.I512(i32 [[CONV_I_I]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <64 x i8> [[TMP0]] to <16 x i32>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <64 x i8> [[V:%.*]] to <16 x i32>
// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i32> @llvm.aie2.vshift.I512.I512(<16 x i32> [[TMP1]], <16 x i32> [[TMP2]], i32 0, i32 63)
Expand Down Expand Up @@ -993,8 +993,8 @@ char test_ext_elem(v64int8 v, int idx) {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[V:%.*]] to <32 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.aie2.vextract.elem16.I512(<32 x i16> [[TMP0]], i32 [[IDX:%.*]], i32 1)
// CHECK-NEXT: [[CONV_I:%.*]] = trunc i32 [[TMP1]] to i16
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[CONV_I]] to <2 x i8>
// CHECK-NEXT: [[CONV_I_I:%.*]] = trunc i32 [[TMP1]] to i16
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[CONV_I_I]] to <2 x i8>
// CHECK-NEXT: ret <2 x i8> [[TMP2]]
//
v2int8 test_ext_v2int8(v64int8 v, int idx) {
Expand All @@ -1005,8 +1005,8 @@ v2int8 test_ext_v2int8(v64int8 v, int idx) {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[V:%.*]] to <32 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.aie2.vextract.elem16.I512(<32 x i16> [[TMP0]], i32 2, i32 1)
// CHECK-NEXT: [[CONV_I:%.*]] = trunc i32 [[TMP1]] to i16
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[CONV_I]] to <2 x i8>
// CHECK-NEXT: [[CONV_I_I:%.*]] = trunc i32 [[TMP1]] to i16
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[CONV_I_I]] to <2 x i8>
// CHECK-NEXT: ret <2 x i8> [[TMP2]]
//
v2int8 test_ext_v2int8_idx2(v64int8 v) {
Expand Down Expand Up @@ -1191,10 +1191,9 @@ v32bfloat16 test_shiftx(v32bfloat16 a, v32bfloat16 b, int step, int shift) {
return shiftx(a, b, step, shift);
}

// CHECK-LABEL: @_Z11test_insertDv32_u6__bf16i8bfloat16(
// CHECK-LABEL: @_Z11test_insertDv32_u6__bf16iu6__bf16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT_I:%.*]] = extractvalue [[CLASS_BFLOAT16:%.*]] [[B_COERCE:%.*]], 0
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef <32 x bfloat> @llvm.aie2.vinsert16.bf512(<32 x bfloat> [[V:%.*]], i32 [[IDX:%.*]], bfloat [[B_COERCE_FCA_0_EXTRACT_I]])
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef <32 x bfloat> @llvm.aie2.vinsert16.bf512(<32 x bfloat> [[V:%.*]], i32 [[IDX:%.*]], bfloat [[B:%.*]])
// CHECK-NEXT: ret <32 x bfloat> [[TMP0]]
//
v32bfloat16 test_insert(v32bfloat16 v, int idx, bfloat16 b) {
Expand Down Expand Up @@ -1229,10 +1228,9 @@ v32bfloat16 test_insert(v32bfloat16 v, int idx, unsigned long long b) {
return insert(v, idx,(v4bfloat16)b);
}

// CHECK-LABEL: @_Z29test_broadcast_to_v32bfloat168bfloat16(
// CHECK-LABEL: @_Z29test_broadcast_to_v32bfloat16u6__bf16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT_I:%.*]] = extractvalue [[CLASS_BFLOAT16:%.*]] [[B_COERCE:%.*]], 0
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef <32 x bfloat> @llvm.aie2.vbroadcast16.bf512(bfloat [[B_COERCE_FCA_0_EXTRACT_I]])
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef <32 x bfloat> @llvm.aie2.vbroadcast16.bf512(bfloat [[B:%.*]])
// CHECK-NEXT: ret <32 x bfloat> [[TMP0]]
//
v32bfloat16 test_broadcast_to_v32bfloat16 (bfloat16 b) { return broadcast_to_v32bfloat16(b); }
Expand All @@ -1251,10 +1249,9 @@ v32bfloat16 test_broadcast_to_v32bfloat16 (v2bfloat16 b) { return broadcast_to_v
//
v32bfloat16 test_broadcast_to_v32bfloat16 (v4bfloat16 b) { return broadcast_to_v32bfloat16(b); }

// CHECK-LABEL: @_Z21test_shuffle_bfloat168bfloat16j(
// CHECK-LABEL: @_Z21test_shuffle_bfloat16u6__bf16j(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT_I:%.*]] = extractvalue [[CLASS_BFLOAT16:%.*]] [[B_COERCE:%.*]], 0
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef <32 x bfloat> @llvm.aie2.vbcst.shuffle.bf16(bfloat [[B_COERCE_FCA_0_EXTRACT_I]], i32 [[M:%.*]])
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef <32 x bfloat> @llvm.aie2.vbcst.shuffle.bf16(bfloat [[B:%.*]], i32 [[M:%.*]])
// CHECK-NEXT: ret <32 x bfloat> [[TMP0]]
//
v32bfloat16 test_shuffle_bfloat16(bfloat16 b, unsigned int m) { return shuffle_bfloat16(b,m) ;}
Expand All @@ -1264,10 +1261,9 @@ v32bfloat16 test_shuffle_bfloat16(bfloat16 b, unsigned int m) { return shuffle_
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <32 x bfloat> [[V:%.*]] to <32 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.aie2.vextract.elem16.I512(<32 x i16> [[TMP0]], i32 [[IDX:%.*]], i32 [[SIGN:%.*]])
// CHECK-NEXT: [[ELEM_0_EXTRACT_TRUNC_I:%.*]] = trunc i32 [[TMP1]] to i16
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[ELEM_0_EXTRACT_TRUNC_I]] to bfloat
// CHECK-NEXT: [[DOTFCA_0_INSERT_I:%.*]] = insertvalue [[CLASS_BFLOAT16:%.*]] poison, bfloat [[TMP2]], 0
// CHECK-NEXT: ret [[CLASS_BFLOAT16]] [[DOTFCA_0_INSERT_I]]
// CHECK-NEXT: [[ELEM_SROA_0_0_EXTRACT_TRUNC_I:%.*]] = trunc i32 [[TMP1]] to i16
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[ELEM_SROA_0_0_EXTRACT_TRUNC_I]] to bfloat
// CHECK-NEXT: ret bfloat [[TMP2]]
//
bfloat16 test_ext_elem(v32bfloat16 v, int idx, int sign) {
return ext_elem(v, idx, sign);
Expand Down
Loading

0 comments on commit d854d13

Please sign in to comment.