From 878368911369131daab0247f19906aeb2fd2370c Mon Sep 17 00:00:00 2001 From: Hamza Khallouki Date: Mon, 6 Jan 2025 10:12:50 +0000 Subject: [PATCH] [AIE] Add addressing intrinsics using dims_2d_t/dims_3d_t --- clang/lib/Headers/aie2p_addr.h | 26 ++++ clang/lib/Headers/aiev2_addr.h | 21 ++++ clang/test/CodeGen/aie/aie-addr-intrinsic.cpp | 114 +++++++++++++++++- 3 files changed, 160 insertions(+), 1 deletion(-) diff --git a/clang/lib/Headers/aie2p_addr.h b/clang/lib/Headers/aie2p_addr.h index e1c25f502e0a..d95f86e2ee8b 100644 --- a/clang/lib/Headers/aie2p_addr.h +++ b/clang/lib/Headers/aie2p_addr.h @@ -33,6 +33,8 @@ struct dims_2d_t { : num1(size1), inc1(inc1), inc2(inc2), count1(0) {}; dims_2d_t(unsigned int size1, int inc1, int inc2, addr_t count1) : num1(size1), inc1(inc1), inc2(inc2), count1(count1) {}; + // Default constructor + dims_2d_t() : num1(0), inc1(0), inc2(0), count1(0) {}; }; INTRINSIC(dims_2d_t) @@ -114,6 +116,9 @@ struct dims_3d_t { int inc3, addr_t count1, addr_t count2) : num1(size1), inc1(inc1), num2(size2), inc2(inc2), inc3(inc3), count1(count1), count2(count2) {}; + // Default constructor + dims_3d_t() + : num1(0), inc1(0), num2(0), inc2(0), inc3(0), count1(0), count2(0) {}; }; INTRINSIC(dims_3d_t) @@ -131,6 +136,27 @@ dims_3d_from_steps(unsigned int size1, int step1, unsigned int size2, int step2, count1, count2); }; +template <typename T> INTRINSIC(T *) add_2d_byte(T *a, dims_2d_t &params) { + unsigned int size1 = params.num1; + int off = params.inc1; + int inc1 = params.inc2; + addr_t &count1 = params.count1; + return (T *)__builtin_aie2p_add_2d((void *)a, off, inc1, size1, count1); +} + +template <typename T> INTRINSIC(T *) add_3d_byte(T *a, dims_3d_t &params) { + unsigned int size1 = params.num1; + int off = params.inc1; + unsigned int size2 = params.num2; + ; + int inc1 = params.inc2; + int inc2 = params.inc3; + addr_t &count1 = params.count1; + addr_t &count2 = params.count2; + return (T *)__builtin_aie2p_add_3d((void *)a, off, inc1, inc2, size1, count1, + size2, count2); 
+} + INTRINSIC(v8int32) load_4x16_lo(v8int32 addr) { return __builtin_aie2p_load_4x16_lo(addr); } diff --git a/clang/lib/Headers/aiev2_addr.h b/clang/lib/Headers/aiev2_addr.h index d54cafda11df..09640e5f6fca 100644 --- a/clang/lib/Headers/aiev2_addr.h +++ b/clang/lib/Headers/aiev2_addr.h @@ -264,4 +264,25 @@ load_lut_2x_float(const void *lut1, const void *lut2, v16uint32 offset, (v32bfloat16)insert(v2, 1, (v16bfloat16)read_lut64_3(lut1, lut2, offset)); } +template <typename T> INTRINSIC(T *) add_2d_byte(T *a, dims_2d_t &params) { + unsigned int size1 = params.num1; + int off = params.inc1; + int inc1 = params.inc2; + addr_t &count1 = params.count1; + return (T *)__builtin_aiev2_add_2d((void *)a, off, inc1, size1, count1); +} + +template <typename T> INTRINSIC(T *) add_3d_byte(T *a, dims_3d_t &params) { + unsigned int size1 = params.num1; + int off = params.inc1; + unsigned int size2 = params.num2; + ; + int inc1 = params.inc2; + int inc2 = params.inc3; + addr_t &count1 = params.count1; + addr_t &count2 = params.count2; + return (T *)__builtin_aiev2_add_3d((void *)a, off, inc1, inc2, size1, count1, + size2, count2); +} + #endif /*__AIEV2_ADDR_H__*/ diff --git a/clang/test/CodeGen/aie/aie-addr-intrinsic.cpp b/clang/test/CodeGen/aie/aie-addr-intrinsic.cpp index 8a4e8d1cf397..9c31eaed1c38 100644 --- a/clang/test/CodeGen/aie/aie-addr-intrinsic.cpp +++ b/clang/test/CodeGen/aie/aie-addr-intrinsic.cpp @@ -23,7 +23,6 @@ dims_2d_t test_dims_2d_t (unsigned int size1, int inc1, int inc2) { return dims_2d_t(size1,inc1, inc2); } -// // CHECK-COMMON-LABEL: @_Z15test2_dims_2d_tjiii( // CHECK-COMMON-NEXT: entry: // CHECK-COMMON-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DIMS_2D_T:%.*]] poison, i32 [[SIZE1:%.*]], 0 @@ -225,6 +224,119 @@ const v16int32* test_add_2d_byte(const v16int32* a, int off, int size1, addr_t& return add_2d_byte(a,off,size1,count1,inc1); } +// AIE2-LABEL: @_Z16test_add_2d_bytePDv16_iR9dims_2d_t( +// AIE2-NEXT: entry: +// AIE2-NEXT: [[TMP0:%.*]] = load i32, ptr [[PARAMS:%.*]], align 4, 
!tbaa [[TBAA14:![0-9]+]] +// AIE2-NEXT: [[INC1_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 4 +// AIE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[INC1_I]], align 4, !tbaa [[TBAA16:![0-9]+]] +// AIE2-NEXT: [[INC2_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 8 +// AIE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[INC2_I]], align 4, !tbaa [[TBAA17:![0-9]+]] +// AIE2-NEXT: [[COUNT12_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 12 +// AIE2-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP1]] to i20 +// AIE2-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP2]] to i20 +// AIE2-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP0]] to i20 +// AIE2-NEXT: [[TMP6:%.*]] = load i32, ptr [[COUNT12_I]], align 4, !tbaa [[TBAA13]] +// AIE2-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i20 +// AIE2-NEXT: [[TMP8:%.*]] = tail call { ptr, i20 } @llvm.aie2.add.2d(ptr [[A:%.*]], i20 [[TMP3]], i20 [[TMP4]], i20 [[TMP5]], i20 [[TMP7]]) +// AIE2-NEXT: [[TMP9:%.*]] = extractvalue { ptr, i20 } [[TMP8]], 1 +// AIE2-NEXT: [[TMP10:%.*]] = zext i20 [[TMP9]] to i32 +// AIE2-NEXT: store i32 [[TMP10]], ptr [[COUNT12_I]], align 4 +// AIE2-NEXT: [[TMP11:%.*]] = extractvalue { ptr, i20 } [[TMP8]], 0 +// AIE2-NEXT: ret ptr [[TMP11]] +// +// AIE2P-LABEL: @_Z16test_add_2d_bytePDv16_iR9dims_2d_t( +// AIE2P-NEXT: entry: +// AIE2P-NEXT: [[TMP0:%.*]] = load i32, ptr [[PARAMS:%.*]], align 4, !tbaa [[TBAA14:![0-9]+]] +// AIE2P-NEXT: [[INC1_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 4 +// AIE2P-NEXT: [[TMP1:%.*]] = load i32, ptr [[INC1_I]], align 4, !tbaa [[TBAA16:![0-9]+]] +// AIE2P-NEXT: [[INC2_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 8 +// AIE2P-NEXT: [[TMP2:%.*]] = load i32, ptr [[INC2_I]], align 4, !tbaa [[TBAA17:![0-9]+]] +// AIE2P-NEXT: [[COUNT12_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 12 +// AIE2P-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP1]] to i20 +// AIE2P-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP2]] to i20 +// AIE2P-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP0]] to i20 +// AIE2P-NEXT: 
[[TMP6:%.*]] = load i32, ptr [[COUNT12_I]], align 4, !tbaa [[TBAA13]] +// AIE2P-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i20 +// AIE2P-NEXT: [[TMP8:%.*]] = tail call { ptr, i20 } @llvm.aie2p.add.2d(ptr [[A:%.*]], i20 [[TMP3]], i20 [[TMP4]], i20 [[TMP5]], i20 [[TMP7]]) +// AIE2P-NEXT: [[TMP9:%.*]] = extractvalue { ptr, i20 } [[TMP8]], 1 +// AIE2P-NEXT: [[TMP10:%.*]] = zext i20 [[TMP9]] to i32 +// AIE2P-NEXT: store i32 [[TMP10]], ptr [[COUNT12_I]], align 4 +// AIE2P-NEXT: [[TMP11:%.*]] = extractvalue { ptr, i20 } [[TMP8]], 0 +// AIE2P-NEXT: ret ptr [[TMP11]] +// +v16int32* test_add_2d_byte(v16int32* a, dims_2d_t &params){ + return add_2d_byte(a,params); +} + +// AIE2-LABEL: @_Z16test_add_3d_bytePDv16_iR9dims_3d_t( +// AIE2-NEXT: entry: +// AIE2-NEXT: [[TMP0:%.*]] = load i32, ptr [[PARAMS:%.*]], align 4, !tbaa [[TBAA2]] +// AIE2-NEXT: [[INC1_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 4 +// AIE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[INC1_I]], align 4, !tbaa [[TBAA7]] +// AIE2-NEXT: [[NUM2_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 8 +// AIE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[NUM2_I]], align 4, !tbaa [[TBAA8]] +// AIE2-NEXT: [[INC2_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 12 +// AIE2-NEXT: [[TMP3:%.*]] = load i32, ptr [[INC2_I]], align 4, !tbaa [[TBAA9]] +// AIE2-NEXT: [[INC3_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 16 +// AIE2-NEXT: [[TMP4:%.*]] = load i32, ptr [[INC3_I]], align 4, !tbaa [[TBAA10]] +// AIE2-NEXT: [[COUNT13_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 20 +// AIE2-NEXT: [[COUNT24_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 24 +// AIE2-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP1]] to i20 +// AIE2-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP3]] to i20 +// AIE2-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP4]] to i20 +// AIE2-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP0]] to i20 +// AIE2-NEXT: [[TMP9:%.*]] = load i32, ptr [[COUNT13_I]], align 4, !tbaa [[TBAA13]] +// AIE2-NEXT: [[TMP10:%.*]] = trunc i32 
[[TMP9]] to i20 +// AIE2-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP2]] to i20 +// AIE2-NEXT: [[TMP12:%.*]] = load i32, ptr [[COUNT24_I]], align 4, !tbaa [[TBAA13]] +// AIE2-NEXT: [[TMP13:%.*]] = trunc i32 [[TMP12]] to i20 +// AIE2-NEXT: [[TMP14:%.*]] = tail call { ptr, i20, i20 } @llvm.aie2.add.3d(ptr [[A:%.*]], i20 [[TMP5]], i20 [[TMP6]], i20 [[TMP7]], i20 [[TMP8]], i20 [[TMP10]], i20 [[TMP11]], i20 [[TMP13]]) +// AIE2-NEXT: [[TMP15:%.*]] = extractvalue { ptr, i20, i20 } [[TMP14]], 1 +// AIE2-NEXT: [[TMP16:%.*]] = zext i20 [[TMP15]] to i32 +// AIE2-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i20, i20 } [[TMP14]], 2 +// AIE2-NEXT: [[TMP18:%.*]] = zext i20 [[TMP17]] to i32 +// AIE2-NEXT: store i32 [[TMP16]], ptr [[COUNT13_I]], align 4 +// AIE2-NEXT: store i32 [[TMP18]], ptr [[COUNT24_I]], align 4 +// AIE2-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i20, i20 } [[TMP14]], 0 +// AIE2-NEXT: ret ptr [[TMP19]] +// +// AIE2P-LABEL: @_Z16test_add_3d_bytePDv16_iR9dims_3d_t( +// AIE2P-NEXT: entry: +// AIE2P-NEXT: [[TMP0:%.*]] = load i32, ptr [[PARAMS:%.*]], align 4, !tbaa [[TBAA2]] +// AIE2P-NEXT: [[INC1_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 4 +// AIE2P-NEXT: [[TMP1:%.*]] = load i32, ptr [[INC1_I]], align 4, !tbaa [[TBAA7]] +// AIE2P-NEXT: [[NUM2_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 8 +// AIE2P-NEXT: [[TMP2:%.*]] = load i32, ptr [[NUM2_I]], align 4, !tbaa [[TBAA8]] +// AIE2P-NEXT: [[INC2_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 12 +// AIE2P-NEXT: [[TMP3:%.*]] = load i32, ptr [[INC2_I]], align 4, !tbaa [[TBAA9]] +// AIE2P-NEXT: [[INC3_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 16 +// AIE2P-NEXT: [[TMP4:%.*]] = load i32, ptr [[INC3_I]], align 4, !tbaa [[TBAA10]] +// AIE2P-NEXT: [[COUNT13_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 20 +// AIE2P-NEXT: [[COUNT24_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 24 +// AIE2P-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP1]] to i20 +// AIE2P-NEXT: [[TMP6:%.*]] = 
trunc i32 [[TMP3]] to i20 +// AIE2P-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP4]] to i20 +// AIE2P-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP0]] to i20 +// AIE2P-NEXT: [[TMP9:%.*]] = load i32, ptr [[COUNT13_I]], align 4, !tbaa [[TBAA13]] +// AIE2P-NEXT: [[TMP10:%.*]] = trunc i32 [[TMP9]] to i20 +// AIE2P-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP2]] to i20 +// AIE2P-NEXT: [[TMP12:%.*]] = load i32, ptr [[COUNT24_I]], align 4, !tbaa [[TBAA13]] +// AIE2P-NEXT: [[TMP13:%.*]] = trunc i32 [[TMP12]] to i20 +// AIE2P-NEXT: [[TMP14:%.*]] = tail call { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr [[A:%.*]], i20 [[TMP5]], i20 [[TMP6]], i20 [[TMP7]], i20 [[TMP8]], i20 [[TMP10]], i20 [[TMP11]], i20 [[TMP13]]) +// AIE2P-NEXT: [[TMP15:%.*]] = extractvalue { ptr, i20, i20 } [[TMP14]], 1 +// AIE2P-NEXT: [[TMP16:%.*]] = zext i20 [[TMP15]] to i32 +// AIE2P-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i20, i20 } [[TMP14]], 2 +// AIE2P-NEXT: [[TMP18:%.*]] = zext i20 [[TMP17]] to i32 +// AIE2P-NEXT: store i32 [[TMP16]], ptr [[COUNT13_I]], align 4 +// AIE2P-NEXT: store i32 [[TMP18]], ptr [[COUNT24_I]], align 4 +// AIE2P-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i20, i20 } [[TMP14]], 0 +// AIE2P-NEXT: ret ptr [[TMP19]] +// +v16int32* test_add_3d_byte(v16int32* a, dims_3d_t &params){ + return add_3d_byte(a,params); +} + + // AIE2-LABEL: @_Z15test_add_3d_ptrPDv16_iiiRiiiS1_i( // AIE2-NEXT: entry: // AIE2-NEXT: [[MUL_I:%.*]] = shl i32 [[OFF:%.*]], 6