diff --git a/llvm/lib/Target/AIE/AIE2InstructionSelector.cpp b/llvm/lib/Target/AIE/AIE2InstructionSelector.cpp
index babb0e87903b..953e4c46d380 100644
--- a/llvm/lib/Target/AIE/AIE2InstructionSelector.cpp
+++ b/llvm/lib/Target/AIE/AIE2InstructionSelector.cpp
@@ -140,6 +140,8 @@ class AIE2InstructionSelector : public InstructionSelector {
   bool selectAddrInsn(MachineInstr &I, MachineRegisterInfo &MRI);
   bool selectCascadeStreamInsn(MachineInstr &I, MachineRegisterInfo &MRI,
                                bool isWrite);
+  bool selectG_AIE_ADD_VECTOR_ELT_LEFT(MachineInstr &I,
+                                       MachineRegisterInfo &MRI);
   bool selectG_CONCAT_VECTORS(MachineInstr &I, MachineRegisterInfo &MRI);
   bool selectG_BRCOND(MachineInstr &I, MachineRegisterInfo &MRI);
   bool selectG_BRINDIRECT(MachineInstr &I, MachineRegisterInfo &MRI);
@@ -737,6 +739,8 @@ bool AIE2InstructionSelector::select(MachineInstr &I) {
     return selectG_STORE(I, MRI);
   case G_UNMERGE_VALUES:
     return selectG_UNMERGE_VALUES(I, MRI);
+  case AIE2::G_AIE_ADD_VECTOR_ELT_LEFT:
+    return selectG_AIE_ADD_VECTOR_ELT_LEFT(I, MRI);
   case G_CONCAT_VECTORS:
     return selectG_CONCAT_VECTORS(I, MRI);
   case AIE2::G_AIE_OFFSET_STORE:
@@ -800,6 +804,54 @@ bool AIE2InstructionSelector::selectStartLoop(MachineInstr &I,
   return constrainSelectedInstRegOperands(*ADDI, TII, TRI, RBI);
 }
 
+/// Select G_AIE_ADD_VECTOR_ELT_LEFT into the VPUSH_LO_{8,16,32} instruction
+/// whose element width matches the destination vector.
+///
+/// Operand 0 is the destination, operand 1 the source vector and operand 2
+/// the value.
+///
+/// \returns false, leaving \p I in place, if the destination is not a vector
+/// or its element width has no VPUSH_LO variant, so that the caller can report
+/// a selection failure. Otherwise \p I is replaced by the new instruction and
+/// the result of constraining its register operands is returned.
+bool AIE2InstructionSelector::selectG_AIE_ADD_VECTOR_ELT_LEFT(
+    MachineInstr &I, MachineRegisterInfo &MRI) {
+  const Register Dst = I.getOperand(0).getReg();
+  const Register Src = I.getOperand(1).getReg();
+  const Register Value = I.getOperand(2).getReg();
+
+  // The element width is only meaningful for a vector destination.
+  const LLT DstTy = MRI.getType(Dst);
+  if (!DstTy.isVector())
+    return false;
+
+  // Only 8-, 16- and 32-bit elements are mapped to a VPUSH_LO variant here.
+  // As of 03/24, vectors of 64-bit elements may still get here, because they
+  // are used for accumulators. Fail selection for those instead of calling
+  // llvm_unreachable, which is undefined behavior in release builds.
+  unsigned Opcode;
+  switch (DstTy.getScalarSizeInBits()) {
+  case 8:
+    Opcode = AIE2::VPUSH_LO_8;
+    break;
+  case 16:
+    Opcode = AIE2::VPUSH_LO_16;
+    break;
+  case 32:
+    Opcode = AIE2::VPUSH_LO_32;
+    break;
+  default:
+    return false;
+  }
+
+  // The selected instruction takes (value, vector), which is the opposite of
+  // the ISA order (vector, value). This is a choice made in the TD definition.
+  MachineInstr &MI = *MIB.buildInstr(Opcode, {Dst}, {Value, Src});
+  I.eraseFromParent();
+
+  return constrainSelectedInstRegOperands(MI, TII, TRI, RBI);
+}
+
 // WIP: Implement this as a tablegen pattern instead, it is very similar to the
 // definition used for instrinsics.
 bool AIE2InstructionSelector::selectG_CONCAT_VECTORS(MachineInstr &I,
diff --git a/llvm/test/CodeGen/AIE/aie2/GlobalISel/inst-select-aie-add-vector-elt-left.mir b/llvm/test/CodeGen/AIE/aie2/GlobalISel/inst-select-aie-add-vector-elt-left.mir
new file mode 100644
index 000000000000..3f4c5b916c8a
--- /dev/null
+++ b/llvm/test/CodeGen/AIE/aie2/GlobalISel/inst-select-aie-add-vector-elt-left.mir
@@ -0,0 +1,92 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+#
+# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# (c) Copyright 2023-2024 Advanced Micro Devices, Inc.
or its affiliates
+#
+# RUN: llc -mtriple aie2 -run-pass=instruction-select %s -verify-machineinstrs -o - | FileCheck %s
+# <16 x s32> destination: the CHECK lines expect VPUSH_LO_32 on vec512.
+---
+name: vpush_int32_512
+alignment: 16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+stack:
+  - { id: 0, name: "", size: 64, alignment: 32 }
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: vpush_int32_512
+    ; CHECK: [[MOV_RLC_imm10_pseudo:%[0-9]+]]:er = MOV_RLC_imm10_pseudo 12
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:vec512 = IMPLICIT_DEF
+    ; CHECK-NEXT: [[VPUSH_LO_32_:%[0-9]+]]:vec512 = VPUSH_LO_32 [[MOV_RLC_imm10_pseudo]], [[DEF]]
+    ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VPUSH_LO_32_]]
+    %0:gprregbank(s32) = G_CONSTANT i32 12
+    %1:vregbank(<16 x s32>) = G_IMPLICIT_DEF
+    %2:vregbank(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT %1, %0(s32)
+    PseudoRET implicit $lr, implicit %2
+...
+# <32 x s16> destination: the CHECK lines expect VPUSH_LO_16 on vec512.
+# NOTE(review): no tracksRegLiveness here, unlike the other tests - confirm.
+---
+name: vpush_int16_512
+legalized: true
+regBankSelected: true
+stack:
+  - { id: 0, name: "", size: 64, alignment: 32}
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: vpush_int16_512
+    ; CHECK: [[MOV_RLC_imm10_pseudo:%[0-9]+]]:er = MOV_RLC_imm10_pseudo 12
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:vec512 = IMPLICIT_DEF
+    ; CHECK-NEXT: [[VPUSH_LO_16_:%[0-9]+]]:vec512 = VPUSH_LO_16 [[MOV_RLC_imm10_pseudo]], [[DEF]]
+    ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VPUSH_LO_16_]]
+    %0:gprregbank(s32) = G_CONSTANT i32 12
+    %1:vregbank(<32 x s16>) = G_IMPLICIT_DEF
+    %2:vregbank(<32 x s16>) = G_AIE_ADD_VECTOR_ELT_LEFT %1, %0(s32)
+    PseudoRET implicit $lr, implicit %2
+...
+# <64 x s8> destination: the CHECK lines expect VPUSH_LO_8 on vec512.
+---
+name: vpush_int8_512
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+stack:
+  - { id: 0, name: "", size: 64, alignment: 32}
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: vpush_int8_512
+    ; CHECK: [[MOV_RLC_imm10_pseudo:%[0-9]+]]:er = MOV_RLC_imm10_pseudo 12
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:vec512 = IMPLICIT_DEF
+    ; CHECK-NEXT: [[VPUSH_LO_8_:%[0-9]+]]:vec512 = VPUSH_LO_8 [[MOV_RLC_imm10_pseudo]], [[DEF]]
+    ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VPUSH_LO_8_]]
+    %0:gprregbank(s32) = G_CONSTANT i32 12
+    %1:vregbank(<64 x s8>) = G_IMPLICIT_DEF
+    %2:vregbank(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT %1, %0(s32)
+    PseudoRET implicit $lr, implicit %2
+...
+# Push into <16 x s32>, unmerge into two <8 x s32> halves, return only %3.
+---
+name: vpush_int32_256
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+stack:
+  - { id: 0, name: "", size: 32, alignment: 32 }
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: vpush_int32_256
+    ; CHECK: [[MOV_RLC_imm10_pseudo:%[0-9]+]]:er = MOV_RLC_imm10_pseudo 0
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:vec512 = IMPLICIT_DEF
+    ; CHECK-NEXT: [[VPUSH_LO_32_:%[0-9]+]]:vec512 = VPUSH_LO_32 [[MOV_RLC_imm10_pseudo]], [[DEF]]
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vec256 = COPY [[VPUSH_LO_32_]].sub_256_lo
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vec256 = COPY [[VPUSH_LO_32_]].sub_256_hi
+    ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[COPY]]
+    %0:gprregbank(s32) = G_CONSTANT i32 0
+    %1:vregbank(<16 x s32>) = G_IMPLICIT_DEF
+    %2:vregbank(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT %1, %0(s32)
+    %3:vregbank(<8 x s32>), %4:vregbank(<8 x s32>) = G_UNMERGE_VALUES %2(<16 x s32>)
+    PseudoRET implicit $lr, implicit %3