Skip to content

Commit

Permalink
[AIE2P] Support fifo load with extra storage - InstructionSelect
Browse files Browse the repository at this point in the history
  • Loading branch information
khallouh committed Feb 4, 2025
1 parent b0bc1e6 commit ccef3a2
Show file tree
Hide file tree
Showing 4 changed files with 218 additions and 0 deletions.
67 changes: 67 additions & 0 deletions llvm/lib/Target/AIE/aie2p/AIE2PInstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,8 @@ class AIE2PInstructionSelector : public AIEBaseInstructionSelector {
bool selectVLD_FIFO_POP_BFP16_1D(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectVLD_FIFO_POP_BFP16_2D(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectVLD_FIFO_POP_BFP16_3D(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectVLD_FIFO_FILLX(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectVLD_FIFO_POPX(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectSetI128(MachineInstr &I, MachineOperand &DstReg,
MachineOperand &SrcReg, MachineRegisterInfo &MRI);
bool selectExtractI128(MachineInstr &I, Register DstReg, Register SrcReg,
Expand Down Expand Up @@ -395,6 +397,10 @@ bool AIE2PInstructionSelector::select(MachineInstr &I) {
return selectVST_FIFO(I, MRI);
case Intrinsic::aie2p_fifo_ld_fill:
return selectVLD_FIFO_FILL(I, MRI);
case Intrinsic::aie2p_fifo_ld_fillx:
return selectVLD_FIFO_FILLX(I, MRI);
case Intrinsic::aie2p_fifo_ld_popx:
return selectVLD_FIFO_POPX(I, MRI);
case Intrinsic::aie2p_fifo_ld_pop_unaligned:
return selectVLD_FIFO_POP_512(I, MRI);
case Intrinsic::aie2p_fifo_ld_pop_1d_unaligned:
Expand Down Expand Up @@ -2575,6 +2581,10 @@ unsigned getLoadFifoOpcode(MachineInstr &I) {
return AIE2P::VLDB_POP_544_3D;
case Intrinsic::aie2p_fifo_ld_pop_576_3d_bfp16:
return AIE2P::VLDB_POP_576_3D;
case Intrinsic::aie2p_fifo_ld_fillx:
return AIE2P::VLDB_FILLX_512;
case Intrinsic::aie2p_fifo_ld_popx:
return AIE2P::VLDB_POPX_512;
}
llvm_unreachable("unreachable: Failed to get sparse load opcode");
return AIE2P::INSTRUCTION_LIST_END;
Expand All @@ -2596,6 +2606,34 @@ bool AIE2PInstructionSelector::selectVLD_FIFO_FILL(MachineInstr &I,
return constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
}

/// Select the aie2p_fifo_ld_fillx intrinsic into the opcode returned by
/// getLoadFifoOpcode().
///
/// Operand layout of the intrinsic instruction \p I:
///   defs 0-3 : pointer, fifo, avail, extra
///   operand 4: the intrinsic ID
///   uses 5-9 : pointer, fifo, avail, extra, conf
///
/// The extra storage is not passed as an explicit operand of the selected
/// instruction. Instead the incoming value is copied into the physical
/// register lfe before the load, and the outgoing value is copied back out of
/// lfe after it.
///
/// \param I   The intrinsic instruction to select; erased on return.
/// \param MRI Register info used to constrain the copy out of lfe.
/// \returns true if the register operands of the selected instruction and the
/// result of the copy out of lfe were constrained successfully.
bool AIE2PInstructionSelector::selectVLD_FIFO_FILLX(MachineInstr &I,
                                                    MachineRegisterInfo &MRI) {
  // The intrinsic ID is only needed for this sanity check. Query it inside the
  // assert so that no local variable is left unused when assertions are
  // compiled out (NDEBUG builds).
  assert(cast<GIntrinsic>(I).getIntrinsicID() ==
             Intrinsic::aie2p_fifo_ld_fillx &&
         "Expected the aie2p_fifo_ld_fillx intrinsic");
  Register PtrOut = I.getOperand(0).getReg();
  Register FifoOut = I.getOperand(1).getReg();
  Register AvailOut = I.getOperand(2).getReg();
  Register ExtraOut = I.getOperand(3).getReg();
  // Operand 4 is the intrinsic ID, so the uses start at operand 5.
  Register PtrIn = I.getOperand(5).getReg();
  Register FifoIn = I.getOperand(6).getReg();
  Register AvailIn = I.getOperand(7).getReg();
  Register ExtraIn = I.getOperand(8).getReg();
  Register ConfIn = I.getOperand(9).getReg();

  // Move the incoming extra storage into lfe ahead of the load.
  MIB.buildInstr(TargetOpcode::COPY, {AIE2P::lfe}, {}).addReg(ExtraIn);
  // ConfIn is supplied for both leading use operands of the instruction.
  MachineInstrBuilder MI =
      MIB.buildInstr(getLoadFifoOpcode(I), {PtrOut, FifoOut, AvailOut},
                     {ConfIn, ConfIn, PtrIn, FifoIn, AvailIn});
  // Read the updated extra storage back out of lfe.
  auto CopyBackLfeMI =
      MIB.buildInstr(TargetOpcode::COPY, {ExtraOut}, {}).addReg(AIE2P::lfe);

  I.eraseFromParent();
  // The copy out of lfe is built by hand, so its result register is
  // constrained explicitly to the mExtra register class.
  return constrainSelectedInstRegOperands(*MI, TII, TRI, RBI) &&
         constrainOperandRegClass(*MF, TRI, MRI, TII, RBI, *CopyBackLfeMI,
                                  AIE2P::mExtraRegClass,
                                  CopyBackLfeMI->getOperand(0));
}

bool AIE2PInstructionSelector::selectVLD_FIFO_POP_512(
MachineInstr &I, MachineRegisterInfo &MRI) {
unsigned IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
Expand All @@ -2614,6 +2652,35 @@ bool AIE2PInstructionSelector::selectVLD_FIFO_POP_512(
return constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
}

/// Select the aie2p_fifo_ld_popx intrinsic into the opcode returned by
/// getLoadFifoOpcode().
///
/// Operand layout of the intrinsic instruction \p I:
///   defs 0-4  : vector, pointer, fifo, avail, extra
///   operand 5 : the intrinsic ID
///   uses 6-10 : pointer, fifo, avail, extra, conf
///
/// The extra storage is not passed as an explicit operand of the selected
/// instruction. Instead the incoming value is copied into the physical
/// register lfe before the load, and the outgoing value is copied back out of
/// lfe after it.
///
/// \param I   The intrinsic instruction to select; erased on return.
/// \param MRI Register info used to constrain the copy out of lfe.
/// \returns true if the register operands of the selected instruction and the
/// result of the copy out of lfe were constrained successfully.
bool AIE2PInstructionSelector::selectVLD_FIFO_POPX(MachineInstr &I,
                                                   MachineRegisterInfo &MRI) {
  // The intrinsic ID is only needed for this sanity check. Query it inside the
  // assert so that no local variable is left unused when assertions are
  // compiled out (NDEBUG builds).
  assert(cast<GIntrinsic>(I).getIntrinsicID() ==
             Intrinsic::aie2p_fifo_ld_popx &&
         "Expected the aie2p_fifo_ld_popx intrinsic");
  Register VecOut = I.getOperand(0).getReg();
  Register PtrOut = I.getOperand(1).getReg();
  Register FifoOut = I.getOperand(2).getReg();
  Register AvailOut = I.getOperand(3).getReg();
  Register ExtraOut = I.getOperand(4).getReg();
  // Operand 5 is the intrinsic ID, so the uses start at operand 6.
  Register PtrIn = I.getOperand(6).getReg();
  Register FifoIn = I.getOperand(7).getReg();
  Register AvailIn = I.getOperand(8).getReg();
  Register ExtraIn = I.getOperand(9).getReg();
  Register ConfIn = I.getOperand(10).getReg();

  // Move the incoming extra storage into lfe ahead of the load.
  MIB.buildInstr(TargetOpcode::COPY, {AIE2P::lfe}, {}).addReg(ExtraIn);
  // ConfIn is supplied for both leading use operands of the instruction.
  MachineInstrBuilder MI =
      MIB.buildInstr(getLoadFifoOpcode(I), {VecOut, PtrOut, FifoOut, AvailOut},
                     {ConfIn, ConfIn, PtrIn, FifoIn, AvailIn});
  // Read the updated extra storage back out of lfe.
  auto CopyBackLfeMI =
      MIB.buildInstr(TargetOpcode::COPY, {ExtraOut}, {}).addReg(AIE2P::lfe);

  I.eraseFromParent();
  // The copy out of lfe is built by hand, so its result register is
  // constrained explicitly to the mExtra register class.
  return constrainSelectedInstRegOperands(*MI, TII, TRI, RBI) &&
         constrainOperandRegClass(*MF, TRI, MRI, TII, RBI, *CopyBackLfeMI,
                                  AIE2P::mExtraRegClass,
                                  CopyBackLfeMI->getOperand(0));
}

bool AIE2PInstructionSelector::selectVLD_FIFO_POP_512_1D(
MachineInstr &I, MachineRegisterInfo &MRI) {
unsigned IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,7 @@ class AIE2PDim3DRegisterClass <dag reglist, RegAltNameIndex idx = NoRegAltName>
def eLdFifoReg : AIE2PVector1024RegisterClass<(add lf0, lf1)>;

def mFifoHLReg : AIE2PVector512RegisterClass<(add sfh, sfl, lfh0, lfh1, lfl0, lfl1, lfe)>;
// Register class containing only lfe. The instruction selector constrains the
// result of its copy out of lfe to this class (AIE2P::mExtraRegClass).
def mExtra : AIE2PVector512RegisterClass<(add lfe)>;
def mStFifo : AIE2PVector1024RegisterClass<(add sf)>;
def mStFifoh : AIE2PVector512RegisterClass<(add sfh)>;
def mStFifol : AIE2PVector512RegisterClass<(add sfl)>;
Expand Down
56 changes: 56 additions & 0 deletions llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-fifo-loads.mir
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,62 @@ body: |
PseudoRET implicit $lr, implicit %7, implicit %8, implicit %9
...

# Instruction selection of llvm.aie2p.fifo.ld.fillx: the intrinsic must become
# VLDB_FILLX_512, preceded by a COPY of the extra-storage input into $lfe and
# followed by a COPY out of $lfe whose result is in the mextra register class.
---
name: ld_fillx
tracksRegLiveness: true
legalized: true
regBankSelected: true
body: |
bb.1.entry:
liveins:
; CHECK-LABEL: name: ld_fillx
; CHECK: [[DEF:%[0-9]+]]:eps = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:eldfiforeg = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:erf2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:fifo512 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF4:%[0-9]+]]:mr30_fifo_step_e1 = IMPLICIT_DEF
; CHECK-NEXT: $lfe = COPY [[DEF3]]
; CHECK-NEXT: [[VLDB_FILLX_512_:%[0-9]+]]:eps, [[VLDB_FILLX_512_1:%[0-9]+]]:eldfiforeg, [[VLDB_FILLX_512_2:%[0-9]+]]:erf2 = VLDB_FILLX_512 [[DEF4]], [[DEF4]], [[DEF]], [[DEF1]], [[DEF2]], implicit-def $lfe, implicit $lfe
; CHECK-NEXT: [[COPY:%[0-9]+]]:mextra = COPY $lfe
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VLDB_FILLX_512_]], implicit [[VLDB_FILLX_512_1]], implicit [[VLDB_FILLX_512_2]], implicit [[COPY]]
%2:modregbank(s20) = G_IMPLICIT_DEF
%4:ptrregbank(p0) = G_IMPLICIT_DEF
%5:fiforegbank(<32 x s32>) = G_IMPLICIT_DEF
%6:gprregbank(s32) = G_IMPLICIT_DEF
%10:fiforegbank(<16 x s32>) = G_IMPLICIT_DEF
%11:gprregbank(s32) = G_IMPLICIT_DEF
%7:ptrregbank(p0), %8:fiforegbank(<32 x s32>), %9:gprregbank(s32), %12:fiforegbank(<16 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.fifo.ld.fillx), %4:ptrregbank(p0), %5:fiforegbank(<32 x s32>), %6:gprregbank(s32), %10:fiforegbank(<16 x s32>), %11:gprregbank(s32)
PseudoRET implicit $lr, implicit %7, implicit %8, implicit %9, implicit %12
...

# Instruction selection of llvm.aie2p.fifo.ld.popx: the intrinsic must become
# VLDB_POPX_512 (which additionally defines a vec512 result), preceded by a
# COPY of the extra-storage input into $lfe and followed by a COPY out of $lfe
# whose result is in the mextra register class.
---
name: ld_popx
tracksRegLiveness: true
legalized: true
regBankSelected: true
body: |
bb.1.entry:
liveins:
; CHECK-LABEL: name: ld_popx
; CHECK: [[DEF:%[0-9]+]]:eps = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:eldfiforeg = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:erf2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:fifo512 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF4:%[0-9]+]]:mr30_fifo_step_e1 = IMPLICIT_DEF
; CHECK-NEXT: $lfe = COPY [[DEF3]]
; CHECK-NEXT: [[VLDB_POPX_512_:%[0-9]+]]:vec512, [[VLDB_POPX_512_1:%[0-9]+]]:eps, [[VLDB_POPX_512_2:%[0-9]+]]:eldfiforeg, [[VLDB_POPX_512_3:%[0-9]+]]:erf2 = VLDB_POPX_512 [[DEF4]], [[DEF4]], [[DEF]], [[DEF1]], [[DEF2]], implicit-def $lfe, implicit-def $srfifo_uf, implicit $lfe
; CHECK-NEXT: [[COPY:%[0-9]+]]:mextra = COPY $lfe
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VLDB_POPX_512_]], implicit [[VLDB_POPX_512_1]], implicit [[VLDB_POPX_512_2]], implicit [[VLDB_POPX_512_3]], implicit [[COPY]]
%3:modregbank(s20) = G_IMPLICIT_DEF
%5:ptrregbank(p0) = G_IMPLICIT_DEF
%6:fiforegbank(<32 x s32>) = G_IMPLICIT_DEF
%7:gprregbank(s32) = G_IMPLICIT_DEF
%12:fiforegbank(<16 x s32>) = G_IMPLICIT_DEF
%13:gprregbank(s32) = G_IMPLICIT_DEF
%8:vregbank(<64 x s8>), %9:ptrregbank(p0), %10:fiforegbank(<32 x s32>), %11:gprregbank(s32), %14:fiforegbank(<16 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.fifo.ld.popx), %5:ptrregbank(p0), %6:fiforegbank(<32 x s32>), %7:gprregbank(s32), %12:fiforegbank(<16 x s32>), %13:gprregbank(s32)
PseudoRET implicit $lr, implicit %8, implicit %9, implicit %10, implicit %11, implicit %14
...

---
name: pop_unaligned
tracksRegLiveness: true
Expand Down
94 changes: 94 additions & 0 deletions llvm/test/CodeGen/AIE/aie2p/fifo-loads.ll
Original file line number Diff line number Diff line change
Expand Up @@ -624,6 +624,98 @@ entry:
ret %struct.v64bfp16ebs16 %.fca.1.insert.i
}

; End-to-end test of llvm.aie2p.fifo.ld.popx with a constant configuration
; value (2015). The pointer is loaded from %p; the fifo, the position and the
; extra storage are loaded from %s at byte offsets 0, 128 and 192. All four
; updated values are stored back and the <64 x i8> result (element 0 of the
; returned struct) is returned.
define dso_local noundef <64 x i8> @_Z17test_fifo_ld_popxRPDv64_hR12fifo_state_t(ptr nocapture nonnull align 4 dereferenceable(4) %p, ptr nocapture nonnull align 64 dereferenceable(256) %s) local_unnamed_addr #0 {
; CHECK-LABEL: _Z17test_fifo_ld_popxRPDv64_hR12fifo_state_t:
; CHECK: .p2align 4
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: lda p0, [p0, #0]; mov dj0, #128
; CHECK-NEXT: lda r24, [p1, dj0]
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: vlda lfl0, [p1, #0]
; CHECK-NEXT: vlda x0, [p1, #192]; mov p2, p0
; CHECK-NEXT: vlda lfh0, [p1, #64]
; CHECK-NEXT: movxm r30, #2015
; CHECK-NEXT: vldb.popx.512 x0, [p0, lf0, r24]
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: vmov lfe, x0
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: vst lfl0, [p1, #0]; ret lr
; CHECK-NEXT: st r24, [p1, dj0]; vmov x2, lfe // Delay Slot 5
; CHECK-NEXT: vst lfh0, [p1, #64] // Delay Slot 4
; CHECK-NEXT: vst x2, [p1, #192] // Delay Slot 3
; CHECK-NEXT: st p0, [p2, #0] // Delay Slot 2
; CHECK-NEXT: nop // Delay Slot 1
entry:
%pos1.i.i = getelementptr inbounds i8, ptr %s, i20 128
%extra3.i.i = getelementptr inbounds i8, ptr %s, i20 192
%0 = load ptr, ptr %p, align 4, !tbaa !2
%1 = load <32 x i32>, ptr %s, align 64, !tbaa !6
%2 = load i32, ptr %pos1.i.i, align 64, !tbaa !7
%3 = load <16 x i32>, ptr %extra3.i.i, align 64, !tbaa !6
%4 = tail call { <64 x i8>, ptr, <32 x i32>, i32, <16 x i32> } @llvm.aie2p.fifo.ld.popx(ptr %0, <32 x i32> %1, i32 %2, <16 x i32> %3, i32 2015)
%5 = extractvalue { <64 x i8>, ptr, <32 x i32>, i32, <16 x i32> } %4, 0
%6 = extractvalue { <64 x i8>, ptr, <32 x i32>, i32, <16 x i32> } %4, 1
%7 = extractvalue { <64 x i8>, ptr, <32 x i32>, i32, <16 x i32> } %4, 2
%8 = extractvalue { <64 x i8>, ptr, <32 x i32>, i32, <16 x i32> } %4, 3
%9 = extractvalue { <64 x i8>, ptr, <32 x i32>, i32, <16 x i32> } %4, 4
store <16 x i32> %9, ptr %extra3.i.i, align 64
store <32 x i32> %7, ptr %s, align 128
store i32 %8, ptr %pos1.i.i, align 64
store ptr %6, ptr %p, align 4
ret <64 x i8> %5
}

; End-to-end test of llvm.aie2p.fifo.ld.fillx with a run-time configuration
; value computed as (%step << 6) | %mask. The pointer is loaded from %p; the
; fifo, the position and the extra storage are loaded from %s at byte offsets
; 0, 128 and 192. All four updated values returned by the intrinsic are stored
; back to where they were loaded from.
define dso_local void @_Z18test_fifo_ld_fillxRP22v64bfp16ebs8_unalignedR12fifo_state_tii(ptr nocapture nonnull align 4 dereferenceable(4) %p, ptr nocapture nonnull align 64 dereferenceable(256) %s, i32 noundef %step, i32 noundef %mask) local_unnamed_addr #0 {
; CHECK-LABEL: _Z18test_fifo_ld_fillxRP22v64bfp16ebs8_unalignedR12fifo_state_tii:
; CHECK: .p2align 4
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: lda p0, [p0, #0]; nopb ; nops ; nopx ; mov dj0, #128; nopv
; CHECK-NEXT: lda r24, [p1, dj0]; nopx
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: vlda lfl0, [p1, #0]
; CHECK-NEXT: vlda x0, [p1, #192]; movx r2, #6; mov p2, p0
; CHECK-NEXT: vlda lfh0, [p1, #64]; lshl r0, r0, r2
; CHECK-NEXT: or r30, r0, r1
; CHECK-NEXT: vldb.fillx.512 [p0, lf0, r24]
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: vmov lfe, x0
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: vst lfl0, [p1, #0]; ret lr
; CHECK-NEXT: st r24, [p1, dj0]; vmov x0, lfe // Delay Slot 5
; CHECK-NEXT: vst lfh0, [p1, #64] // Delay Slot 4
; CHECK-NEXT: vst x0, [p1, #192] // Delay Slot 3
; CHECK-NEXT: st p0, [p2, #0] // Delay Slot 2
; CHECK-NEXT: nop // Delay Slot 1
entry:
%pos1.i = getelementptr inbounds i8, ptr %s, i20 128
%extra3.i = getelementptr inbounds i8, ptr %s, i20 192
%shl.i = shl i32 %step, 6
%or.i = or i32 %shl.i, %mask
%0 = load ptr, ptr %p, align 4, !tbaa !2
%1 = load <32 x i32>, ptr %s, align 64, !tbaa !6
%2 = load i32, ptr %pos1.i, align 64, !tbaa !7
%3 = load <16 x i32>, ptr %extra3.i, align 64, !tbaa !6
%4 = tail call { ptr, <32 x i32>, i32, <16 x i32> } @llvm.aie2p.fifo.ld.fillx(ptr %0, <32 x i32> %1, i32 %2, <16 x i32> %3, i32 %or.i)
%5 = extractvalue { ptr, <32 x i32>, i32, <16 x i32> } %4, 0
%6 = extractvalue { ptr, <32 x i32>, i32, <16 x i32> } %4, 1
%7 = extractvalue { ptr, <32 x i32>, i32, <16 x i32> } %4, 2
%8 = extractvalue { ptr, <32 x i32>, i32, <16 x i32> } %4, 3
store <16 x i32> %8, ptr %extra3.i, align 64
store <32 x i32> %6, ptr %s, align 128
store i32 %7, ptr %pos1.i, align 64
store ptr %5, ptr %p, align 4
ret void
}




declare { ptr, <32 x i32>, i32 } @llvm.aie2p.fifo.ld.fill(ptr, <32 x i32>, i32) #5
Expand All @@ -639,6 +731,8 @@ declare { ptr, <32 x i32>, i32, <64 x i8>, <8 x i8> } @llvm.aie2p.fifo.ld.pop.54
declare { ptr, <32 x i32>, i32, <64 x i8>, <8 x i8> } @llvm.aie2p.fifo.ld.pop.544.1d.bfp16(ptr, <32 x i32>, i32, i20) #5
declare { ptr, <32 x i32>, i32, i20, <64 x i8>, <8 x i8> } @llvm.aie2p.fifo.ld.pop.544.2d.bfp16(ptr, <32 x i32>, i32, i20, i20, i20, i20) #5
declare { ptr, <32 x i32>, i32, i20, i20, <64 x i8>, <8 x i8> } @llvm.aie2p.fifo.ld.pop.544.3d.bfp16(ptr, <32 x i32>, i32, i20, i20, i20, i20, i20, i20, i20) #5
declare { <64 x i8>, ptr, <32 x i32>, i32, <16 x i32> } @llvm.aie2p.fifo.ld.popx(ptr, <32 x i32>, i32, <16 x i32>, i32) #1
declare { ptr, <32 x i32>, i32, <16 x i32> } @llvm.aie2p.fifo.ld.fillx(ptr, <32 x i32>, i32, <16 x i32>, i32) #1

!2 = !{!3, !3, i64 0}
!3 = !{!"any pointer", !4, i64 0}
Expand Down

0 comments on commit ccef3a2

Please sign in to comment.