diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PInstructionSelector.cpp b/llvm/lib/Target/AIE/aie2p/AIE2PInstructionSelector.cpp index 7de0283b51e6..0acdf7082d88 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PInstructionSelector.cpp +++ b/llvm/lib/Target/AIE/aie2p/AIE2PInstructionSelector.cpp @@ -100,9 +100,9 @@ class AIE2PInstructionSelector : public AIEBaseInstructionSelector { bool selectG_LOAD(MachineInstr &I, MachineRegisterInfo &MRI); bool selectG_STORE(MachineInstr &I, MachineRegisterInfo &MRI); bool selectG_AIE_LOAD_STORE(MachineInstr &I, MachineRegisterInfo &MRI); - bool select1024BitG_AIE_LOAD_STORE(MachineInstr &I, LoadStoreOpcodes &LSO, - AddressingModeInfo &AMI, - MachineRegisterInfo &MRI); + bool selectWideG_AIE_LOAD_STORE(MachineInstr &I, LoadStoreOpcodes &LSO, + AddressingModeInfo &AMI, + MachineRegisterInfo &MRI); bool selectSetI128(MachineInstr &I, MachineOperand &DstReg, MachineOperand &SrcReg, MachineRegisterInfo &MRI); bool selectExtractI128(MachineInstr &I, Register DstReg, Register SrcReg, @@ -1731,14 +1731,25 @@ LoadStoreOpcodes AIE2PInstructionSelector::getLoadStoreOpcode( AlwaysFitsImmediateRange, /*OffsetOpcode=*/AIE2P::VLDA_dmx_lda_fifohl_idx_imm}; } - if (RBID == AIE2P::VRegBankID) { - llvm_unreachable("Unimplemented"); - } if (RBID == AIE2P::AccRegBankID) { - llvm_unreachable("Unimplemented"); + return {/*ISelOpcode=*/AIE2P::VLDA_dmx_lda_bm_idx_imm, + AlwaysFitsImmediateRange, + /*OffsetOpcode=*/AIE2P::VLDA_dmx_lda_bm_idx_imm}; + } + if (RBID == AIE2P::VRegBankID) { + return {/*ISelOpcode=*/AIE2P::VLDA_dmx_lda_x_idx_imm, + AlwaysFitsImmediateRange, + /*OffsetOpcode=*/AIE2P::VLDA_dmx_lda_x_idx_imm}; } llvm_unreachable("1024-bit vector type must be in AccRegBank or VRegBank " "or FifoRegBankID"); + } else if (LoadStoreSize == 2048) { + if (RBID == AIE2P::AccRegBankID) { + return {/*ISelOpcode=*/AIE2P::VLDA_dmx_lda_bm_idx_imm, + AlwaysFitsImmediateRange, + /*OffsetOpcode=*/AIE2P::VLDA_dmx_lda_bm_idx_imm}; + } + llvm_unreachable("2048-bit vector 
type must be in AccRegBank"); } break; } @@ -2055,6 +2066,14 @@ LoadStoreOpcodes AIE2PInstructionSelector::getLoadStoreOpcode( } llvm_unreachable("1024-bit vector type must be in AccRegBank or " "VRegBank or FifoRegBankID"); + } else if (LoadStoreSize == 2048) { + assert(RBID == AIE2P::AccRegBankID && + "2048-bit vectors should be in the Accumulator Register Bank"); + if (RBID == AIE2P::AccRegBankID) { + return {/*ISelOpcode=*/AIE2P::VST_dmx_sts_bm_idx_imm, + /*FitsImmediateRange=*/AlwaysFitsImmediateRange, + /*OffsetOpcode=*/AIE2P::VST_dmx_sts_bm_idx_imm}; + } } break; }
@@ -2253,76 +2272,146 @@ LoadStoreOpcodes AIE2PInstructionSelector::getLoadStoreOpcode(
   llvm_unreachable("Invalid instruction");
 }
 
-bool AIE2PInstructionSelector::select1024BitG_AIE_LOAD_STORE(
+/// Selects a 1024-bit or 2048-bit G_LOAD/G_STORE as two or four 512-bit
+/// accesses.
+///
+/// Chunk 0 is built from \p LSO.ISelOpcode with the addressing mode described
+/// by \p AMI; chunk N > 0 uses \p LSO.OffsetOpcode with the pointer operand and
+/// the immediate offset N * 64. For loads, the chunks are recombined into the
+/// wide destination register with a REG_SEQUENCE.
+///
+/// \returns false if the opcode is neither G_LOAD nor G_STORE, if the register
+/// bank has too few 512-bit sub-register indices for the access size, or if a
+/// register constraint cannot be applied; true otherwise.
+bool AIE2PInstructionSelector::selectWideG_AIE_LOAD_STORE(
     MachineInstr &I, LoadStoreOpcodes &LSO, AddressingModeInfo &AMI,
     MachineRegisterInfo &MRI) {
+  constexpr unsigned ChunkSizeInBits = 512;
+  constexpr unsigned ChunkSizeInBytes = ChunkSizeInBits / 8;
+
+  const LLT SrcDstTy = MRI.getType(AMI.SrcDstOp.getReg());
+  const unsigned SrcDstTySize = SrcDstTy.getSizeInBits();
+  assert((SrcDstTySize == 1024 || SrcDstTySize == 2048) &&
+         "Only 1024-bit and 2048-bit accesses are split here");
+  assert(LSO.OffsetOpcode && "Split accesses need an offset opcode");
+  const unsigned SplitFactor = SrcDstTySize / ChunkSizeInBits;
+  const unsigned RBID = deriveRegBankID(I.getOperand(0).getReg(), MRI, RBI);
+  const TargetRegisterClass *RC512 = nullptr;
+  const TargetRegisterClass *RC1024 = nullptr;
+  const TargetRegisterClass *RC2048 = &AIE2P::ACC2048RegClass;
+  llvm::SmallVector<unsigned, 4> SubRegIdxes;
+
+  if (RBID == AIE2P::AccRegBankID) {
+    SubRegIdxes = {AIE2P::sub_512_acc_lo, AIE2P::sub_512_acc_hi,
+                   AIE2P::sub_1024_acc_hi_then_sub_512_acc_lo,
+                   AIE2P::sub_1024_acc_hi_then_sub_512_acc_hi};
+    RC512 = &AIE2P::ACC512RegClass;
+    RC1024 = &AIE2P::ACC1024RegClass;
+  } else if (RBID == AIE2P::VRegBankID) {
+    SubRegIdxes = {AIE2P::sub_512_lo, AIE2P::sub_512_hi};
+    RC512 = &AIE2P::VEC512RegClass;
+    RC1024 = &AIE2P::VEC1024RegClass;
+  } else if (RBID == AIE2P::FifoRegBankID) {
+    SubRegIdxes = {AIE2P::sub_lo_fifo, AIE2P::sub_hi_fifo};
+    RC512 = &AIE2P::FIFO512RegClass;
+    RC1024 = &AIE2P::FIFO1024RegClass;
+  } else {
+    llvm_unreachable("Unknown Register Bank ID!");
+  }
+
+  // Chunks are indexed directly into SubRegIdxes below, so bail out before
+  // emitting anything if this bank cannot name every 512-bit chunk.
+  if (SubRegIdxes.size() < SplitFactor)
+    return false;
 
-  bool IsFifo = deriveRegBankID(I.getOperand(0).getReg(), MRI, RBI) ==
-                AIE2P::FifoRegBankID;
-  assert(IsFifo && "Expected FiforegBank for 1024-bit load/store. Other banks "
-                   "are unsupported");
-
-  Register Low512 = MRI.createVirtualRegister(&AIE2P::FIFO512RegClass);
-  Register High512 = MRI.createVirtualRegister(&AIE2P::FIFO512RegClass);
-
+  llvm::SmallVector<Register, 4> SubRegs;
+  for (unsigned Chunk = 0; Chunk < SplitFactor; ++Chunk)
+    SubRegs.push_back(MRI.createVirtualRegister(RC512));
+
+  // Attaches the split memory operands pair by pair, highest pair first. Offset
+  // equals the byte offset of the pair's lower chunk (0, or 128 for the upper
+  // half of a 2048-bit access).
+  auto handleSplitMemOperands = [&](auto &Instrs) {
+    const unsigned NumPairs = SplitFactor / 2;
+    for (unsigned Pair = 0; Pair < NumPairs; ++Pair) {
+      const unsigned LowerIdx = SplitFactor - 2 - 2 * Pair;
+      const unsigned Offset = LowerIdx * ChunkSizeInBytes;
+      addSplitMemOperands(AMI.MemI, Instrs[LowerIdx + 1] /*Higher MIB*/,
+                          Instrs[LowerIdx] /*Lower MIB*/, Offset, SplitFactor);
+    }
+  };
+
+  auto constrainInstRegOps = [&](auto &Instrs) {
+    return std::all_of(Instrs.begin(), Instrs.end(), [&](const auto &Instr) {
+      return constrainSelectedInstRegOperands(*Instr, TII, TRI, RBI);
+    });
+  };
+
+  llvm::SmallVector<MachineInstrBuilder, 4> Instrs;
   switch (AMI.MemI.getOpcode()) {
   case AIE2P::G_STORE: {
-    auto LowerBits = MIB.buildInstr(TargetOpcode::COPY, {Low512}, {})
-                         .addReg(AMI.SrcDstOp.getReg(), 0, AIE2P::sub_lo_fifo);
-    auto HigherBits = MIB.buildInstr(TargetOpcode::COPY, {High512}, {})
-                          .addReg(AMI.SrcDstOp.getReg(), 0, AIE2P::sub_hi_fifo);
-
-    auto StoreHigher = MIB.buildInstr(*LSO.OffsetOpcode, {}, {})
-                           .addReg(HigherBits.getReg(0))
-                           .addReg(AMI.PtrOp.getReg())
-                           .addImm(64); // Offset
-    auto StoreLower = MIB.buildInstr(LSO.ISelOpcode, {}, {});
-
-    for (auto Def : AMI.MemI.defs())
-      StoreLower.addDef(Def.getReg());
-
-    StoreLower.addReg(LowerBits.getReg(0));
-
-    addAddressingMode(StoreLower, AMI, LSO.FitsImmediateRange, false, MRI);
-
-    addSplitMemOperands(AMI.MemI, StoreHigher, StoreLower, 0, 2);
+    for (unsigned SubReg = 0; SubReg < SplitFactor; ++SubReg) {
+      auto Copy = MIB.buildInstr(TargetOpcode::COPY, {SubRegs[SubReg]}, {})
+                      .addReg(AMI.SrcDstOp.getReg(), 0, SubRegIdxes[SubReg]);
+
+      MachineInstrBuilder Store;
+      if (SubReg == 0) {
+        // Chunk 0 keeps the defs and addressing mode of the original store.
+        Store = MIB.buildInstr(LSO.ISelOpcode, {}, {});
+        for (const auto &Def : AMI.MemI.defs())
+          Store.addDef(Def.getReg());
+        Store.addReg(Copy.getReg(0));
+        addAddressingMode(Store, AMI, LSO.FitsImmediateRange, false, MRI);
+      } else {
+        Store = MIB.buildInstr(*LSO.OffsetOpcode, {}, {})
+                    .addReg(Copy.getReg(0))
+                    .addReg(AMI.PtrOp.getReg())
+                    .addImm(SubReg * ChunkSizeInBytes);
+      }
+      Instrs.push_back(Store);
+    }
 
-    AMI.MemI.eraseFromParent();
-    return constrainSelectedInstRegOperands(*StoreLower, TII, TRI, RBI) &&
-           constrainSelectedInstRegOperands(*StoreHigher, TII, TRI, RBI);
+    handleSplitMemOperands(Instrs);
+    break;
   }
   case AIE2P::G_LOAD: {
-    auto LoadHigher = MIB.buildInstr(*LSO.OffsetOpcode, {}, {})
-                          .addDef(High512)
-                          .addUse(AMI.PtrOp.getReg())
-                          .addImm(64); // Offset
-
-    auto LoadLower = MIB.buildInstr(LSO.ISelOpcode, {Low512}, {});
-    // We have to skip the first Def (the 1024-bit Dst-Reg)
-    for (auto *Def = AMI.MemI.defs().begin() + 1; Def != AMI.MemI.defs().end();
-         Def++)
-      LoadLower.addDef(Def->getReg());
-
-    addAddressingMode(LoadLower, AMI, LSO.FitsImmediateRange, false, MRI);
-
-    addSplitMemOperands(AMI.MemI, LoadHigher, LoadLower, 0, 2);
-
-    MIB.buildInstr(AIE2P::REG_SEQUENCE, {AMI.SrcDstOp.getReg()}, {})
-        .addReg(Low512)
-        .addImm(AIE2P::sub_lo_fifo)
-        .addReg(High512)
-        .addImm(AIE2P::sub_hi_fifo);
+    for (unsigned SubReg = 0; SubReg < SplitFactor; ++SubReg) {
+      MachineInstrBuilder Load;
+      if (SubReg == 0) {
+        Load = MIB.buildInstr(LSO.ISelOpcode, {SubRegs[SubReg]}, {});
+        // Skip the first def (the wide destination register); any further
+        // defs of the original load are carried by chunk 0.
+        for (auto *Def = AMI.MemI.defs().begin() + 1;
+             Def != AMI.MemI.defs().end(); ++Def)
+          Load.addDef(Def->getReg());
+        addAddressingMode(Load, AMI, LSO.FitsImmediateRange, false, MRI);
+      } else {
+        Load = MIB.buildInstr(*LSO.OffsetOpcode, {}, {})
+                   .addDef(SubRegs[SubReg])
+                   .addUse(AMI.PtrOp.getReg())
+                   .addImm(SubReg * ChunkSizeInBytes);
+      }
+      Instrs.push_back(Load);
+    }
+    auto RegSeq =
+        MIB.buildInstr(AIE2P::REG_SEQUENCE, {AMI.SrcDstOp.getReg()}, {});
+    for (unsigned SubReg = 0; SubReg < SplitFactor; ++SubReg)
+      RegSeq.addReg(SubRegs[SubReg]).addImm(SubRegIdxes[SubReg]);
 
     Register SrcDstReg = AMI.SrcDstOp.getReg();
-    AMI.MemI.eraseFromParent();
-    return constrainSelectedInstRegOperands(*LoadLower, TII, TRI, RBI) &&
-           constrainSelectedInstRegOperands(*LoadHigher, TII, TRI, RBI) &&
-           RBI.constrainGenericRegister(SrcDstReg, *&AIE2P::FIFO1024RegClass,
-                                        MRI);
+    if (!RBI.constrainGenericRegister(
+            SrcDstReg, *(SrcDstTySize == 2048 ? RC2048 : RC1024), MRI))
+      return false;
+
+    handleSplitMemOperands(Instrs);
+    break;
   }
   default:
     return false;
   }
+
+  AMI.MemI.eraseFromParent();
+  return constrainInstRegOps(Instrs);
 }
 
 bool AIE2PInstructionSelector::selectG_LOAD(MachineInstr &I,
@@ -2384,12 +2473,13 @@ bool AIE2PInstructionSelector::selectG_AIE_LOAD_STORE( LoadStoreOpcodes LSO = getLoadStoreOpcode(AMI->MemI, MRI, RBI, AMI->ImmediateOffset); - auto StoreSize = MRI.getType(AMI->SrcDstOp.getReg()).getSizeInBits(); - if (StoreSize == 1024) { - return select1024BitG_AIE_LOAD_STORE(I, LSO, *AMI, MRI); - } - MachineInstrBuilder NewInstr = MIB.buildInstr(LSO.ISelOpcode); + LLT SrcDstTy = MRI.getType(AMI->SrcDstOp.getReg()); + auto SrcDstTySize = SrcDstTy.getSizeInBits(); + if ((SrcDstTySize == 1024) || (SrcDstTySize == 2048)) + return selectWideG_AIE_LOAD_STORE(I, LSO, *AMI, MRI); + + MachineInstrBuilder NewInstr = MIB.buildInstr(LSO.ISelOpcode); for (auto Def : AMI->MemI.defs()) NewInstr.addDef(Def.getReg()); diff --git a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-vector-load.mir b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-vector-load.mir index 93b201382fd4..805cec2cce6d 100644 --- a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-vector-load.mir +++ b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-vector-load.mir @@ -151,8 +151,6 @@ body: | $bmll0 = COPY %0(<8 x s64>) ... - - --- name: test_v32x16_fifo legalized: true @@ -189,7 +187,6 @@ body: | $lfl0 = COPY %0(<16 x s32>) ... 
- --- name: test_v32x32_fifo legalized: true @@ -201,9 +198,9 @@ body: | ; CHECK: liveins: $p0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 - ; CHECK-NEXT: [[VLDA_dmx_lda_fifohl_idx_imm:%[0-9]+]]:fifo512 = VLDA_dmx_lda_fifohl_idx_imm [[COPY]], 64 :: (load (<16 x s32>) from unknown-address + 64) - ; CHECK-NEXT: [[VLDA_dmx_lda_fifohl_idx_imm1:%[0-9]+]]:fifo512 = VLDA_dmx_lda_fifohl_idx_imm [[COPY]], 0 :: (load (<16 x s32>), align 128) - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:fifo1024 = REG_SEQUENCE [[VLDA_dmx_lda_fifohl_idx_imm1]], %subreg.sub_lo_fifo, [[VLDA_dmx_lda_fifohl_idx_imm]], %subreg.sub_hi_fifo + ; CHECK-NEXT: [[VLDA_dmx_lda_fifohl_idx_imm:%[0-9]+]]:fifo512 = VLDA_dmx_lda_fifohl_idx_imm [[COPY]], 0 :: (load (<16 x s32>), align 128) + ; CHECK-NEXT: [[VLDA_dmx_lda_fifohl_idx_imm1:%[0-9]+]]:fifo512 = VLDA_dmx_lda_fifohl_idx_imm [[COPY]], 64 :: (load (<16 x s32>) from unknown-address + 64) + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:fifo1024 = REG_SEQUENCE [[VLDA_dmx_lda_fifohl_idx_imm]], %subreg.sub_lo_fifo, [[VLDA_dmx_lda_fifohl_idx_imm1]], %subreg.sub_hi_fifo ; CHECK-NEXT: $lf0 = COPY [[REG_SEQUENCE]] %1:ptrregbank(p0) = COPY $p0 %0:fiforegbank(<32 x s32>) = G_LOAD %1(p0) :: (load (<32 x s32>)) @@ -222,9 +219,9 @@ body: | ; CHECK: liveins: $p0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 - ; CHECK-NEXT: [[VLDA_dmx_lda_fifohl_idx_imm:%[0-9]+]]:fifo512 = VLDA_dmx_lda_fifohl_idx_imm [[COPY]], 64 :: (load (<32 x s16>) from unknown-address + 64) - ; CHECK-NEXT: [[VLDA_dmx_lda_fifohl_idx_imm1:%[0-9]+]]:fifo512 = VLDA_dmx_lda_fifohl_idx_imm [[COPY]], 0 :: (load (<32 x s16>), align 128) - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:fifo1024 = REG_SEQUENCE [[VLDA_dmx_lda_fifohl_idx_imm1]], %subreg.sub_lo_fifo, [[VLDA_dmx_lda_fifohl_idx_imm]], %subreg.sub_hi_fifo + ; CHECK-NEXT: [[VLDA_dmx_lda_fifohl_idx_imm:%[0-9]+]]:fifo512 = VLDA_dmx_lda_fifohl_idx_imm [[COPY]], 0 :: (load (<32 x s16>), align 128) + ; CHECK-NEXT: 
[[VLDA_dmx_lda_fifohl_idx_imm1:%[0-9]+]]:fifo512 = VLDA_dmx_lda_fifohl_idx_imm [[COPY]], 64 :: (load (<32 x s16>) from unknown-address + 64) + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:fifo1024 = REG_SEQUENCE [[VLDA_dmx_lda_fifohl_idx_imm]], %subreg.sub_lo_fifo, [[VLDA_dmx_lda_fifohl_idx_imm1]], %subreg.sub_hi_fifo ; CHECK-NEXT: $lf0 = COPY [[REG_SEQUENCE]] %1:ptrregbank(p0) = COPY $p0 %0:fiforegbank(<64 x s16>) = G_LOAD %1(p0) :: (load (<64 x s16>)) @@ -242,12 +239,155 @@ body: | ; CHECK: liveins: $p0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 - ; CHECK-NEXT: [[VLDA_dmx_lda_fifohl_idx_imm:%[0-9]+]]:fifo512 = VLDA_dmx_lda_fifohl_idx_imm [[COPY]], 64 :: (load (<64 x s8>) from unknown-address + 64) - ; CHECK-NEXT: [[VLDA_dmx_lda_fifohl_idx_imm1:%[0-9]+]]:fifo512 = VLDA_dmx_lda_fifohl_idx_imm [[COPY]], 0 :: (load (<64 x s8>), align 128) - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:fifo1024 = REG_SEQUENCE [[VLDA_dmx_lda_fifohl_idx_imm1]], %subreg.sub_lo_fifo, [[VLDA_dmx_lda_fifohl_idx_imm]], %subreg.sub_hi_fifo + ; CHECK-NEXT: [[VLDA_dmx_lda_fifohl_idx_imm:%[0-9]+]]:fifo512 = VLDA_dmx_lda_fifohl_idx_imm [[COPY]], 0 :: (load (<64 x s8>), align 128) + ; CHECK-NEXT: [[VLDA_dmx_lda_fifohl_idx_imm1:%[0-9]+]]:fifo512 = VLDA_dmx_lda_fifohl_idx_imm [[COPY]], 64 :: (load (<64 x s8>) from unknown-address + 64) + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:fifo1024 = REG_SEQUENCE [[VLDA_dmx_lda_fifohl_idx_imm]], %subreg.sub_lo_fifo, [[VLDA_dmx_lda_fifohl_idx_imm1]], %subreg.sub_hi_fifo ; CHECK-NEXT: $lf0 = COPY [[REG_SEQUENCE]] %1:ptrregbank(p0) = COPY $p0 %0:fiforegbank(<128 x s8>) = G_LOAD %1(p0) :: (load (<128 x s8>)) $lf0 = COPY %0(<128 x s8>) ... 
+--- +name: test_v64int16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0 + ; CHECK-LABEL: name: test_v64int16 + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[VLDA_dmx_lda_x_idx_imm:%[0-9]+]]:vec512 = VLDA_dmx_lda_x_idx_imm [[COPY]], 0 :: (load (<32 x s16>), align 128) + ; CHECK-NEXT: [[VLDA_dmx_lda_x_idx_imm1:%[0-9]+]]:vec512 = VLDA_dmx_lda_x_idx_imm [[COPY]], 64 :: (load (<32 x s16>) from unknown-address + 64) + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vec1024 = REG_SEQUENCE [[VLDA_dmx_lda_x_idx_imm]], %subreg.sub_512_lo, [[VLDA_dmx_lda_x_idx_imm1]], %subreg.sub_512_hi + ; CHECK-NEXT: $y0 = COPY [[REG_SEQUENCE]] + %1:ptrregbank(p0) = COPY $p0 + %0:vregbank(<64 x s16>) = G_LOAD %1(p0) :: (load (<64 x s16>)) + $y0 = COPY %0(<64 x s16>) +... + +--- +name: test_v32int32 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0 + ; CHECK-LABEL: name: test_v32int32 + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[VLDA_dmx_lda_x_idx_imm:%[0-9]+]]:vec512 = VLDA_dmx_lda_x_idx_imm [[COPY]], 0 :: (load (<16 x s32>), align 128) + ; CHECK-NEXT: [[VLDA_dmx_lda_x_idx_imm1:%[0-9]+]]:vec512 = VLDA_dmx_lda_x_idx_imm [[COPY]], 64 :: (load (<16 x s32>) from unknown-address + 64) + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vec1024 = REG_SEQUENCE [[VLDA_dmx_lda_x_idx_imm]], %subreg.sub_512_lo, [[VLDA_dmx_lda_x_idx_imm1]], %subreg.sub_512_hi + ; CHECK-NEXT: $y0 = COPY [[REG_SEQUENCE]] + %1:ptrregbank(p0) = COPY $p0 + %0:vregbank(<32 x s32>) = G_LOAD %1(p0) :: (load (<32 x s32>)) + $y0 = COPY %0(<32 x s32>) +... 
+ +--- +name: test_v128int8 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0 + ; CHECK-LABEL: name: test_v128int8 + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[VLDA_dmx_lda_x_idx_imm:%[0-9]+]]:vec512 = VLDA_dmx_lda_x_idx_imm [[COPY]], 0 :: (load (<64 x s8>), align 128) + ; CHECK-NEXT: [[VLDA_dmx_lda_x_idx_imm1:%[0-9]+]]:vec512 = VLDA_dmx_lda_x_idx_imm [[COPY]], 64 :: (load (<64 x s8>) from unknown-address + 64) + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vec1024 = REG_SEQUENCE [[VLDA_dmx_lda_x_idx_imm]], %subreg.sub_512_lo, [[VLDA_dmx_lda_x_idx_imm1]], %subreg.sub_512_hi + ; CHECK-NEXT: $y0 = COPY [[REG_SEQUENCE]] + %1:ptrregbank(p0) = COPY $p0 + %0:vregbank(<128 x s8>) = G_LOAD %1(p0) :: (load (<128 x s8>)) + $y0 = COPY %0(<128 x s8>) +... + +--- +name: test_v32acc32 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0 + ; CHECK-LABEL: name: test_v32acc32 + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[COPY]], 0 :: (load (<16 x s32>), align 128) + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm1:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[COPY]], 64 :: (load (<16 x s32>) from unknown-address + 64) + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:acc1024 = REG_SEQUENCE [[VLDA_dmx_lda_bm_idx_imm]], %subreg.sub_512_acc_lo, [[VLDA_dmx_lda_bm_idx_imm1]], %subreg.sub_512_acc_hi + ; CHECK-NEXT: $cml0 = COPY [[REG_SEQUENCE]] + %1:ptrregbank(p0) = COPY $p0 + %0:accregbank(<32 x s32>) = G_LOAD %1(p0) :: (load (<32 x s32>)) + $cml0 = COPY %0(<32 x s32>) +... 
+ +--- +name: test_v16acc64 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0 + ; CHECK-LABEL: name: test_v16acc64 + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[COPY]], 0 :: (load (<8 x s64>), align 128) + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm1:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[COPY]], 64 :: (load (<8 x s64>) from unknown-address + 64) + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:acc1024 = REG_SEQUENCE [[VLDA_dmx_lda_bm_idx_imm]], %subreg.sub_512_acc_lo, [[VLDA_dmx_lda_bm_idx_imm1]], %subreg.sub_512_acc_hi + ; CHECK-NEXT: $cml0 = COPY [[REG_SEQUENCE]] + %1:ptrregbank(p0) = COPY $p0 + %0:accregbank(<16 x s64>) = G_LOAD %1(p0) :: (load (<16 x s64>)) + $cml0 = COPY %0(<16 x s64>) +... + +--- +name: test_v64acc32 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0 + ; CHECK-LABEL: name: test_v64acc32 + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[COPY]], 0 :: (load (<16 x s32>), align 256) + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm1:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[COPY]], 64 :: (load (<16 x s32>) from unknown-address + 64) + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm2:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[COPY]], 128 :: (load (<16 x s32>) from unknown-address + 128, align 128) + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm3:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[COPY]], 192 :: (load (<16 x s32>) from unknown-address + 192) + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:acc2048 = REG_SEQUENCE [[VLDA_dmx_lda_bm_idx_imm]], %subreg.sub_512_acc_lo, [[VLDA_dmx_lda_bm_idx_imm1]], %subreg.sub_512_acc_hi, [[VLDA_dmx_lda_bm_idx_imm2]], %subreg.sub_1024_acc_hi_then_sub_512_acc_lo, [[VLDA_dmx_lda_bm_idx_imm3]], 
%subreg.sub_1024_acc_hi_then_sub_512_acc_hi + ; CHECK-NEXT: $dm0 = COPY [[REG_SEQUENCE]] + %1:ptrregbank(p0) = COPY $p0 + %0:accregbank(<64 x s32>) = G_LOAD %1(p0) :: (load (<64 x s32>)) + $dm0 = COPY %0(<64 x s32>) +... + +--- +name: test_v32acc64 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0 + ; CHECK-LABEL: name: test_v32acc64 + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[COPY]], 0 :: (load (<8 x s64>), align 256) + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm1:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[COPY]], 64 :: (load (<8 x s64>) from unknown-address + 64) + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm2:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[COPY]], 128 :: (load (<8 x s64>) from unknown-address + 128, align 128) + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm3:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[COPY]], 192 :: (load (<8 x s64>) from unknown-address + 192) + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:acc2048 = REG_SEQUENCE [[VLDA_dmx_lda_bm_idx_imm]], %subreg.sub_512_acc_lo, [[VLDA_dmx_lda_bm_idx_imm1]], %subreg.sub_512_acc_hi, [[VLDA_dmx_lda_bm_idx_imm2]], %subreg.sub_1024_acc_hi_then_sub_512_acc_lo, [[VLDA_dmx_lda_bm_idx_imm3]], %subreg.sub_1024_acc_hi_then_sub_512_acc_hi + ; CHECK-NEXT: $dm0 = COPY [[REG_SEQUENCE]] + %1:ptrregbank(p0) = COPY $p0 + %0:accregbank(<32 x s64>) = G_LOAD %1(p0) :: (load (<32 x s64>)) + $dm0 = COPY %0(<32 x s64>) +... 
diff --git a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-vector-store.mir b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-vector-store.mir index 25c1ab952ada..a82eea007243 100644 --- a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-vector-store.mir +++ b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-vector-store.mir @@ -202,9 +202,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fifo1024 = COPY $lf0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fifo512 = COPY [[COPY1]].sub_lo_fifo + ; CHECK-NEXT: VST_dmx_sts_fifohl_idx_imm [[COPY2]], [[COPY]], 0 :: (store (<16 x s32>), align 128) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fifo512 = COPY [[COPY1]].sub_hi_fifo ; CHECK-NEXT: VST_dmx_sts_fifohl_idx_imm [[COPY3]], [[COPY]], 64 :: (store (<16 x s32>) into unknown-address + 64) - ; CHECK-NEXT: VST_dmx_sts_fifohl_idx_imm [[COPY2]], [[COPY]], 0 :: (store (<16 x s32>), align 128) %0:ptrregbank(p0) = COPY $p0 %1:fiforegbank(<32 x s32>) = COPY $lf0 G_STORE %1(<32 x s32>), %0(p0) :: (store (<32 x s32>)) @@ -224,9 +224,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fifo1024 = COPY $lf0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fifo512 = COPY [[COPY1]].sub_lo_fifo + ; CHECK-NEXT: VST_dmx_sts_fifohl_idx_imm [[COPY2]], [[COPY]], 0 :: (store (<32 x s16>), align 128) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fifo512 = COPY [[COPY1]].sub_hi_fifo ; CHECK-NEXT: VST_dmx_sts_fifohl_idx_imm [[COPY3]], [[COPY]], 64 :: (store (<32 x s16>) into unknown-address + 64) - ; CHECK-NEXT: VST_dmx_sts_fifohl_idx_imm [[COPY2]], [[COPY]], 0 :: (store (<32 x s16>), align 128) %0:ptrregbank(p0) = COPY $p0 %1:fiforegbank(<64 x s16>) = COPY $lf0 G_STORE %1(<64 x s16>), %0(p0) :: (store (<64 x s16>)) @@ -245,11 +245,166 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fifo1024 = COPY $lf0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fifo512 = COPY [[COPY1]].sub_lo_fifo + ; CHECK-NEXT: VST_dmx_sts_fifohl_idx_imm [[COPY2]], 
[[COPY]], 0 :: (store (<64 x s8>), align 128) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fifo512 = COPY [[COPY1]].sub_hi_fifo ; CHECK-NEXT: VST_dmx_sts_fifohl_idx_imm [[COPY3]], [[COPY]], 64 :: (store (<64 x s8>) into unknown-address + 64) - ; CHECK-NEXT: VST_dmx_sts_fifohl_idx_imm [[COPY2]], [[COPY]], 0 :: (store (<64 x s8>), align 128) %0:ptrregbank(p0) = COPY $p0 %1:fiforegbank(<128 x s8>) = COPY $lf0 G_STORE %1(<128 x s8>), %0(p0) :: (store (<128 x s8>)) ... +--- +name: test_v64int16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $y0 + ; CHECK-LABEL: name: test_v64int16 + ; CHECK: liveins: $p0, $y0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vec1024 = COPY $y0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vec512 = COPY [[COPY1]].sub_512_lo + ; CHECK-NEXT: VST_dmx_sts_x_idx_imm [[COPY2]], [[COPY]], 0 :: (store (<32 x s16>), align 128) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vec512 = COPY [[COPY1]].sub_512_hi + ; CHECK-NEXT: VST_dmx_sts_x_idx_imm [[COPY3]], [[COPY]], 64 :: (store (<32 x s16>) into unknown-address + 64) + %0:ptrregbank(p0) = COPY $p0 + %1:vregbank(<64 x s16>) = COPY $y0 + G_STORE %1(<64 x s16>), %0(p0) :: (store (<64 x s16>)) +... 
+ +--- +name: test_v32int32 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $y0 + ; CHECK-LABEL: name: test_v32int32 + ; CHECK: liveins: $p0, $y0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vec1024 = COPY $y0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vec512 = COPY [[COPY1]].sub_512_lo + ; CHECK-NEXT: VST_dmx_sts_x_idx_imm [[COPY2]], [[COPY]], 0 :: (store (<16 x s32>), align 128) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vec512 = COPY [[COPY1]].sub_512_hi + ; CHECK-NEXT: VST_dmx_sts_x_idx_imm [[COPY3]], [[COPY]], 64 :: (store (<16 x s32>) into unknown-address + 64) + %0:ptrregbank(p0) = COPY $p0 + %1:vregbank(<32 x s32>) = COPY $y0 + G_STORE %1(<32 x s32>), %0(p0) :: (store (<32 x s32>)) +... + +--- +name: test_v128int8 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $y0 + ; CHECK-LABEL: name: test_v128int8 + ; CHECK: liveins: $p0, $y0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vec1024 = COPY $y0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vec512 = COPY [[COPY1]].sub_512_lo + ; CHECK-NEXT: VST_dmx_sts_x_idx_imm [[COPY2]], [[COPY]], 0 :: (store (<64 x s8>), align 128) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vec512 = COPY [[COPY1]].sub_512_hi + ; CHECK-NEXT: VST_dmx_sts_x_idx_imm [[COPY3]], [[COPY]], 64 :: (store (<64 x s8>) into unknown-address + 64) + %0:ptrregbank(p0) = COPY $p0 + %1:vregbank(<128 x s8>) = COPY $y0 + G_STORE %1(<128 x s8>), %0(p0) :: (store (<128 x s8>)) +... 
+ +--- +name: test_v32acc32 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $cml0, $p0 + ; CHECK-LABEL: name: test_v32acc32 + ; CHECK: liveins: $cml0, $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:acc1024 = COPY $cml0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:acc512 = COPY [[COPY1]].sub_512_acc_lo + ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY2]], [[COPY]], 0 :: (store (<16 x s32>), align 128) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:acc512 = COPY [[COPY1]].sub_512_acc_hi + ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY3]], [[COPY]], 64 :: (store (<16 x s32>) into unknown-address + 64) + %0:ptrregbank(p0) = COPY $p0 + %1:accregbank(<32 x s32>) = COPY $cml0 + G_STORE %1(<32 x s32>), %0(p0) :: (store (<32 x s32>)) +... + +--- +name: test_v16acc64 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $cml0, $p0 + ; CHECK-LABEL: name: test_v16acc64 + ; CHECK: liveins: $cml0, $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:acc1024 = COPY $cml0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:acc512 = COPY [[COPY1]].sub_512_acc_lo + ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY2]], [[COPY]], 0 :: (store (<8 x s64>), align 128) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:acc512 = COPY [[COPY1]].sub_512_acc_hi + ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY3]], [[COPY]], 64 :: (store (<8 x s64>) into unknown-address + 64) + %0:ptrregbank(p0) = COPY $p0 + %1:accregbank(<16 x s64>) = COPY $cml0 + G_STORE %1(<16 x s64>), %0(p0) :: (store (<16 x s64>)) +... 
+ +--- +name: test_v64acc32 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $dm0, $p0 + ; CHECK-LABEL: name: test_v64acc32 + ; CHECK: liveins: $dm0, $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:acc2048 = COPY $dm0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:acc512 = COPY [[COPY1]].sub_512_acc_lo + ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY2]], [[COPY]], 0 :: (store (<16 x s32>), align 256) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:acc512 = COPY [[COPY1]].sub_512_acc_hi + ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY3]], [[COPY]], 64 :: (store (<16 x s32>) into unknown-address + 64) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:acc512 = COPY [[COPY1]].sub_1024_acc_hi_then_sub_512_acc_lo + ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY4]], [[COPY]], 128 :: (store (<16 x s32>) into unknown-address + 128, align 128) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:acc512 = COPY [[COPY1]].sub_1024_acc_hi_then_sub_512_acc_hi + ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY5]], [[COPY]], 192 :: (store (<16 x s32>) into unknown-address + 192) + %0:ptrregbank(p0) = COPY $p0 + %1:accregbank(<64 x s32>) = COPY $dm0 + G_STORE %1(<64 x s32>), %0(p0) :: (store (<64 x s32>)) +... 
+ +--- +name: test_v32acc64 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $dm0, $p0 + ; CHECK-LABEL: name: test_v32acc64 + ; CHECK: liveins: $dm0, $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:acc2048 = COPY $dm0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:acc512 = COPY [[COPY1]].sub_512_acc_lo + ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY2]], [[COPY]], 0 :: (store (<8 x s64>), align 256) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:acc512 = COPY [[COPY1]].sub_512_acc_hi + ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY3]], [[COPY]], 64 :: (store (<8 x s64>) into unknown-address + 64) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:acc512 = COPY [[COPY1]].sub_1024_acc_hi_then_sub_512_acc_lo + ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY4]], [[COPY]], 128 :: (store (<8 x s64>) into unknown-address + 128, align 128) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:acc512 = COPY [[COPY1]].sub_1024_acc_hi_then_sub_512_acc_hi + ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY5]], [[COPY]], 192 :: (store (<8 x s64>) into unknown-address + 192) + %0:ptrregbank(p0) = COPY $p0 + %1:accregbank(<32 x s64>) = COPY $dm0 + G_STORE %1(<32 x s64>), %0(p0) :: (store (<32 x s64>)) +... +