Skip to content

Commit

Permalink
[AIEX] Reschedule Multi-Slot Instruction for better schedule
Browse files Browse the repository at this point in the history
  • Loading branch information
krishnamtibrewala committed Oct 9, 2024
1 parent 74fa8be commit 70ae414
Show file tree
Hide file tree
Showing 8 changed files with 313 additions and 44 deletions.
9 changes: 8 additions & 1 deletion llvm/lib/Target/AIE/AIEAlternateDescriptors.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@

namespace llvm {

// Maps an instruction to a (MachineInstr, MCInstrDesc) pair. As filled in by
// AIEPostRASchedStrategy::canShiftSlot(), the key is the instruction about to
// be scheduled and the value names an already scheduled multi-slot
// instruction together with the alternate descriptor it should be switched to.
using MutateInstructionMap =
std::unordered_map<MachineInstr *,
std::pair<MachineInstr *, const MCInstrDesc *>>;
// Maps an instruction to the alternate descriptor recorded for it.
using MIAltDescsMap = std::unordered_map<MachineInstr *, const MCInstrDesc *>;

class AIEAlternateDescriptors {
Expand All @@ -40,7 +43,11 @@ class AIEAlternateDescriptors {
const AIEBaseSubtarget &STI = AIEBaseSubtarget::get(*MI->getMF());
const AIEBaseInstrInfo *TII = STI.getInstrInfo();

AlternateDescs[MI] = &TII->get(AltInstOpcode);
setAlternateDescriptor(MI, &TII->get(AltInstOpcode));
}

// Record AltDesc as the alternate descriptor for MI. The value is stored by
// assignment through operator[], so a descriptor previously recorded for MI
// is replaced. AltDesc is stored as given; no validation is performed here.
void setAlternateDescriptor(MachineInstr *MI, const MCInstrDesc *AltDesc) {
AlternateDescs[MI] = AltDesc;
}

// Return the alternate descriptor for the given multi-opcode instruction.
Expand Down
20 changes: 18 additions & 2 deletions llvm/lib/Target/AIE/AIEHazardRecognizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,14 @@ auto toHazardType(bool Conflict) {
}
} // namespace

// Convenience overload: query the hazard type against this recognizer's own
// Scoreboard. All arguments are forwarded unchanged to the overload that takes
// an explicit scoreboard as its first parameter, and its result is returned.
ScheduleHazardRecognizer::HazardType AIEHazardRecognizer::getHazardType(
const MCInstrDesc &Desc, MemoryBankBits MemoryBanks,
iterator_range<const MachineOperand *> MIOperands,
const MachineRegisterInfo &MRI, int DeltaCycles) {
return getHazardType(Scoreboard, Desc, MemoryBanks, MIOperands, MRI,
DeltaCycles);
}

// These functions interpret the itinerary, translating InstrStages
// to ResourceCycles to apply.
// We deviate from the standard ScoreboardHazardRecognizer by not
Expand Down Expand Up @@ -548,7 +556,7 @@ bool AIEHazardRecognizer::onlyFormatHazard(
const ResourceScoreboard<FuncUnitWrapper> &TheScoreboard,
const MCInstrDesc &Desc, MemoryBankBits MemoryBanks,
iterator_range<const MachineOperand *> MIOperands,
const MachineRegisterInfo &MRI, int DeltaCycles) {
const MachineRegisterInfo &MRI, int DeltaCycles) const {

unsigned const SchedClass = TII->getSchedClass(Desc, MIOperands, MRI);
SlotBits SlotSet =
Expand All @@ -568,7 +576,7 @@ bool AIEHazardRecognizer::onlyFormatHazard(
return checkFormatConflict(Scoreboard, DeltaCycles, SlotSet);
}

bool AIEHazardRecognizer::onlyFormatHazard(SUnit *SU, int DeltaCycles) {
bool AIEHazardRecognizer::onlyFormatHazard(SUnit *SU, int DeltaCycles) const {
MachineInstr *MI = SU->getInstr();
const std::vector<unsigned int> *AlternateInsts =
TII->getFormatInterface()->getAlternateInstsOpcode(MI->getOpcode());
Expand Down Expand Up @@ -607,6 +615,14 @@ void AIEHazardRecognizer::emitInScoreboard(
TII->getMemoryCycles(SchedClass), DeltaCycles, FUDepthLimit);
}

// Convenience overload: release the resources of the described instruction
// from this recognizer's own Scoreboard. All arguments are forwarded unchanged
// to the overload that takes an explicit scoreboard as its first parameter.
void AIEHazardRecognizer::releaseFromScoreboard(
const MCInstrDesc &Desc, MemoryBankBits MemoryBanks,
iterator_range<const MachineOperand *> MIOperands,
const MachineRegisterInfo &MRI, int DeltaCycles) {
releaseFromScoreboard(Scoreboard, Desc, MemoryBanks, MIOperands, MRI,
DeltaCycles);
}

void AIEHazardRecognizer::releaseFromScoreboard(
ResourceScoreboard<FuncUnitWrapper> &TheScoreboard, const MCInstrDesc &Desc,
MemoryBankBits MemoryBanks,
Expand Down
14 changes: 12 additions & 2 deletions llvm/lib/Target/AIE/AIEHazardRecognizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ class AIEHazardRecognizer : public ScheduleHazardRecognizer {
void RecedeCycle() override;

// Check if the instruction only has a format hazard
bool onlyFormatHazard(SUnit *SU, int DeltaCycles);
bool onlyFormatHazard(SUnit *SU, int DeltaCycles) const;

/// Check conflict with Other shifted by DeltaCycles into the
/// future relative to *this.
Expand Down Expand Up @@ -159,6 +159,11 @@ class AIEHazardRecognizer : public ScheduleHazardRecognizer {
iterator_range<const MachineOperand *> MIOperands,
const MachineRegisterInfo &MRI,
int DeltaCycles) const;
// Apply the above function to the local scoreboard.
void releaseFromScoreboard(const MCInstrDesc &Desc,
MemoryBankBits MemoryBanks,
iterator_range<const MachineOperand *> MIOperands,
const MachineRegisterInfo &MRI, int DeltaCycles);

/// Block all scoreboard resources at DeltaCycles
void blockCycleInScoreboard(int DeltaCycle);
Expand Down Expand Up @@ -200,6 +205,11 @@ class AIEHazardRecognizer : public ScheduleHazardRecognizer {
const MCInstrDesc &Desc, MemoryBankBits MemoryBanks,
iterator_range<const MachineOperand *> MIOperands,
const MachineRegisterInfo &MRI, int DeltaCycles) const;
ScheduleHazardRecognizer::HazardType
getHazardType(const MCInstrDesc &Desc, MemoryBankBits MemoryBanks,
iterator_range<const MachineOperand *> MIOperands,
const MachineRegisterInfo &MRI, int DeltaCycles);

bool checkConflict(const ResourceScoreboard<FuncUnitWrapper> &Scoreboard,
MachineInstr &MI, int DeltaCycles) const;

Expand Down Expand Up @@ -232,7 +242,7 @@ class AIEHazardRecognizer : public ScheduleHazardRecognizer {
onlyFormatHazard(const ResourceScoreboard<FuncUnitWrapper> &TheScoreboard,
const MCInstrDesc &Desc, MemoryBankBits MemoryBanks,
iterator_range<const MachineOperand *> MIOperands,
const MachineRegisterInfo &MRI, int DeltaCycles);
const MachineRegisterInfo &MRI, int DeltaCycles) const;

static void enterResources(ResourceScoreboard<FuncUnitWrapper> &Scoreboard,
const InstrItineraryData *ItinData,
Expand Down
155 changes: 154 additions & 1 deletion llvm/lib/Target/AIE/AIEMachineScheduler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,13 @@ static cl::opt<bool> UseLoopHeuristics(
"aie-loop-sched-heuristics", cl::init(true),
cl::desc("Use special picking heuristics when scheduling a loop region"));

/// This option enables instruction mutation to shift a multi-slot instruction
/// in the event of a slot conflict. It is enabled by default; when disabled,
/// AIEPostRASchedStrategy::canShiftSlot() always answers false.
static cl::opt<bool> InstructionMutation(
    "aie-instruction-mutation", cl::init(true),
    cl::desc("Allow instruction mutation to shift a multislot "
             "instruction in event of a slot conflict"));

namespace {
// A sentinel value to represent an unknown SUnit.
const constexpr unsigned UnknownSUNum = ~0;
Expand Down Expand Up @@ -478,6 +485,124 @@ int AIEPostRASchedStrategy::getMaxDeltaCycles(const SchedBoundary &Zone) const {
BottomUpDelta.getValue()});
}

static bool checkSlotConflict(const unsigned OpCodeA, const unsigned OpCodeB,
const AIEBaseMCFormats &Formats) {

MCSlotKind SlotKindA = Formats.getSlotKind(OpCodeA);
MCSlotKind SlotKindB = Formats.getSlotKind(OpCodeB);

if (SlotKindA != MCSlotKind() && SlotKindB != MCSlotKind()) {
return (Formats.getSlotInfo(SlotKindA)->getSlotSet() &
Formats.getSlotInfo(SlotKindB)->getSlotSet());
}
return true;
}

/// Check whether \p SU can be emitted \p DeltaCycle cycles away from the
/// current cycle of \p Zone by moving an already scheduled multi-slot
/// instruction of that cycle to one of its alternate opcodes.
///
/// Only candidates without alternate opcodes whose sole hazard is a format
/// hazard are considered. For each scheduled multi-slot instruction in the
/// target cycle that has a slot conflict with \p SU, the scoreboard is
/// temporarily edited: the multi-slot instruction is released, \p SU is
/// emitted, and every alternate opcode is probed. The scoreboard is always
/// restored to its original state before returning.
///
/// On success the (multi-slot instruction, alternate descriptor) pair is
/// cached in MutateInstruction under \p SU's instruction so that schedNode()
/// can apply the mutation.
///
/// \returns true if a hazard-free alternate opcode was found, false otherwise
/// (including when instruction mutation is disabled).
bool AIEPostRASchedStrategy::canShiftSlot(SUnit &SU, SchedBoundary &Zone,
                                          const int DeltaCycle) {

  if (!InstructionMutation)
    return false;

  const auto *TII = getTII(*Zone.DAG);
  const AIEBaseMCFormats &Formats = *TII->getFormatInterface();
  AIEHazardRecognizer &HR = *getAIEHazardRecognizer(Zone);
  MachineInstr *NewMI = SU.getInstr();
  const unsigned NewOpcode = NewMI->getOpcode();

  // We are only interested in single slot instructions that have only a
  // format hazard.
  // TODO : Extend this to SUs that are multi-slot and have only format hazard
  if (Formats.getAlternateInstsOpcode(NewOpcode) ||
      !HR.onlyFormatHazard(&SU, DeltaCycle))
    return false;

  const auto NoHazard = ScheduleHazardRecognizer::HazardType::NoHazard;

  // Both values are loop invariant: compute them once.
  const int CurrCycle = Zone.getCurrCycle();
  const unsigned TargetCycle = static_cast<unsigned int>(CurrCycle - DeltaCycle);

  for (MachineInstr &MI : *Zone.DAG) {
    const SUnit *ZoneSU = Zone.DAG->getSUnit(&MI);
    if (!ZoneSU || !ZoneSU->isScheduled)
      continue;

    // Only instructions emitted in the cycle targeted by SU are relevant.
    // For those, CurrCycle - BotReadyCycle equals DeltaCycle, which is what
    // the scoreboard updates below use.
    if (ZoneSU->BotReadyCycle != TargetCycle)
      continue;

    // Check for a multi-slot instruction scheduled in the same cycle; we
    // focus on multi-slot because they can be scheduled in different slots.
    const auto *AltOpcodes = Formats.getAlternateInstsOpcode(MI.getOpcode());
    if (!AltOpcodes)
      continue;

    // Check if the scheduled multi-slot instruction has a slot conflict
    // with the new instruction; if so we might have the possibility to shift
    // the multi-slot and schedule the new instruction.
    if (!checkSlotConflict(HR.getSelectedAltDescs().getOpcode(&MI), NewOpcode,
                           Formats))
      continue;

    // Remove / re-add the multi-slot instruction with its currently selected
    // descriptor.
    auto ReleaseMultiSlot = [&] {
      HR.releaseFromScoreboard(*HR.getSelectedAltDescs().getDesc(&MI),
                               HR.getMemoryBanks(&MI), MI.operands(),
                               MI.getMF()->getRegInfo(), DeltaCycle);
    };
    auto RestoreMultiSlot = [&] {
      HR.emitInScoreboard(*HR.getSelectedAltDescs().getDesc(&MI),
                          HR.getMemoryBanks(&MI), MI.operands(),
                          MI.getMF()->getRegInfo(), DeltaCycle);
    };

    // Release the multi-slot instruction from the scoreboard to check if any
    // other alternate opcode, in presence of the new instruction, will not
    // create a hazard.
    ReleaseMultiSlot();

    bool FoundAlternate = false;
    // Check if the new instruction can be scheduled after unscheduling the
    // conflicting multi-slot instruction.
    if (HR.getHazardType(NewMI->getDesc(), HR.getMemoryBanks(NewMI),
                         NewMI->operands(), NewMI->getMF()->getRegInfo(),
                         DeltaCycle) == NoHazard) {
      // Emit the new instruction in the scoreboard. This will help us to
      // check if the previously unscheduled multi-slot instruction can be
      // scheduled in the same cycle with an alternate opcode.
      HR.emitInScoreboard(NewMI->getDesc(), HR.getMemoryBanks(NewMI),
                          NewMI->operands(), NewMI->getMF()->getRegInfo(),
                          DeltaCycle);

      for (const unsigned AltOpcode : *AltOpcodes) {
        const MCInstrDesc &Desc = TII->get(AltOpcode);
        if (HR.getHazardType(Desc, HR.getMemoryBanks(&MI), MI.operands(),
                             MI.getMF()->getRegInfo(),
                             DeltaCycle) != NoHazard)
          continue;
        // Cache the information to mutate the instruction when the node is
        // scheduled. Overwrite any earlier entry for this instruction so
        // that the cache always reflects the latest decision.
        MutateInstruction.insert_or_assign(NewMI, std::make_pair(&MI, &Desc));
        FoundAlternate = true;
        break;
      }

      // Take the new instruction out of the scoreboard again.
      HR.releaseFromScoreboard(NewMI->getDesc(), HR.getMemoryBanks(NewMI),
                               NewMI->operands(), NewMI->getMF()->getRegInfo(),
                               DeltaCycle);
    }

    // Revert the scoreboard to its original state.
    RestoreMultiSlot();

    if (FoundAlternate)
      return true;
  }
  return false;
}

bool AIEPostRASchedStrategy::isAvailableNode(SUnit &SU, SchedBoundary &Zone,
bool /*VerifyReadyCycle*/) {
// Whether or not the zone is Top or Bot, verify if SU is ready to be
Expand All @@ -496,7 +621,8 @@ bool AIEPostRASchedStrategy::isAvailableNode(SUnit &SU, SchedBoundary &Zone,
// ReadyCycle is always greater or equal to the current cycle,
// so DeltaCycles will always be less or equal to 0.
if (Zone.checkHazard(&SU, DeltaCycles))
continue;
if (!canShiftSlot(SU, Zone, DeltaCycles))
continue;
SU.BotReadyCycle = CurrCycle - DeltaCycles;
return true;
}
Expand All @@ -513,10 +639,37 @@ void AIEPostRASchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
if (IsTopNode) {
PostGenericScheduler::schedNode(SU, IsTopNode);
} else {
AIEHazardRecognizer &HR = *getAIEHazardRecognizer(Bot);
int DeltaCycles = int(Bot.getCurrCycle()) - int(SU->BotReadyCycle);
assert(DeltaCycles <= 0);

// Check if an instruction needs to be moved to a different slot.
if (MutateInstruction.find(SU->getInstr()) != MutateInstruction.end()) {
auto [MI, Desc] = MutateInstruction[SU->getInstr()];
HR.releaseFromScoreboard(*HR.getSelectedAltDescs().getDesc(MI),
HR.getMemoryBanks(MI), MI->operands(),
MI->getMF()->getRegInfo(), DeltaCycles);
// Update the selected opcode for the instruction, refer
// AIEPostRASchedStrategy::canShiftSlot()
HR.getSelectedAltDescs().setAlternateDescriptor(MI, Desc);
}

Bot.bumpNode(SU, DeltaCycles);

if (MutateInstruction.find(SU->getInstr()) != MutateInstruction.end()) {
auto [MI, Desc] = MutateInstruction[SU->getInstr()];
assert(HR.getHazardType(*Desc, HR.getMemoryBanks(MI), MI->operands(),
MI->getMF()->getRegInfo(), DeltaCycles) ==
ScheduleHazardRecognizer::HazardType::NoHazard);
// Reschedule the instruction with the new opcode.
HR.emitInScoreboard(*Desc, HR.getMemoryBanks(MI), MI->operands(),
MI->getMF()->getRegInfo(), DeltaCycles);
}
}
// Clear the MutateInstruction map since after scheduling the instruction the
// validity of mutation map can no longer be guaranteed.
MutateInstruction.clear();
SU->isScheduled = true;
}

void AIEPostRASchedStrategy::enterFunction(MachineFunction *MF) {
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AIE/AIEMachineScheduler.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ std::vector<AIE::MachineBundle> computeAndFinalizeBundles(SchedBoundary &Zone);
class AIEPostRASchedStrategy : public PostGenericScheduler {
/// Maintain the state of interblock/loop-aware scheduling
AIE::InterBlockScheduling InterBlock;
MutateInstructionMap MutateInstruction;

public:
AIEPostRASchedStrategy(const MachineSchedContext *C);
Expand All @@ -50,6 +51,7 @@ class AIEPostRASchedStrategy : public PostGenericScheduler {
SUnit *pickNodeAndCycle(bool &IsTopNode,
std::optional<unsigned> &BotEmissionCycle) override;

bool canShiftSlot(SUnit &SU, SchedBoundary &Zone, const int DeltaCycle);
bool isAvailableNode(SUnit &SU, SchedBoundary &Zone,
bool VerifyReadyCycle) override;

Expand Down
57 changes: 27 additions & 30 deletions llvm/test/CodeGen/AIE/aie2/end-to-end/Add2D-red.ll
Original file line number Diff line number Diff line change
Expand Up @@ -35,52 +35,49 @@ define void @add2d(ptr noalias %params, ptr noalias %ifm1_data, ptr noalias %ifm
; ASM-LABEL: add2d:
; ASM: .p2align 4
; ASM-NEXT: // %bb.0: // %newFuncRoot
; ASM-NEXT: paddb [sp], #32
; ASM-NEXT: paddb [sp], #32; nopxm
; ASM-NEXT: st p7, [sp, #-32] // 4-byte Folded Spill
; ASM-NEXT: paddb [p0], #40; st p6, [sp, #-28] // 4-byte Folded Spill
; ASM-NEXT: lda m2, [p0], #-4
; ASM-NEXT: lda m5, [p0], #8
; ASM-NEXT: lda m2, [p0], #-4; mov p6, sp
; ASM-NEXT: lda m5, [p0], #8; paddb [p6], #-36
; ASM-NEXT: lda p6, [p6, #0]
; ASM-NEXT: lda m4, [p0], #8
; ASM-NEXT: lda m3, [p0], #-24
; ASM-NEXT: lda r4, [p0], #36
; ASM-NEXT: lda r1, [p0], #-32
; ASM-NEXT: lda r0, [p0], #-12; mov p6, sp
; ASM-NEXT: lda r2, [p0], #40; paddb [p6], #-36
; ASM-NEXT: lda p6, [p6, #0]; mov p7, sp
; ASM-NEXT: paddb [p7], #-40
; ASM-NEXT: lda r0, [p0], #-12; mov p7, sp
; ASM-NEXT: lda r2, [p0], #40; paddb [p7], #-40
; ASM-NEXT: lda p7, [p7, #0]
; ASM-NEXT: nop
; ASM-NEXT: lda m1, [p0], #36
; ASM-NEXT: lda m0, [p0], #-8
; ASM-NEXT: lda dn0, [p0], #-8
; ASM-NEXT: lda dj0, [p0], #12
; ASM-NEXT: st r2, [p4, #0]
; ASM-NEXT: lda dj0, [p0], #12; nez r3, r0; mov p4, sp
; ASM-NEXT: st r3, [p5, #0]
; ASM-NEXT: lda dn4, [p0], #-8; paddb [p4], #-44; mov p5, sp
; ASM-NEXT: lda p4, [p4, #0]; paddb [p5], #-48
; ASM-NEXT: lda dn4, [p0], #-8; nez r3, r0; mov p4, sp
; ASM-NEXT: paddb [p4], #-44; st r3, [p5, #0]
; ASM-NEXT: lda p4, [p4, #0]; mov p5, sp
; ASM-NEXT: lda dj4, [p0], #-36; paddb [p5], #-48
; ASM-NEXT: lda p5, [p5, #0]
; ASM-NEXT: lda dj4, [p0], #-36
; ASM-NEXT: nop
; ASM-NEXT: st m1, [p6, #0]
; ASM-NEXT: lda r0, [p0], #-36; mov p6, sp
; ASM-NEXT: st m0, [p7, #0]
; ASM-NEXT: paddb [p6], #-52; st m0, [p7, #0]
; ASM-NEXT: lda r6, [p6, #0]; mov p6, sp
; ASM-NEXT: st dj0, [p4, #0]
; ASM-NEXT: lda r5, [p0, #0]; mov p4, sp
; ASM-NEXT: paddb [p4], #-60; st dj4, [p5, #0]
; ASM-NEXT: lda p7, [p4, #0]; paddb [p6], #-52; mov p5, sp
; ASM-NEXT: lda r6, [p6, #0]; paddb [p5], #-76; mov p4, sp
; ASM-NEXT: lda r9, [p5, #0]; paddb [p4], #-64; mov p5, sp
; ASM-NEXT: lda r8, [p4, #0]; paddb [p5], #-80; mov p0, sp
; ASM-NEXT: lda r10, [p5, #0]; paddb [p0], #-68; mov p5, sp
; ASM-NEXT: lda p4, [p0, #0]; paddb [p5], #-84; mov p6, sp
; ASM-NEXT: lda r11, [p5, #0]
; ASM-NEXT: paddb [p6], #-56; mov p0, sp
; ASM-NEXT: lda p6, [p6, #0]; paddb [p0], #-72; mov p5, sp
; ASM-NEXT: lda p0, [p0, #0]; paddb [p5], #-88
; ASM-NEXT: lda r12, [p5, #0]; mov p5, sp
; ASM-NEXT: paddb [p5], #-92
; ASM-NEXT: lda r13, [p5, #0]
; ASM-NEXT: mova r6, #1; add r7, r1, #-1; mov p5, r6
; ASM-NEXT: mova r6, #3; ne r4, r4, r6
; ASM-NEXT: st dj4, [p5, #0]
; ASM-NEXT: lda r5, [p0, #0]; paddb [p6], #-56; mov p5, sp
; ASM-NEXT: lda p6, [p6, #0]; paddb [p5], #-76; mov p0, sp
; ASM-NEXT: lda r9, [p5, #0]; mov p5, sp
; ASM-NEXT: paddb [p5], #-80; mov p4, sp
; ASM-NEXT: lda r10, [p5, #0]; paddb [p4], #-60; mov p5, sp
; ASM-NEXT: lda p7, [p4, #0]; paddb [p5], #-84; mov p4, sp
; ASM-NEXT: lda r11, [p5, #0]; paddb [p4], #-64; mov p5, sp
; ASM-NEXT: lda r8, [p4, #0]; paddb [p5], #-88
; ASM-NEXT: lda r12, [p5, #0]; paddb [p0], #-68; mov p5, sp
; ASM-NEXT: lda p4, [p0, #0]; paddb [p5], #-92; add r7, r1, #-1; mov p0, sp
; ASM-NEXT: lda r13, [p5, #0]; paddb [p0], #-72; movx r6, #1; mov p5, r6
; ASM-NEXT: lda p0, [p0, #0]; ne r4, r4, r6; mov r6, #3
; ASM-NEXT: st dn0, [p5, #0]; ltu r7, r7, r6
; ASM-NEXT: st dn4, [p6, #0]; jz r7, #.LBB0_2
; ASM-NEXT: nez r0, r0; mov p6, r8 // Delay Slot 5
Expand Down
Loading

0 comments on commit 70ae414

Please sign in to comment.