Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change LDS & VMEM hardware latency (in scheduling model) #1

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
Open
14 changes: 7 additions & 7 deletions llvm/lib/CodeGen/MachineScheduler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1599,7 +1599,7 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
continue;
LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum
<< ")\n");
DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
// DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
}
} else {
// Copy predecessor edges from SUb to SUa to avoid the SUnits that
Expand Down Expand Up @@ -3148,12 +3148,12 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,
DAG->MF))
return;

// Avoid increasing the max critical pressure in the scheduled region.
if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
Cand.RPDelta.CriticalMax,
TryCand, Cand, RegCritical, TRI,
DAG->MF))
return;
// // Avoid increasing the max critical pressure in the scheduled region.
// if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
// Cand.RPDelta.CriticalMax,
// TryCand, Cand, RegCritical, TRI,
// DAG->MF))
// return;

// We only compare a subset of features when comparing nodes between
// Top and Bottom boundary. Some properties are simply incomparable, in many
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -645,7 +645,7 @@ void GCNSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,

// Enabling both top down and bottom up scheduling seems to give us fewer
// register spills than just using one of these approaches on its own.
Policy.OnlyTopDown = false;
Policy.OnlyTopDown = true;
Policy.OnlyBottomUp = false;

// Enabling ShouldTrackLaneMasks crashes the SI Machine Scheduler.
Expand Down
8 changes: 5 additions & 3 deletions llvm/lib/Target/AMDGPU/SISchedule.td
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ class SISchedMachineModel : SchedMachineModel {
// to the register pressure analysis.
let MicroOpBufferSize = 1;
let IssueWidth = 1;
let PostRAScheduler = 1;
let PostRAScheduler = 0;

// FIXME: Approximate 2 * branch cost. Try to hack around bad
// early-ifcvt heuristics. These need improvement to avoid the OOE
Expand All @@ -96,13 +96,13 @@ def HWExport : ProcResource<1> {
let BufferSize = 7; // Taken from S_WAITCNT
}
def HWLGKM : ProcResource<1> {
let BufferSize = 31; // Taken from S_WAITCNT
let BufferSize = 0; // Taken from S_WAITCNT
}
def HWSALU : ProcResource<1> {
let BufferSize = 1;
}
def HWVMEM : ProcResource<1> {
let BufferSize = 15; // Taken from S_WAITCNT
let BufferSize = 0; // Taken from S_WAITCNT
}
def HWVALU : ProcResource<1> {
let BufferSize = 1;
Expand Down Expand Up @@ -139,9 +139,11 @@ multiclass SICommonWriteRes {

def : HWWriteRes<WriteBranch, [HWBranch], 8>;
def : HWWriteRes<WriteExport, [HWExport], 4>;
let ResourceCycles = [5] in
def : HWWriteRes<WriteLDS, [HWLGKM], 5>; // Can be between 2 and 64
def : HWWriteRes<WriteSALU, [HWSALU], 1>;
def : HWWriteRes<WriteSMEM, [HWLGKM], 5>;
let ResourceCycles = [80] in
def : HWWriteRes<WriteVMEM, [HWVMEM], 80>;
def : HWWriteRes<WriteBarrier, [HWBranch], 500>; // XXX: Guessed ???

Expand Down