whchung · poyenc · Jun 9, 2022 · Jun 9, 2022 · Jun 9, 2022 · Jun 9, 2022
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -1599,7 +1599,7 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
           continue;
         LLVM_DEBUG(dbgs() << "  Copy Succ SU(" << Succ.getSUnit()->NodeNum
                           << ")\n");
-        DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
+        // DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
       }
     } else {
       // Copy predecessor edges from SUb to SUa to avoid the SUnits that
@@ -3148,12 +3148,12 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,
                                                DAG->MF))
     return;
 
-  // Avoid increasing the max critical pressure in the scheduled region.
-  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
-                                               Cand.RPDelta.CriticalMax,
-                                               TryCand, Cand, RegCritical, TRI,
-                                               DAG->MF))
-    return;
+  // DISABLED: avoid increasing max critical pressure in the scheduled region.
+  // if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
+  //                                              Cand.RPDelta.CriticalMax,
+  //                                              TryCand, Cand, RegCritical, TRI,
+  //                                              DAG->MF))
+  //   return;
 
   // We only compare a subset of features when comparing nodes between
   // Top and Bottom boundary. Some properties are simply incomparable, in many

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -645,7 +645,7 @@ void GCNSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
 
   // Enabling both top down and bottom up scheduling seems to give us less
   // register spills than just using one of these approaches on its own.
-  Policy.OnlyTopDown = false;
+  Policy.OnlyTopDown = true;
   Policy.OnlyBottomUp = false;
 
   // Enabling ShouldTrackLaneMasks crashes the SI Machine Scheduler.

diff --git a/llvm/lib/Target/AMDGPU/SISchedule.td b/llvm/lib/Target/AMDGPU/SISchedule.td
@@ -75,7 +75,7 @@ class SISchedMachineModel : SchedMachineModel {
   // to the register pressure analysis.
   let MicroOpBufferSize = 1;
   let IssueWidth = 1;
-  let PostRAScheduler = 1;
+  let PostRAScheduler = 0;
 
   // FIXME:Approximate 2 * branch cost.  Try to hack around bad
   // early-ifcvt heuristics. These need improvement to avoid the OOE
@@ -96,13 +96,13 @@ def HWExport : ProcResource<1> {
   let BufferSize = 7; // Taken from S_WAITCNT
 }
 def HWLGKM   : ProcResource<1> {
-  let BufferSize = 31;  // Taken from S_WAITCNT
+  let BufferSize = 0;  // 0 = unbuffered; was 31 (taken from S_WAITCNT)
 }
 def HWSALU   : ProcResource<1> {
   let BufferSize = 1;
 }
 def HWVMEM   : ProcResource<1> {
-  let BufferSize = 15;  // Taken from S_WAITCNT
+  let BufferSize = 0;  // 0 = unbuffered; was 15 (taken from S_WAITCNT)
 }
 def HWVALU   : ProcResource<1> {
   let BufferSize = 1;
@@ -139,9 +139,11 @@ multiclass SICommonWriteRes {
 
   def : HWWriteRes<WriteBranch,  [HWBranch], 8>;
   def : HWWriteRes<WriteExport,  [HWExport], 4>;
+  let ResourceCycles = [5] in
   def : HWWriteRes<WriteLDS,     [HWLGKM],   5>; // Can be between 2 and 64
   def : HWWriteRes<WriteSALU,    [HWSALU],   1>;
   def : HWWriteRes<WriteSMEM,    [HWLGKM],   5>;
+  let ResourceCycles = [80] in
   def : HWWriteRes<WriteVMEM,    [HWVMEM],   80>;
   def : HWWriteRes<WriteBarrier, [HWBranch], 500>; // XXX: Guessed ???