From e4e071d487030ef3c17d64bc3050e33f779fdc81 Mon Sep 17 00:00:00 2001
From: Christudasan Devadasan
Date: Wed, 5 Feb 2025 15:40:40 +0530
Subject: [PATCH] [CodeGen][NewPM] Port SIWholeQuadMode to NPM.

---
Reviewer note (not part of the commit message): this patch was reconstructed
from a copy whose newlines, indentation and angle-bracketed text had been
stripped. Template arguments, blank context lines and indentation were
restored by inference and cross-checked against the hunk line counts; the
author e-mail could not be recovered. Verify against the upstream commit
before applying. The Emacs mode marker in the banner of the new
SIWholeQuadMode.h was corrected to "-*- C++ -*-".

 llvm/lib/Target/AMDGPU/AMDGPU.h               |  4 +-
 llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def |  2 +-
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |  3 +-
 llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp    | 84 +++++++++++++------
 llvm/lib/Target/AMDGPU/SIWholeQuadMode.h      | 27 ++++++
 llvm/test/CodeGen/AMDGPU/licm-wwm.mir         |  1 +
 .../CodeGen/AMDGPU/si-init-whole-wave.mir     |  1 +
 llvm/test/CodeGen/AMDGPU/wqm-terminators.mir  |  1 +
 llvm/test/CodeGen/AMDGPU/wqm.mir              |  1 +
 9 files changed, 94 insertions(+), 30 deletions(-)
 create mode 100644 llvm/lib/Target/AMDGPU/SIWholeQuadMode.h

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 31656c98ccd36fa..fa3496dd5c9c29c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -41,7 +41,7 @@ FunctionPass *createSIPeepholeSDWALegacyPass();
 FunctionPass *createSILowerI1CopiesLegacyPass();
 FunctionPass *createSIShrinkInstructionsLegacyPass();
 FunctionPass *createSILoadStoreOptimizerLegacyPass();
-FunctionPass *createSIWholeQuadModePass();
+FunctionPass *createSIWholeQuadModeLegacyPass();
 FunctionPass *createSIFixControlFlowLiveIntervalsPass();
 FunctionPass *createSIOptimizeExecMaskingPreRAPass();
 FunctionPass *createSIOptimizeVGPRLiveRangeLegacyPass();
@@ -204,7 +204,7 @@ extern char &SILowerSGPRSpillsLegacyID;
 void initializeSILoadStoreOptimizerLegacyPass(PassRegistry &);
 extern char &SILoadStoreOptimizerLegacyID;
 
-void initializeSIWholeQuadModePass(PassRegistry &);
+void initializeSIWholeQuadModeLegacyPass(PassRegistry &);
 extern char &SIWholeQuadModeID;
 
 void initializeSILowerControlFlowLegacyPass(PassRegistry &);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index 45e2f0d9097adfd..224515aeb26fb22 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -111,6 +111,7 @@ MACHINE_FUNCTION_PASS("si-optimize-exec-masking", SIOptimizeExecMaskingPass())
 MACHINE_FUNCTION_PASS("si-peephole-sdwa", SIPeepholeSDWAPass())
 MACHINE_FUNCTION_PASS("si-pre-allocate-wwm-regs", SIPreAllocateWWMRegsPass())
 MACHINE_FUNCTION_PASS("si-shrink-instructions", SIShrinkInstructionsPass())
+MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass())
 #undef MACHINE_FUNCTION_PASS
 
 #define DUMMY_MACHINE_FUNCTION_PASS(NAME, CREATE_PASS)
@@ -140,7 +141,6 @@ DUMMY_MACHINE_FUNCTION_PASS("amdgpu-global-isel-divergence-lowering", AMDGPUGlob
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-regbankselect", AMDGPURegBankSelectPass())
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-regbanklegalize", AMDGPURegBankLegalizePass())
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-regbank-combiner", AMDGPURegBankCombinerPass())
-DUMMY_MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass())
 #undef DUMMY_MACHINE_FUNCTION_PASS
 
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 4003fdba0555b8e..1df03748332e5cd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -50,6 +50,7 @@
 #include "SIPeepholeSDWA.h"
 #include "SIPreAllocateWWMRegs.h"
 #include "SIShrinkInstructions.h"
+#include "SIWholeQuadMode.h"
 #include "TargetInfo/AMDGPUTargetInfo.h"
 #include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/Analysis/CGSCCPassManager.h"
@@ -529,7 +530,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
   initializeSIInsertHardClausesPass(*PR);
   initializeSIInsertWaitcntsPass(*PR);
   initializeSIModeRegisterPass(*PR);
-  initializeSIWholeQuadModePass(*PR);
+  initializeSIWholeQuadModeLegacyPass(*PR);
   initializeSILowerControlFlowLegacyPass(*PR);
   initializeSIPreEmitPeepholePass(*PR);
   initializeSILateBranchLoweringPass(*PR);
diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index 87eb6d9e385d468..3293602db090177 100644
--- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -67,6 +67,7 @@
 ///
 //===----------------------------------------------------------------------===//
 
+#include "SIWholeQuadMode.h"
 #include "AMDGPU.h"
 #include "GCNSubtarget.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
@@ -148,11 +149,19 @@ struct WorkItem {
   WorkItem(MachineInstr *MI) : MI(MI) {}
 };
 
-class SIWholeQuadMode : public MachineFunctionPass {
+class SIWholeQuadMode {
+public:
+  SIWholeQuadMode(MachineFunction &MF, LiveIntervals *LIS,
+                  MachineDominatorTree *MDT, MachinePostDominatorTree *PDT)
+      : ST(&MF.getSubtarget<GCNSubtarget>()), TII(ST->getInstrInfo()),
+        TRI(&TII->getRegisterInfo()), MRI(&MF.getRegInfo()), LIS(LIS), MDT(MDT),
+        PDT(PDT) {}
+  bool run(MachineFunction &MF);
+
 private:
+  const GCNSubtarget *ST;
   const SIInstrInfo *TII;
   const SIRegisterInfo *TRI;
-  const GCNSubtarget *ST;
   MachineRegisterInfo *MRI;
   LiveIntervals *LIS;
   MachineDominatorTree *MDT;
@@ -225,12 +234,13 @@ class SIWholeQuadMode : public MachineFunctionPass {
   void lowerInitExec(MachineInstr &MI);
   MachineBasicBlock::iterator lowerInitExecInstrs(MachineBasicBlock &Entry,
                                                   bool &Changed);
+};
 
+class SIWholeQuadModeLegacy : public MachineFunctionPass {
 public:
   static char ID;
 
-  SIWholeQuadMode() :
-    MachineFunctionPass(ID) { }
+  SIWholeQuadModeLegacy() : MachineFunctionPass(ID) {}
 
   bool runOnMachineFunction(MachineFunction &MF) override;
 
@@ -250,23 +260,22 @@ class SIWholeQuadMode : public MachineFunctionPass {
         MachineFunctionProperties::Property::IsSSA);
   }
 };
-
 } // end anonymous namespace
 
-char SIWholeQuadMode::ID = 0;
+char SIWholeQuadModeLegacy::ID = 0;
 
-INITIALIZE_PASS_BEGIN(SIWholeQuadMode, DEBUG_TYPE, "SI Whole Quad Mode", false,
-                      false)
+INITIALIZE_PASS_BEGIN(SIWholeQuadModeLegacy, DEBUG_TYPE, "SI Whole Quad Mode",
+                      false, false)
 INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTreeWrapperPass)
-INITIALIZE_PASS_END(SIWholeQuadMode, DEBUG_TYPE, "SI Whole Quad Mode", false,
-                    false)
+INITIALIZE_PASS_END(SIWholeQuadModeLegacy, DEBUG_TYPE, "SI Whole Quad Mode",
+                    false, false)
 
-char &llvm::SIWholeQuadModeID = SIWholeQuadMode::ID;
+char &llvm::SIWholeQuadModeID = SIWholeQuadModeLegacy::ID;
 
-FunctionPass *llvm::createSIWholeQuadModePass() {
-  return new SIWholeQuadMode;
+FunctionPass *llvm::createSIWholeQuadModeLegacyPass() {
+  return new SIWholeQuadModeLegacy;
 }
 
 #ifndef NDEBUG
@@ -1689,7 +1698,7 @@ SIWholeQuadMode::lowerInitExecInstrs(MachineBasicBlock &Entry, bool &Changed) {
   return InsertPt;
 }
 
-bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
+bool SIWholeQuadMode::run(MachineFunction &MF) {
   LLVM_DEBUG(dbgs() << "SI Whole Quad Mode on " << MF.getName()
                     << " ------------- \n");
   LLVM_DEBUG(MF.dump(););
@@ -1704,18 +1713,6 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
   SetInactiveInstrs.clear();
   StateTransition.clear();
 
-  ST = &MF.getSubtarget<GCNSubtarget>();
-
-  TII = ST->getInstrInfo();
-  TRI = &TII->getRegisterInfo();
-  MRI = &MF.getRegInfo();
-  LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
-  auto *MDTWrapper = getAnalysisIfAvailable<MachineDominatorTreeWrapperPass>();
-  MDT = MDTWrapper ? &MDTWrapper->getDomTree() : nullptr;
-  auto *PDTWrapper =
-      getAnalysisIfAvailable<MachinePostDominatorTreeWrapperPass>();
-  PDT = PDTWrapper ? &PDTWrapper->getPostDomTree() : nullptr;
-
   if (ST->isWave32()) {
     AndOpc = AMDGPU::S_AND_B32;
     AndTermOpc = AMDGPU::S_AND_B32_term;
@@ -1816,3 +1813,38 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
 
   return Changed;
 }
+
+bool SIWholeQuadModeLegacy::runOnMachineFunction(MachineFunction &MF) {
+  LiveIntervals *LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
+  auto *MDTWrapper = getAnalysisIfAvailable<MachineDominatorTreeWrapperPass>();
+  MachineDominatorTree *MDT = MDTWrapper ? &MDTWrapper->getDomTree() : nullptr;
+  auto *PDTWrapper =
+      getAnalysisIfAvailable<MachinePostDominatorTreeWrapperPass>();
+  MachinePostDominatorTree *PDT =
+      PDTWrapper ? &PDTWrapper->getPostDomTree() : nullptr;
+  SIWholeQuadMode Impl(MF, LIS, MDT, PDT);
+  return Impl.run(MF);
+}
+
+PreservedAnalyses
+SIWholeQuadModePass::run(MachineFunction &MF,
+                         MachineFunctionAnalysisManager &MFAM) {
+  MFPropsModifier _(*this, MF);
+
+  LiveIntervals *LIS = &MFAM.getResult<LiveIntervalsAnalysis>(MF);
+  MachineDominatorTree *MDT =
+      MFAM.getCachedResult<MachineDominatorTreeAnalysis>(MF);
+  MachinePostDominatorTree *PDT =
+      MFAM.getCachedResult<MachinePostDominatorTreeAnalysis>(MF);
+  SIWholeQuadMode Impl(MF, LIS, MDT, PDT);
+  bool Changed = Impl.run(MF);
+  if (!Changed)
+    return PreservedAnalyses::all();
+
+  PreservedAnalyses PA = getMachineFunctionPassPreservedAnalyses();
+  PA.preserve<SlotIndexesAnalysis>();
+  PA.preserve<LiveIntervalsAnalysis>();
+  PA.preserve<MachineDominatorTreeAnalysis>();
+  PA.preserve<MachinePostDominatorTreeAnalysis>();
+  return PA;
+}
diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.h b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.h
new file mode 100644
index 000000000000000..e30b46721841b4d
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.h
@@ -0,0 +1,27 @@
+//===- SIWholeQuadMode.h ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_SIWHOLEQUADMODE_H
+#define LLVM_LIB_TARGET_AMDGPU_SIWHOLEQUADMODE_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+class SIWholeQuadModePass : public PassInfoMixin<SIWholeQuadModePass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+                        MachineFunctionAnalysisManager &MFAM);
+
+  MachineFunctionProperties getClearedProperties() const {
+    return MachineFunctionProperties().set(
+        MachineFunctionProperties::Property::IsSSA);
+  }
+};
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_SIWHOLEQUADMODE_H
diff --git a/llvm/test/CodeGen/AMDGPU/licm-wwm.mir b/llvm/test/CodeGen/AMDGPU/licm-wwm.mir
index fc20674971a7167..85525aa4dbb0984 100644
--- a/llvm/test/CodeGen/AMDGPU/licm-wwm.mir
+++ b/llvm/test/CodeGen/AMDGPU/licm-wwm.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -run-pass=early-machinelicm,si-wqm -o - %s | FileCheck -check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes=early-machinelicm,si-wqm -o - %s | FileCheck -check-prefix=GCN %s
 
 # Machine LICM may hoist an intruction from a WWM region, which will force SI-WQM pass
 # to create a second WWM region. This is an unwanted hoisting.
diff --git a/llvm/test/CodeGen/AMDGPU/si-init-whole-wave.mir b/llvm/test/CodeGen/AMDGPU/si-init-whole-wave.mir
index a4a9c04bb0c6a58..c02301446861d55 100644
--- a/llvm/test/CodeGen/AMDGPU/si-init-whole-wave.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-init-whole-wave.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass si-wqm -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -passes=si-wqm -o - %s | FileCheck %s
 
 ---
 # Test that we don't do silly things when there is no whole wave mode in the
diff --git a/llvm/test/CodeGen/AMDGPU/wqm-terminators.mir b/llvm/test/CodeGen/AMDGPU/wqm-terminators.mir
index 8d75bb3b1280f74..7656629a7b00987 100644
--- a/llvm/test/CodeGen/AMDGPU/wqm-terminators.mir
+++ b/llvm/test/CodeGen/AMDGPU/wqm-terminators.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass si-wqm -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -passes=si-wqm -o - %s | FileCheck %s
 
 --- |
   define amdgpu_ps void @exit_to_exact() {
diff --git a/llvm/test/CodeGen/AMDGPU/wqm.mir b/llvm/test/CodeGen/AMDGPU/wqm.mir
index 4762760c4ba24b4..99327e1d3c4985c 100644
--- a/llvm/test/CodeGen/AMDGPU/wqm.mir
+++ b/llvm/test/CodeGen/AMDGPU/wqm.mir
@@ -1,4 +1,5 @@
 # RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass si-wqm -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -passes=si-wqm -o - %s | FileCheck %s
 
 --- |
   define amdgpu_ps void @test_strict_wwm_scc() {