Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CodeGen][NewPM] Port SIWholeQuadMode to NPM. #125833

Merged
merged 1 commit into from
Feb 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ FunctionPass *createSIPeepholeSDWALegacyPass();
FunctionPass *createSILowerI1CopiesLegacyPass();
FunctionPass *createSIShrinkInstructionsLegacyPass();
FunctionPass *createSILoadStoreOptimizerLegacyPass();
FunctionPass *createSIWholeQuadModePass();
FunctionPass *createSIWholeQuadModeLegacyPass();
FunctionPass *createSIFixControlFlowLiveIntervalsPass();
FunctionPass *createSIOptimizeExecMaskingPreRAPass();
FunctionPass *createSIOptimizeVGPRLiveRangeLegacyPass();
Expand Down Expand Up @@ -204,7 +204,7 @@ extern char &SILowerSGPRSpillsLegacyID;
void initializeSILoadStoreOptimizerLegacyPass(PassRegistry &);
extern char &SILoadStoreOptimizerLegacyID;

void initializeSIWholeQuadModePass(PassRegistry &);
void initializeSIWholeQuadModeLegacyPass(PassRegistry &);
extern char &SIWholeQuadModeID;

void initializeSILowerControlFlowLegacyPass(PassRegistry &);
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ MACHINE_FUNCTION_PASS("si-optimize-exec-masking", SIOptimizeExecMaskingPass())
MACHINE_FUNCTION_PASS("si-peephole-sdwa", SIPeepholeSDWAPass())
MACHINE_FUNCTION_PASS("si-pre-allocate-wwm-regs", SIPreAllocateWWMRegsPass())
MACHINE_FUNCTION_PASS("si-shrink-instructions", SIShrinkInstructionsPass())
MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass())
#undef MACHINE_FUNCTION_PASS

#define DUMMY_MACHINE_FUNCTION_PASS(NAME, CREATE_PASS)
Expand Down Expand Up @@ -140,7 +141,6 @@ DUMMY_MACHINE_FUNCTION_PASS("amdgpu-global-isel-divergence-lowering", AMDGPUGlob
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-regbankselect", AMDGPURegBankSelectPass())
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-regbanklegalize", AMDGPURegBankLegalizePass())
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-regbank-combiner", AMDGPURegBankCombinerPass())
DUMMY_MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass())

#undef DUMMY_MACHINE_FUNCTION_PASS

Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
#include "SIPeepholeSDWA.h"
#include "SIPreAllocateWWMRegs.h"
#include "SIShrinkInstructions.h"
#include "SIWholeQuadMode.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/Analysis/CGSCCPassManager.h"
Expand Down Expand Up @@ -529,7 +530,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeSIInsertHardClausesPass(*PR);
initializeSIInsertWaitcntsPass(*PR);
initializeSIModeRegisterPass(*PR);
initializeSIWholeQuadModePass(*PR);
initializeSIWholeQuadModeLegacyPass(*PR);
initializeSILowerControlFlowLegacyPass(*PR);
initializeSIPreEmitPeepholePass(*PR);
initializeSILateBranchLoweringPass(*PR);
Expand Down
84 changes: 58 additions & 26 deletions llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
///
//===----------------------------------------------------------------------===//

#include "SIWholeQuadMode.h"
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
Expand Down Expand Up @@ -148,11 +149,19 @@ struct WorkItem {
WorkItem(MachineInstr *MI) : MI(MI) {}
};

class SIWholeQuadMode : public MachineFunctionPass {
class SIWholeQuadMode {
public:
SIWholeQuadMode(MachineFunction &MF, LiveIntervals *LIS,
MachineDominatorTree *MDT, MachinePostDominatorTree *PDT)
: ST(&MF.getSubtarget<GCNSubtarget>()), TII(ST->getInstrInfo()),
TRI(&TII->getRegisterInfo()), MRI(&MF.getRegInfo()), LIS(LIS), MDT(MDT),
PDT(PDT) {}
bool run(MachineFunction &MF);

private:
const GCNSubtarget *ST;
const SIInstrInfo *TII;
const SIRegisterInfo *TRI;
const GCNSubtarget *ST;
MachineRegisterInfo *MRI;
LiveIntervals *LIS;
MachineDominatorTree *MDT;
Expand Down Expand Up @@ -225,12 +234,13 @@ class SIWholeQuadMode : public MachineFunctionPass {
void lowerInitExec(MachineInstr &MI);
MachineBasicBlock::iterator lowerInitExecInstrs(MachineBasicBlock &Entry,
bool &Changed);
};

class SIWholeQuadModeLegacy : public MachineFunctionPass {
public:
static char ID;

SIWholeQuadMode() :
MachineFunctionPass(ID) { }
SIWholeQuadModeLegacy() : MachineFunctionPass(ID) {}

bool runOnMachineFunction(MachineFunction &MF) override;

Expand All @@ -250,23 +260,22 @@ class SIWholeQuadMode : public MachineFunctionPass {
MachineFunctionProperties::Property::IsSSA);
}
};

} // end anonymous namespace

char SIWholeQuadMode::ID = 0;
char SIWholeQuadModeLegacy::ID = 0;

INITIALIZE_PASS_BEGIN(SIWholeQuadMode, DEBUG_TYPE, "SI Whole Quad Mode", false,
false)
INITIALIZE_PASS_BEGIN(SIWholeQuadModeLegacy, DEBUG_TYPE, "SI Whole Quad Mode",
false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTreeWrapperPass)
INITIALIZE_PASS_END(SIWholeQuadMode, DEBUG_TYPE, "SI Whole Quad Mode", false,
false)
INITIALIZE_PASS_END(SIWholeQuadModeLegacy, DEBUG_TYPE, "SI Whole Quad Mode",
false, false)

char &llvm::SIWholeQuadModeID = SIWholeQuadMode::ID;
char &llvm::SIWholeQuadModeID = SIWholeQuadModeLegacy::ID;

FunctionPass *llvm::createSIWholeQuadModePass() {
return new SIWholeQuadMode;
FunctionPass *llvm::createSIWholeQuadModeLegacyPass() {
return new SIWholeQuadModeLegacy;
}

#ifndef NDEBUG
Expand Down Expand Up @@ -1689,7 +1698,7 @@ SIWholeQuadMode::lowerInitExecInstrs(MachineBasicBlock &Entry, bool &Changed) {
return InsertPt;
}

bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
bool SIWholeQuadMode::run(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "SI Whole Quad Mode on " << MF.getName()
<< " ------------- \n");
LLVM_DEBUG(MF.dump(););
Expand All @@ -1704,18 +1713,6 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
SetInactiveInstrs.clear();
StateTransition.clear();

ST = &MF.getSubtarget<GCNSubtarget>();

TII = ST->getInstrInfo();
TRI = &TII->getRegisterInfo();
MRI = &MF.getRegInfo();
LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
auto *MDTWrapper = getAnalysisIfAvailable<MachineDominatorTreeWrapperPass>();
MDT = MDTWrapper ? &MDTWrapper->getDomTree() : nullptr;
auto *PDTWrapper =
getAnalysisIfAvailable<MachinePostDominatorTreeWrapperPass>();
PDT = PDTWrapper ? &PDTWrapper->getPostDomTree() : nullptr;

if (ST->isWave32()) {
AndOpc = AMDGPU::S_AND_B32;
AndTermOpc = AMDGPU::S_AND_B32_term;
Expand Down Expand Up @@ -1816,3 +1813,38 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {

return Changed;
}

/// Legacy pass-manager entry point.
///
/// Collects the analyses through the legacy pass interfaces and hands them to
/// a freshly constructed SIWholeQuadMode, which performs the actual work.
///
/// \param MF the machine function to process.
/// \returns whatever SIWholeQuadMode::run reports for \p MF.
bool SIWholeQuadModeLegacy::runOnMachineFunction(MachineFunction &MF) {
  // Live intervals are fetched unconditionally via getAnalysis.
  LiveIntervals &Intervals = getAnalysis<LiveIntervalsWrapperPass>().getLIS();

  // The dominator tree is only used when a wrapper pass is already available;
  // otherwise the implementation receives a null pointer.
  MachineDominatorTree *DomTree = nullptr;
  if (auto *DomWrapper =
          getAnalysisIfAvailable<MachineDominatorTreeWrapperPass>())
    DomTree = &DomWrapper->getDomTree();

  // Same treatment for the post-dominator tree.
  MachinePostDominatorTree *PostDomTree = nullptr;
  if (auto *PostDomWrapper =
          getAnalysisIfAvailable<MachinePostDominatorTreeWrapperPass>())
    PostDomTree = &PostDomWrapper->getPostDomTree();

  SIWholeQuadMode WQM(MF, &Intervals, DomTree, PostDomTree);
  return WQM.run(MF);
}

/// New pass-manager entry point.
///
/// Obtains LiveIntervals from \p MFAM (computing it if necessary), picks up
/// the dominator and post-dominator trees only if they are already cached,
/// and delegates to the shared SIWholeQuadMode implementation.
///
/// \param MF   the machine function to process.
/// \param MFAM analysis manager used to look up the analyses.
/// \returns PreservedAnalyses::all() when nothing changed; otherwise the
///          default machine-function-pass set extended with SlotIndexes,
///          LiveIntervals and both dominator-tree analyses.
PreservedAnalyses
SIWholeQuadModePass::run(MachineFunction &MF,
                         MachineFunctionAnalysisManager &MFAM) {
  // NOTE(review): presumably applies this pass's declared property changes
  // (see getClearedProperties) for the duration of the run; confirm against
  // MFPropsModifier.
  MFPropsModifier PropsGuard(*this, MF);

  LiveIntervals &Intervals = MFAM.getResult<LiveIntervalsAnalysis>(MF);
  // Cached lookups yield null when the analysis has not been computed.
  MachineDominatorTree *DomTree =
      MFAM.getCachedResult<MachineDominatorTreeAnalysis>(MF);
  MachinePostDominatorTree *PostDomTree =
      MFAM.getCachedResult<MachinePostDominatorTreeAnalysis>(MF);

  SIWholeQuadMode WQM(MF, &Intervals, DomTree, PostDomTree);
  if (!WQM.run(MF))
    return PreservedAnalyses::all();

  PreservedAnalyses Preserved = getMachineFunctionPassPreservedAnalyses();
  Preserved.preserve<SlotIndexesAnalysis>();
  Preserved.preserve<LiveIntervalsAnalysis>();
  Preserved.preserve<MachineDominatorTreeAnalysis>();
  Preserved.preserve<MachinePostDominatorTreeAnalysis>();
  return Preserved;
}
27 changes: 27 additions & 0 deletions llvm/lib/Target/AMDGPU/SIWholeQuadMode.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
//===- SIWholeQuadMode.h ---------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_SIWHOLEQUADMODE_H
#define LLVM_LIB_TARGET_AMDGPU_SIWHOLEQUADMODE_H

#include "llvm/CodeGen/MachinePassManager.h"

namespace llvm {
/// New pass-manager wrapper for the "si-wqm" machine function pass.
class SIWholeQuadModePass : public PassInfoMixin<SIWholeQuadModePass> {
public:
  /// Runs the pass on \p MF, looking up analyses through \p MFAM.
  PreservedAnalyses run(MachineFunction &MF,
                        MachineFunctionAnalysisManager &MFAM);

  /// Reports the machine function properties this pass clears: IsSSA.
  MachineFunctionProperties getClearedProperties() const {
    MachineFunctionProperties Cleared;
    Cleared.set(MachineFunctionProperties::Property::IsSSA);
    return Cleared;
  }
};
} // namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_SIWHOLEQUADMODE_H
1 change: 1 addition & 0 deletions llvm/test/CodeGen/AMDGPU/licm-wwm.mir
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -run-pass=early-machinelicm,si-wqm -o - %s | FileCheck -check-prefix=GCN %s
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes=early-machinelicm,si-wqm -o - %s | FileCheck -check-prefix=GCN %s

# Machine LICM may hoist an instruction from a WWM region, which will force the SI-WQM pass
# to create a second WWM region. This is an unwanted hoisting.
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/AMDGPU/si-init-whole-wave.mir
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass si-wqm -o - %s | FileCheck %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -passes=si-wqm -o - %s | FileCheck %s

---
# Test that we don't do silly things when there is no whole wave mode in the
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/AMDGPU/wqm-terminators.mir
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass si-wqm -o - %s | FileCheck %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -passes=si-wqm -o - %s | FileCheck %s

--- |
define amdgpu_ps void @exit_to_exact() {
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/AMDGPU/wqm.mir
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass si-wqm -o - %s | FileCheck %s
# RUN: llc -mtriple=amdgcn -mcpu=fiji -passes=si-wqm -o - %s | FileCheck %s

--- |
define amdgpu_ps void @test_strict_wwm_scc() {
Expand Down