From ab976a17121374ae3407374b2aa6306e95863eb3 Mon Sep 17 00:00:00 2001
From: Stephen Long <63318318+steplong@users.noreply.github.com>
Date: Fri, 24 Jan 2025 14:02:06 -0500
Subject: [PATCH] PreISelIntrinsicLowering: Lower llvm.exp/llvm.exp2 to a loop if scalable vec arg (#117568)

---
 llvm/include/llvm/CodeGen/TargetLowering.h    |  4 +
 .../Transforms/Utils/LowerVectorIntrinsics.h  | 30 ++++++++
 llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp | 14 ++++
 llvm/lib/CodeGen/TargetLoweringBase.cpp       | 11 +++
 llvm/lib/Transforms/Utils/CMakeLists.txt      |  1 +
 .../Utils/LowerVectorIntrinsics.cpp           | 73 +++++++++++++++++++
 .../AArch64/expand-exp.ll                     | 43 +++++++++++
 .../AArch64/lit.local.cfg                     |  2 +
 .../llvm/lib/Transforms/Utils/BUILD.gn        |  1 +
 9 files changed, 179 insertions(+)
 create mode 100644 llvm/include/llvm/Transforms/Utils/LowerVectorIntrinsics.h
 create mode 100644 llvm/lib/Transforms/Utils/LowerVectorIntrinsics.cpp
 create mode 100644 llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/expand-exp.ll
 create mode 100644 llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/lit.local.cfg

diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 59743dbe4d2ea..861cffdc115a4 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2123,6 +2123,10 @@ class TargetLoweringBase {
   /// Get the ISD node that corresponds to the Instruction class opcode.
   int InstructionOpcodeToISD(unsigned Opcode) const;
 
+  /// Get the ISD node that corresponds to the Intrinsic ID. Returns
+  /// ISD::DELETED_NODE by default for an unsupported Intrinsic ID.
+  int IntrinsicIDToISD(Intrinsic::ID ID) const;
+
   /// @}
 
   //===--------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/Transforms/Utils/LowerVectorIntrinsics.h b/llvm/include/llvm/Transforms/Utils/LowerVectorIntrinsics.h
new file mode 100644
index 0000000000000..cb48bb01e178a
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Utils/LowerVectorIntrinsics.h
@@ -0,0 +1,30 @@
+//===- llvm/Transforms/Utils/LowerVectorIntrinsics.h ------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Lower intrinsics with a scalable vector arg to loops.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_LOWERVECTORINTRINSICS_H
+#define LLVM_TRANSFORMS_UTILS_LOWERVECTORINTRINSICS_H
+
+#include <cstdint>
+#include <optional>
+
+namespace llvm {
+
+class CallInst;
+class Module;
+
+/// Lower \p CI as a loop. \p CI is a unary intrinsic with a vector argument and
+/// is deleted and replaced with a loop.
+bool lowerUnaryVectorIntrinsicAsLoop(Module &M, CallInst *CI);
+
+} // namespace llvm
+
+#endif
diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
index 4a3d1673c2a7c..048a6a49e4cb9 100644
--- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -33,6 +33,7 @@
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
 #include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
+#include "llvm/Transforms/Utils/LowerVectorIntrinsics.h"
 
 using namespace llvm;
 
@@ -453,6 +454,19 @@ bool PreISelIntrinsicLowering::lowerIntrinsics(Module &M) const {
     case Intrinsic::objc_sync_exit:
       Changed |= lowerObjCCall(F, "objc_sync_exit");
       break;
+    case Intrinsic::exp:
+    case Intrinsic::exp2:
+      Changed |= forEachCall(F, [&](CallInst *CI) {
+        Type *Ty = CI->getArgOperand(0)->getType();
+        if (!isa<ScalableVectorType>(Ty))
+          return false;
+        const TargetLowering *TL = TM->getSubtargetImpl(F)->getTargetLowering();
+        unsigned Op = TL->IntrinsicIDToISD(F.getIntrinsicID());
+        if (!TL->isOperationExpand(Op, EVT::getEVT(Ty)))
+          return false;
+        return lowerUnaryVectorIntrinsicAsLoop(M, CI);
+      });
+      break;
     }
   }
   return Changed;
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 73af0a9a71407..9c56912aa6ba0 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -1841,6 +1841,17 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const {
   llvm_unreachable("Unknown instruction type encountered!");
 }
 
+int TargetLoweringBase::IntrinsicIDToISD(Intrinsic::ID ID) const {
+  switch (ID) {
+  case Intrinsic::exp:
+    return ISD::FEXP;
+  case Intrinsic::exp2:
+    return ISD::FEXP2;
+  default:
+    return ISD::DELETED_NODE;
+  }
+}
+
 Value *
 TargetLoweringBase::getDefaultSafeStackPointerLocation(IRBuilderBase &IRB,
                                                        bool UseTLS) const {
diff --git a/llvm/lib/Transforms/Utils/CMakeLists.txt b/llvm/lib/Transforms/Utils/CMakeLists.txt
index 65bd3080662c4..78cad0d253be8 100644
--- a/llvm/lib/Transforms/Utils/CMakeLists.txt
+++ b/llvm/lib/Transforms/Utils/CMakeLists.txt
@@ -56,6 +56,7 @@ add_llvm_component_library(LLVMTransformUtils
   LowerInvoke.cpp
   LowerMemIntrinsics.cpp
   LowerSwitch.cpp
+  LowerVectorIntrinsics.cpp
   MatrixUtils.cpp
   MemoryOpRemark.cpp
   MemoryTaggingSupport.cpp
diff --git a/llvm/lib/Transforms/Utils/LowerVectorIntrinsics.cpp b/llvm/lib/Transforms/Utils/LowerVectorIntrinsics.cpp
new file mode 100644
index 0000000000000..cd716deec14f5
--- /dev/null
+++ b/llvm/lib/Transforms/Utils/LowerVectorIntrinsics.cpp
@@ -0,0 +1,73 @@
+//===- LowerVectorIntrinsics.cpp ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/LowerVectorIntrinsics.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "lower-vector-intrinsics"
+
+using namespace llvm;
+
+bool llvm::lowerUnaryVectorIntrinsicAsLoop(Module &M, CallInst *CI) {
+  Type *ArgTy = CI->getArgOperand(0)->getType();
+  VectorType *VecTy = cast<VectorType>(ArgTy);
+
+  BasicBlock *PreLoopBB = CI->getParent();
+  BasicBlock *PostLoopBB = nullptr;
+  Function *ParentFunc = PreLoopBB->getParent();
+  LLVMContext &Ctx = PreLoopBB->getContext();
+
+  PostLoopBB = PreLoopBB->splitBasicBlock(CI);
+  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "", ParentFunc, PostLoopBB);
+  PreLoopBB->getTerminator()->setSuccessor(0, LoopBB);
+
+  // Loop preheader
+  IRBuilder<> PreLoopBuilder(PreLoopBB->getTerminator());
+  Value *LoopEnd = nullptr;
+  if (auto *ScalableVecTy = dyn_cast<ScalableVectorType>(VecTy)) {
+    Value *VScale = PreLoopBuilder.CreateVScale(
+        ConstantInt::get(PreLoopBuilder.getInt64Ty(), 1));
+    Value *N = ConstantInt::get(PreLoopBuilder.getInt64Ty(),
+                                ScalableVecTy->getMinNumElements());
+    LoopEnd = PreLoopBuilder.CreateMul(VScale, N);
+  } else {
+    FixedVectorType *FixedVecTy = cast<FixedVectorType>(VecTy);
+    LoopEnd = ConstantInt::get(PreLoopBuilder.getInt64Ty(),
+                               FixedVecTy->getNumElements());
+  }
+
+  // Loop body
+  IRBuilder<> LoopBuilder(LoopBB);
+  Type *Int64Ty = LoopBuilder.getInt64Ty();
+
+  PHINode *LoopIndex = LoopBuilder.CreatePHI(Int64Ty, 2);
+  LoopIndex->addIncoming(ConstantInt::get(Int64Ty, 0U), PreLoopBB);
+  PHINode *Vec = LoopBuilder.CreatePHI(VecTy, 2);
+  Vec->addIncoming(CI->getArgOperand(0), PreLoopBB);
+
+  Value *Elem = LoopBuilder.CreateExtractElement(Vec, LoopIndex);
+  Function *Exp = Intrinsic::getOrInsertDeclaration(&M, CI->getIntrinsicID(),
+                                                    VecTy->getElementType());
+  Value *Res = LoopBuilder.CreateCall(Exp, Elem);
+  Value *NewVec = LoopBuilder.CreateInsertElement(Vec, Res, LoopIndex);
+  Vec->addIncoming(NewVec, LoopBB);
+
+  Value *One = ConstantInt::get(Int64Ty, 1U);
+  Value *NextLoopIndex = LoopBuilder.CreateAdd(LoopIndex, One);
+  LoopIndex->addIncoming(NextLoopIndex, LoopBB);
+
+  Value *ExitCond =
+      LoopBuilder.CreateICmp(CmpInst::ICMP_EQ, NextLoopIndex, LoopEnd);
+  LoopBuilder.CreateCondBr(ExitCond, PostLoopBB, LoopBB);
+
+  CI->replaceAllUsesWith(NewVec);
+  CI->eraseFromParent();
+  return true;
+}
diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/expand-exp.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/expand-exp.ll
new file mode 100644
index 0000000000000..284f2ad8072fc
--- /dev/null
+++ b/llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/expand-exp.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=pre-isel-intrinsic-lowering -S < %s | FileCheck %s
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "aarch64"
+
+define <vscale x 4 x float> @scalable_vec_exp(<vscale x 4 x float> %input) {
+; CHECK-LABEL: define <vscale x 4 x float> @scalable_vec_exp(
+; CHECK-SAME: <vscale x 4 x float> [[INPUT:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
+; CHECK-NEXT:    br label %[[BB3:.*]]
+; CHECK:       [[BB3]]:
+; CHECK-NEXT:    [[TMP4:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP9:%.*]], %[[BB3]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = phi <vscale x 4 x float> [ [[INPUT]], [[TMP0]] ], [ [[TMP8:%.*]], %[[BB3]] ]
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <vscale x 4 x float> [[TMP5]], i64 [[TMP4]]
+; CHECK-NEXT:    [[TMP7:%.*]] = call float @llvm.exp.f32(float [[TMP6]])
+; CHECK-NEXT:    [[TMP8]] = insertelement <vscale x 4 x float> [[TMP5]], float [[TMP7]], i64 [[TMP4]]
+; CHECK-NEXT:    [[TMP9]] = add i64 [[TMP4]], 1
+; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[TMP9]], [[TMP2]]
+; CHECK-NEXT:    br i1 [[TMP10]], label %[[BB11:.*]], label %[[BB3]]
+; CHECK:       [[BB11]]:
+; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP8]]
+;
+  %output = call <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float> %input)
+  ret <vscale x 4 x float> %output
+}
+
+define <4 x float> @fixed_vec_exp(<4 x float> %input) {
+; CHECK-LABEL: define <4 x float> @fixed_vec_exp(
+; CHECK-SAME: <4 x float> [[INPUT:%.*]]) {
+; CHECK-NEXT:    [[OUTPUT:%.*]] = call <4 x float> @llvm.exp.v4f32(<4 x float> [[INPUT]])
+; CHECK-NEXT:    ret <4 x float> [[OUTPUT]]
+;
+  %output = call <4 x float> @llvm.exp.v4f32(<4 x float> %input)
+  ret <4 x float> %output
+}
+
+declare <4 x float> @llvm.exp.v4f32(<4 x float>) #0
+declare <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float>) #0
+
+; CHECK: attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+; CHECK-NEXT: attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) }
+attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/lit.local.cfg b/llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/lit.local.cfg
new file mode 100644
index 0000000000000..10d4a0e953ed4
--- /dev/null
+++ b/llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/lit.local.cfg
@@ -0,0 +1,2 @@
+if not "AArch64" in config.root.targets:
+    config.unsupported = True
diff --git a/llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn
index 1479e1c355d95..b16fe19bddfd1 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn
@@ -64,6 +64,7 @@ static_library("Utils") {
     "LowerInvoke.cpp",
     "LowerMemIntrinsics.cpp",
     "LowerSwitch.cpp",
+    "LowerVectorIntrinsics.cpp",
     "MatrixUtils.cpp",
     "Mem2Reg.cpp",
     "MemoryOpRemark.cpp",