Skip to content

Commit

Permalink
PreISelIntrinsicLowering: Lower llvm.exp/llvm.exp2 to a loop if scala…
Browse files Browse the repository at this point in the history
…ble vec arg (#117568)
  • Loading branch information
steplong authored Jan 24, 2025
1 parent 3861b9d commit ab976a1
Show file tree
Hide file tree
Showing 9 changed files with 179 additions and 0 deletions.
4 changes: 4 additions & 0 deletions llvm/include/llvm/CodeGen/TargetLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -2123,6 +2123,10 @@ class TargetLoweringBase {
/// Get the ISD node that corresponds to the Instruction class opcode.
int InstructionOpcodeToISD(unsigned Opcode) const;

/// Get the ISD opcode that corresponds to the intrinsic \p ID. Returns
/// ISD::DELETED_NODE for any intrinsic ID that has no mapping.
int IntrinsicIDToISD(Intrinsic::ID ID) const;

/// @}

//===--------------------------------------------------------------------===//
Expand Down
30 changes: 30 additions & 0 deletions llvm/include/llvm/Transforms/Utils/LowerVectorIntrinsics.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
//===- llvm/Transforms/Utils/LowerVectorIntrinsics.h ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Lower intrinsics with a scalable vector arg to loops.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TRANSFORMS_UTILS_LOWERVECTORINTRINSICS_H
#define LLVM_TRANSFORMS_UTILS_LOWERVECTORINTRINSICS_H

#include <cstdint>
#include <optional>

namespace llvm {

class CallInst;
class Module;

/// Lower \p CI as a loop. \p CI is a call to a unary intrinsic whose argument
/// is a (fixed or scalable) vector. The basic block containing \p CI is split
/// at the call and a loop is inserted that applies the scalar overload of the
/// same intrinsic to one vector element per iteration. All uses of \p CI are
/// replaced with the vector produced by the loop and \p CI is erased.
///
/// \p M is the module in which the scalar intrinsic declaration is looked up
/// or inserted.
///
/// \returns true, as the call is always rewritten.
bool lowerUnaryVectorIntrinsicAsLoop(Module &M, CallInst *CI);

} // namespace llvm

#endif
14 changes: 14 additions & 0 deletions llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
#include "llvm/Transforms/Utils/LowerVectorIntrinsics.h"

using namespace llvm;

Expand Down Expand Up @@ -453,6 +454,19 @@ bool PreISelIntrinsicLowering::lowerIntrinsics(Module &M) const {
case Intrinsic::objc_sync_exit:
Changed |= lowerObjCCall(F, "objc_sync_exit");
break;
case Intrinsic::exp:
case Intrinsic::exp2:
Changed |= forEachCall(F, [&](CallInst *CI) {
Type *Ty = CI->getArgOperand(0)->getType();
if (!isa<ScalableVectorType>(Ty))
return false;
const TargetLowering *TL = TM->getSubtargetImpl(F)->getTargetLowering();
unsigned Op = TL->IntrinsicIDToISD(F.getIntrinsicID());
if (!TL->isOperationExpand(Op, EVT::getEVT(Ty)))
return false;
return lowerUnaryVectorIntrinsicAsLoop(M, CI);
});
break;
}
}
return Changed;
Expand Down
11 changes: 11 additions & 0 deletions llvm/lib/CodeGen/TargetLoweringBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1841,6 +1841,17 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const {
llvm_unreachable("Unknown instruction type encountered!");
}

/// Translate an intrinsic ID into the matching ISD opcode.
///
/// Only llvm.exp and llvm.exp2 are mapped here; every other intrinsic ID
/// yields ISD::DELETED_NODE.
int TargetLoweringBase::IntrinsicIDToISD(Intrinsic::ID ID) const {
  if (ID == Intrinsic::exp)
    return ISD::FEXP;
  if (ID == Intrinsic::exp2)
    return ISD::FEXP2;
  // No ISD equivalent is known for this intrinsic.
  return ISD::DELETED_NODE;
}

Value *
TargetLoweringBase::getDefaultSafeStackPointerLocation(IRBuilderBase &IRB,
bool UseTLS) const {
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Transforms/Utils/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ add_llvm_component_library(LLVMTransformUtils
LowerInvoke.cpp
LowerMemIntrinsics.cpp
LowerSwitch.cpp
LowerVectorIntrinsics.cpp
MatrixUtils.cpp
MemoryOpRemark.cpp
MemoryTaggingSupport.cpp
Expand Down
73 changes: 73 additions & 0 deletions llvm/lib/Transforms/Utils/LowerVectorIntrinsics.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
//===- LowerVectorIntrinsics.cpp ------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Utils/LowerVectorIntrinsics.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"

#define DEBUG_TYPE "lower-vector-intrinsics"

using namespace llvm;

/// Expand the unary vector intrinsic call \p CI into a scalar loop.
///
/// The block containing \p CI is split at the call and a new single-block
/// loop is placed between the two halves. Each iteration extracts one lane
/// from the running vector, calls the scalar overload of the same intrinsic
/// (declared in \p M on demand) and inserts the result back into that lane.
/// The trip count is the element count of the vector type: a constant for
/// fixed-width vectors, vscale * MinNumElements for scalable vectors.
///
/// All uses of \p CI are redirected to the vector produced by the loop and
/// \p CI is erased. Fast-math flags present on \p CI are carried over to the
/// scalar calls so the expansion does not pessimize the operation.
///
/// \returns true, since the IR is always modified.
bool llvm::lowerUnaryVectorIntrinsicAsLoop(Module &M, CallInst *CI) {
  Value *Src = CI->getArgOperand(0);
  VectorType *VecTy = cast<VectorType>(Src->getType());

  BasicBlock *PreLoopBB = CI->getParent();
  Function *ParentFunc = PreLoopBB->getParent();
  LLVMContext &Ctx = PreLoopBB->getContext();

  // Split so that CI becomes the first instruction of PostLoopBB, then
  // redirect the branch created by the split into the new loop block.
  BasicBlock *PostLoopBB = PreLoopBB->splitBasicBlock(CI);
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "", ParentFunc, PostLoopBB);
  PreLoopBB->getTerminator()->setSuccessor(0, LoopBB);

  // Loop preheader: materialize the number of elements to process.
  IRBuilder<> PreLoopBuilder(PreLoopBB->getTerminator());
  Value *LoopEnd = nullptr;
  if (auto *ScalableVecTy = dyn_cast<ScalableVectorType>(VecTy)) {
    Value *VScale = PreLoopBuilder.CreateVScale(
        ConstantInt::get(PreLoopBuilder.getInt64Ty(), 1));
    Value *N = ConstantInt::get(PreLoopBuilder.getInt64Ty(),
                                ScalableVecTy->getMinNumElements());
    LoopEnd = PreLoopBuilder.CreateMul(VScale, N);
  } else {
    FixedVectorType *FixedVecTy = cast<FixedVectorType>(VecTy);
    LoopEnd = ConstantInt::get(PreLoopBuilder.getInt64Ty(),
                               FixedVecTy->getNumElements());
  }

  // Loop body
  IRBuilder<> LoopBuilder(LoopBB);
  Type *Int64Ty = LoopBuilder.getInt64Ty();

  // Induction variable, starting at lane 0.
  PHINode *LoopIndex = LoopBuilder.CreatePHI(Int64Ty, 2);
  LoopIndex->addIncoming(ConstantInt::get(Int64Ty, 0U), PreLoopBB);
  // Running vector: starts as the original operand and has one more lane
  // replaced by its result after every iteration.
  PHINode *Vec = LoopBuilder.CreatePHI(VecTy, 2);
  Vec->addIncoming(Src, PreLoopBB);

  Value *Elem = LoopBuilder.CreateExtractElement(Vec, LoopIndex);
  Function *ScalarDecl = Intrinsic::getOrInsertDeclaration(
      &M, CI->getIntrinsicID(), VecTy->getElementType());
  CallInst *ScalarCall = LoopBuilder.CreateCall(ScalarDecl, Elem);
  // Keep the fast-math flags of the vector call on the per-lane calls;
  // without this the expansion would silently drop them.
  if (isa<FPMathOperator>(CI) && isa<FPMathOperator>(ScalarCall))
    ScalarCall->copyFastMathFlags(CI);
  Value *NewVec = LoopBuilder.CreateInsertElement(Vec, ScalarCall, LoopIndex);
  Vec->addIncoming(NewVec, LoopBB);

  Value *One = ConstantInt::get(Int64Ty, 1U);
  Value *NextLoopIndex = LoopBuilder.CreateAdd(LoopIndex, One);
  LoopIndex->addIncoming(NextLoopIndex, LoopBB);

  // Leave the loop once every lane has been processed.
  Value *ExitCond =
      LoopBuilder.CreateICmp(CmpInst::ICMP_EQ, NextLoopIndex, LoopEnd);
  LoopBuilder.CreateCondBr(ExitCond, PostLoopBB, LoopBB);

  // NewVec is defined in LoopBB, which dominates PostLoopBB where the
  // remaining users of CI live.
  CI->replaceAllUsesWith(NewVec);
  CI->eraseFromParent();
  return true;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -passes=pre-isel-intrinsic-lowering -S < %s | FileCheck %s
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
target triple = "aarch64"

; A scalable-vector llvm.exp call is expected to be rewritten into a per-lane
; loop that calls the scalar llvm.exp.f32 intrinsic, with the trip count
; computed as vscale * 4.
define <vscale x 4 x float> @scalable_vec_exp(<vscale x 4 x float> %input) {
; CHECK-LABEL: define <vscale x 4 x float> @scalable_vec_exp(
; CHECK-SAME: <vscale x 4 x float> [[INPUT:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; CHECK-NEXT: br label %[[BB3:.*]]
; CHECK: [[BB3]]:
; CHECK-NEXT: [[TMP4:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP9:%.*]], %[[BB3]] ]
; CHECK-NEXT: [[TMP5:%.*]] = phi <vscale x 4 x float> [ [[INPUT]], [[TMP0]] ], [ [[TMP8:%.*]], %[[BB3]] ]
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <vscale x 4 x float> [[TMP5]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP7:%.*]] = call float @llvm.exp.f32(float [[TMP6]])
; CHECK-NEXT: [[TMP8]] = insertelement <vscale x 4 x float> [[TMP5]], float [[TMP7]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP9]] = add i64 [[TMP4]], 1
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], [[TMP2]]
; CHECK-NEXT: br i1 [[TMP10]], label %[[BB11:.*]], label %[[BB3]]
; CHECK: [[BB11]]:
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP8]]
;
%output = call <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float> %input)
ret <vscale x 4 x float> %output
}

; Negative test: a fixed-width vector llvm.exp call is expected to be left
; untouched by the pass, since only scalable vector arguments are lowered to
; a loop.
define <4 x float> @fixed_vec_exp(<4 x float> %input) {
; CHECK-LABEL: define <4 x float> @fixed_vec_exp(
; CHECK-SAME: <4 x float> [[INPUT:%.*]]) {
; CHECK-NEXT: [[OUTPUT:%.*]] = call <4 x float> @llvm.exp.v4f32(<4 x float> [[INPUT]])
; CHECK-NEXT: ret <4 x float> [[OUTPUT]]
;
%output = call <4 x float> @llvm.exp.v4f32(<4 x float> %input)
ret <4 x float> %output
}

declare <4 x float> @llvm.exp.v4f32(<4 x float>) #0
declare <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float>) #0

; CHECK: attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
; CHECK-NEXT: attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) }
attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# The tests in this directory use an AArch64 triple; mark the directory as
# unsupported when that target is not among the configured targets.
if "AArch64" not in config.root.targets:
    config.unsupported = True
1 change: 1 addition & 0 deletions llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ static_library("Utils") {
"LowerInvoke.cpp",
"LowerMemIntrinsics.cpp",
"LowerSwitch.cpp",
"LowerVectorIntrinsics.cpp",
"MatrixUtils.cpp",
"Mem2Reg.cpp",
"MemoryOpRemark.cpp",
Expand Down

0 comments on commit ab976a1

Please sign in to comment.