Skip to content

Commit

Permalink
[LV] Fix FindLastIV reduction for epilogue vectorization.
Browse files Browse the repository at this point in the history
  • Loading branch information
Mel-Chen committed Dec 18, 2024
1 parent 9f231a8 commit be9d783
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 41 deletions.
10 changes: 6 additions & 4 deletions llvm/include/llvm/Transforms/Utils/LoopUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -420,15 +420,17 @@ Value *createAnyOfReduction(IRBuilderBase &B, Value *Src,
PHINode *OrigPhi);

/// Create a reduction of the given vector \p Src for a reduction of the
/// kind RecurKind::IFindLastIV or RecurKind::FFindLastIV. The reduction
/// operation is described by \p Desc.
Value *createFindLastIVReduction(IRBuilderBase &B, Value *Src,
/// kind RecurKind::IFindLastIV or RecurKind::FFindLastIV. The scalar \p
/// StartVal is the incoming value of the reduction phi from outside the loop.
/// The reduction operation is described by \p Desc.
Value *createFindLastIVReduction(IRBuilderBase &B, Value *Src, Value *StartVal,
const RecurrenceDescriptor &Desc);

/// Create a generic reduction using a recurrence descriptor \p Desc.
/// Fast-math-flags are propagated using the RecurrenceDescriptor.
Value *createReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc,
Value *Src, PHINode *OrigPhi = nullptr);
Value *Src, Value *StartVal = nullptr,
PHINode *OrigPhi = nullptr);

/// Create an ordered reduction intrinsic using the given recurrence
/// descriptor \p Desc.
Expand Down
10 changes: 5 additions & 5 deletions llvm/lib/Transforms/Utils/LoopUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1209,11 +1209,12 @@ Value *llvm::createAnyOfReduction(IRBuilderBase &Builder, Value *Src,
}

Value *llvm::createFindLastIVReduction(IRBuilderBase &Builder, Value *Src,
Value *StartVal,
const RecurrenceDescriptor &Desc) {
assert(RecurrenceDescriptor::isFindLastIVRecurrenceKind(
Desc.getRecurrenceKind()) &&
"Unexpected reduction kind");
Value *StartVal = Desc.getRecurrenceStartValue();
assert(StartVal && "Null start value");
Value *Sentinel = Desc.getSentinelValue();
Value *MaxRdx = Src->getType()->isVectorTy()
? Builder.CreateIntMaxReduce(Src, true)
Expand Down Expand Up @@ -1320,9 +1321,8 @@ Value *llvm::createSimpleReduction(VectorBuilder &VBuilder, Value *Src,
return VBuilder.createSimpleReduction(Id, SrcTy, Ops);
}

Value *llvm::createReduction(IRBuilderBase &B,
const RecurrenceDescriptor &Desc, Value *Src,
PHINode *OrigPhi) {
Value *llvm::createReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc,
Value *Src, Value *StartVal, PHINode *OrigPhi) {
// TODO: Support in-order reductions based on the recurrence descriptor.
// All ops in the reduction inherit fast-math-flags from the recurrence
// descriptor.
Expand All @@ -1333,7 +1333,7 @@ Value *llvm::createReduction(IRBuilderBase &B,
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK))
return createAnyOfReduction(B, Src, Desc, OrigPhi);
if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK))
return createFindLastIVReduction(B, Src, Desc);
return createFindLastIVReduction(B, Src, StartVal, Desc);

return createSimpleReduction(B, Src, RK);
}
Expand Down
9 changes: 0 additions & 9 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9706,15 +9706,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
// Convert the reduction phi to operate on bools.
PhiR->setOperand(0, Plan->getOrAddLiveIn(ConstantInt::getFalse(
OrigLoop->getHeader()->getContext())));
continue;
}

if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(
RdxDesc.getRecurrenceKind())) {
// Adjust the start value for FindLastIV recurrences to use the sentinel
// value after generating the ResumePhi recipe, which uses the original
// start value.
PhiR->setOperand(0, Plan->getOrAddLiveIn(RdxDesc.getSentinelValue()));
}
}

Expand Down
13 changes: 6 additions & 7 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -581,8 +581,9 @@ Value *VPInstruction::generate(VPTransformState &State) {
RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) ||
RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) &&
!PhiR->isInLoop()) {
Value *StartVal = PhiR->getStartValue()->getLiveInIRValue();
ReducedPartRdx =
createReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi);
createReduction(Builder, RdxDesc, ReducedPartRdx, StartVal, OrigPhi);
// If the reduction can be performed in a smaller type, we need to extend
// the reduction to the wider type before we branch to the original loop.
if (PhiTy != RdxDesc.getRecurrenceType())
Expand Down Expand Up @@ -3405,15 +3406,13 @@ void VPReductionPHIRecipe::execute(VPTransformState &State) {
}
} else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) {
// [I|F]FindLastIV will use a sentinel value to initialize the reduction
// phi or the resume value from the main vector loop when vectorizing the
// epilogue loop. In the exit block, ComputeReductionResult will generate
// checks to verify if the reduction result is the sentinel value. If the
// result is the sentinel value, it will be corrected back to the start
// value.
// phi. In the exit block, ComputeReductionResult will generate checks to
// verify if the reduction result is the sentinel value. If the result is
// the sentinel value, it will be corrected back to the start value.
// TODO: The sentinel value is not always necessary. When the start value is
// a constant, and smaller than the start value of the induction variable,
// the start value can be directly used to initialize the reduction phi.
Iden = StartV;
StartV = Iden = RdxDesc.getSentinelValue();
if (!ScalarPHI) {
IRBuilderBase::InsertPointGuard IPBuilder(Builder);
Builder.SetInsertPoint(VectorPH->getTerminator());
Expand Down
28 changes: 12 additions & 16 deletions llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,11 @@ define i64 @select_icmp_const(ptr %a, i64 %n) {
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[DOTSPLAT]], <i64 0, i64 1, i64 2, i64 3>
; CHECK-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <4 x i64> poison, i64 [[BC_MERGE_RDX]], i64 0
; CHECK-NEXT: [[DOTSPLAT9:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT8]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
; CHECK: [[VEC_EPILOG_VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND5:%.*]] = phi <4 x i64> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT6:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i64> [ [[DOTSPLAT9]], %[[VEC_EPILOG_PH]] ], [ [[TMP11:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VEC_EPILOG_PH]] ], [ [[TMP11:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX4]], 0
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0
Expand All @@ -66,16 +64,16 @@ define i64 @select_icmp_const(ptr %a, i64 %n) {
; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]:
; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP11]])
; CHECK-NEXT: [[RDX_SELECT_CMP10:%.*]] = icmp ne i64 [[TMP13]], -9223372036854775808
; CHECK-NEXT: [[RDX_SELECT11:%.*]] = select i1 [[RDX_SELECT_CMP10]], i64 [[TMP13]], i64 3
; CHECK-NEXT: [[RDX_SELECT11:%.*]] = select i1 [[RDX_SELECT_CMP10]], i64 [[TMP13]], i64 [[BC_MERGE_RDX]]
; CHECK-NEXT: [[CMP_N12:%.*]] = icmp eq i64 [[N]], [[N_VEC3]]
; CHECK-NEXT: br i1 [[CMP_N12]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL15:%.*]] = phi i64 [ [[N_VEC3]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, %[[ITER_CHECK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ]
; CHECK-NEXT: [[BC_MERGE_RDX16:%.*]] = phi i64 [ [[RDX_SELECT11]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 3, %[[ITER_CHECK]] ], [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL13:%.*]] = phi i64 [ [[N_VEC3]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, %[[ITER_CHECK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ]
; CHECK-NEXT: [[BC_MERGE_RDX14:%.*]] = phi i64 [ [[RDX_SELECT11]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 3, %[[ITER_CHECK]] ], [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL15]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[BC_MERGE_RDX16]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL13]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[BC_MERGE_RDX14]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 8
; CHECK-NEXT: [[C:%.*]] = icmp eq i64 [[L]], 3
Expand Down Expand Up @@ -150,13 +148,11 @@ define i64 @select_fcmp_const_fast(ptr %a, i64 %n) {
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[DOTSPLAT]], <i64 0, i64 1, i64 2, i64 3>
; CHECK-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <4 x i64> poison, i64 [[BC_MERGE_RDX]], i64 0
; CHECK-NEXT: [[DOTSPLAT9:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT8]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
; CHECK: [[VEC_EPILOG_VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND5:%.*]] = phi <4 x i64> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT6:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i64> [ [[DOTSPLAT9]], %[[VEC_EPILOG_PH]] ], [ [[TMP11:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VEC_EPILOG_PH]] ], [ [[TMP11:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX4]], 0
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 0
Expand All @@ -170,16 +166,16 @@ define i64 @select_fcmp_const_fast(ptr %a, i64 %n) {
; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]:
; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP11]])
; CHECK-NEXT: [[RDX_SELECT_CMP10:%.*]] = icmp ne i64 [[TMP13]], -9223372036854775808
; CHECK-NEXT: [[RDX_SELECT11:%.*]] = select i1 [[RDX_SELECT_CMP10]], i64 [[TMP13]], i64 2
; CHECK-NEXT: [[RDX_SELECT11:%.*]] = select i1 [[RDX_SELECT_CMP10]], i64 [[TMP13]], i64 [[BC_MERGE_RDX]]
; CHECK-NEXT: [[CMP_N12:%.*]] = icmp eq i64 [[N]], [[N_VEC3]]
; CHECK-NEXT: br i1 [[CMP_N12]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL15:%.*]] = phi i64 [ [[N_VEC3]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, %[[ITER_CHECK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ]
; CHECK-NEXT: [[BC_MERGE_RDX16:%.*]] = phi i64 [ [[RDX_SELECT11]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 2, %[[ITER_CHECK]] ], [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL13:%.*]] = phi i64 [ [[N_VEC3]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, %[[ITER_CHECK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ]
; CHECK-NEXT: [[BC_MERGE_RDX14:%.*]] = phi i64 [ [[RDX_SELECT11]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 2, %[[ITER_CHECK]] ], [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL15]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[BC_MERGE_RDX16]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL13]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[BC_MERGE_RDX14]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP]], align 4
; CHECK-NEXT: [[C:%.*]] = fcmp fast ueq float [[L]], 3.000000e+00
Expand Down

0 comments on commit be9d783

Please sign in to comment.