From eb2eb97be30c740de8696781b955b516c08d3c36 Mon Sep 17 00:00:00 2001 From: Mel Chen Date: Mon, 13 Jan 2025 20:58:38 +0800 Subject: [PATCH] [LV] Fix FindLastIV reduction for epilogue vectorization. (#120395) Following 0e528ac404e13ed2d952a2d83aaf8383293c851e, this patch adjusts the resume value of VPReductionPHIRecipe for FindLastIV reductions. The resume value is replaced with: ResumeValue = ResumeValue == StartValue ? SentinelValue : ResumeValue; This addresses the correctness issue that arises when the start value might not be less than the minimum value of a monotonically increasing induction variable. Thanks to Florian Hahn for the help. --------- Co-authored-by: Florian Hahn --- .../Transforms/Vectorize/LoopVectorize.cpp | 27 +++++++++++++++++++ .../LoopVectorize/epilog-iv-select-cmp.ll | 8 ++++-- 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 0a13ce902795e62..ee352c0b12302c2 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7691,6 +7691,20 @@ static void fixReductionScalarResumeWhenVectorizingEpilog( "AnyOf expected to start by comparing main resume value to original " "start value"); MainResumeValue = Cmp->getOperand(0); + } else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind( + RdxDesc.getRecurrenceKind())) { + using namespace llvm::PatternMatch; + Value *Cmp, *OrigResumeV; + bool IsExpectedPattern = + match(MainResumeValue, m_Select(m_OneUse(m_Value(Cmp)), + m_Specific(RdxDesc.getSentinelValue()), + m_Value(OrigResumeV))) && + match(Cmp, + m_SpecificICmp(ICmpInst::ICMP_EQ, m_Specific(OrigResumeV), + m_Specific(RdxDesc.getRecurrenceStartValue()))); + assert(IsExpectedPattern && "Unexpected reduction resume pattern"); + (void)IsExpectedPattern; + MainResumeValue = OrigResumeV; } PHINode *MainResumePhi = cast(MainResumeValue); @@ -10413,6 +10427,19 @@ 
preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L, cast(ResumeV)->getParent()->getFirstNonPHI()); ResumeV = Builder.CreateICmpNE(ResumeV, RdxDesc.getRecurrenceStartValue()); + } else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) { + // VPReductionPHIRecipe for FindLastIV reductions requires an adjustment + // to the resume value. The resume value is adjusted to the sentinel + // value when the final value from the main vector loop equals the start + // value. This ensures correctness when the start value might not be + // less than the minimum value of a monotonically increasing induction + // variable. + IRBuilder<> Builder( + cast(ResumeV)->getParent()->getFirstNonPHI()); + Value *Cmp = + Builder.CreateICmpEQ(ResumeV, RdxDesc.getRecurrenceStartValue()); + ResumeV = + Builder.CreateSelect(Cmp, RdxDesc.getSentinelValue(), ResumeV); } } else { // Retrieve the induction resume values for wide inductions from diff --git a/llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll index 052b4a10e9c8d54..06f0f058891164f 100644 --- a/llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll +++ b/llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll @@ -40,7 +40,9 @@ define i64 @select_icmp_const(ptr %a, i64 %n) { ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]] ; CHECK: [[VEC_EPILOG_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 3, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX1:%.*]] = phi i64 [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 3, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[BC_MERGE_RDX1]], 3 +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = select i1 [[TMP14]], i64 -9223372036854775808, i64 
[[BC_MERGE_RDX1]] ; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[N]], 4 ; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[N]], [[N_MOD_VF2]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0 @@ -144,7 +146,9 @@ define i64 @select_fcmp_const_fast(ptr %a, i64 %n) { ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]] ; CHECK: [[VEC_EPILOG_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 2, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX1:%.*]] = phi i64 [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 2, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[BC_MERGE_RDX1]], 2 +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = select i1 [[TMP14]], i64 -9223372036854775808, i64 [[BC_MERGE_RDX1]] ; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[N]], 4 ; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[N]], [[N_MOD_VF2]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0