Skip to content

Commit

Permalink
[VPlan] Propagate all GEP flags (#119899)
Browse files Browse the repository at this point in the history
Store GEPNoWrapFlags instead of only InBounds and propagate them.
  • Loading branch information
nikic authored Dec 17, 2024
1 parent 34c4f6f commit 1157187
Show file tree
Hide file tree
Showing 24 changed files with 224 additions and 149 deletions.
8 changes: 4 additions & 4 deletions llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -222,13 +222,13 @@ class VPBuilder {

VPInstruction *createPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL = {},
const Twine &Name = "") {
return tryInsertInstruction(new VPInstruction(
Ptr, Offset, VPRecipeWithIRFlags::GEPFlagsTy(false), DL, Name));
return tryInsertInstruction(
new VPInstruction(Ptr, Offset, GEPNoWrapFlags::none(), DL, Name));
}
VPValue *createInBoundsPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL = {},
const Twine &Name = "") {
return tryInsertInstruction(new VPInstruction(
Ptr, Offset, VPRecipeWithIRFlags::GEPFlagsTy(true), DL, Name));
return tryInsertInstruction(
new VPInstruction(Ptr, Offset, GEPNoWrapFlags::inBounds(), DL, Name));
}

VPDerivedIVRecipe *createDerivedIV(InductionDescriptor::InductionKind Kind,
Expand Down
7 changes: 5 additions & 2 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8406,10 +8406,13 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
if (Reverse)
VectorPtr = new VPReverseVectorPointerRecipe(
Ptr, &Plan.getVF(), getLoadStoreType(I),
GEP ? GEP->isInBounds() : false, I->getDebugLoc());
GEP && GEP->isInBounds() ? GEPNoWrapFlags::inBounds()
: GEPNoWrapFlags::none(),
I->getDebugLoc());
else
VectorPtr = new VPVectorPointerRecipe(Ptr, getLoadStoreType(I),
GEP ? GEP->isInBounds() : false,
GEP ? GEP->getNoWrapFlags()
: GEPNoWrapFlags::none(),
I->getDebugLoc());
Builder.getInsertBlock()->appendRecipe(VectorPtr);
Ptr = VectorPtr;
Expand Down
44 changes: 16 additions & 28 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -952,11 +952,6 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
DisjointFlagsTy(bool IsDisjoint) : IsDisjoint(IsDisjoint) {}
};

struct GEPFlagsTy {
char IsInBounds : 1;
GEPFlagsTy(bool IsInBounds) : IsInBounds(IsInBounds) {}
};

private:
struct ExactFlagsTy {
char IsExact : 1;
Expand All @@ -983,7 +978,7 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
WrapFlagsTy WrapFlags;
DisjointFlagsTy DisjointFlags;
ExactFlagsTy ExactFlags;
GEPFlagsTy GEPFlags;
GEPNoWrapFlags GEPFlags;
NonNegFlagsTy NonNegFlags;
FastMathFlagsTy FMFs;
unsigned AllFlags;
Expand Down Expand Up @@ -1020,7 +1015,7 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
ExactFlags.IsExact = Op->isExact();
} else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
OpType = OperationType::GEPOp;
GEPFlags.IsInBounds = GEP->isInBounds();
GEPFlags = GEP->getNoWrapFlags();
} else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
OpType = OperationType::NonNegOp;
NonNegFlags.NonNeg = PNNI->hasNonNeg();
Expand Down Expand Up @@ -1060,7 +1055,7 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
protected:
template <typename IterT>
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
GEPFlagsTy GEPFlags, DebugLoc DL = {})
GEPNoWrapFlags GEPFlags, DebugLoc DL = {})
: VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::GEPOp),
GEPFlags(GEPFlags) {}

Expand Down Expand Up @@ -1097,7 +1092,7 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
ExactFlags.IsExact = false;
break;
case OperationType::GEPOp:
GEPFlags.IsInBounds = false;
GEPFlags = GEPNoWrapFlags::none();
break;
case OperationType::FPMathOp:
FMFs.NoNaNs = false;
Expand Down Expand Up @@ -1126,10 +1121,7 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
I->setIsExact(ExactFlags.IsExact);
break;
case OperationType::GEPOp:
// TODO(gep_nowrap): Track the full GEPNoWrapFlags in VPlan.
cast<GetElementPtrInst>(I)->setNoWrapFlags(
GEPFlags.IsInBounds ? GEPNoWrapFlags::inBounds()
: GEPNoWrapFlags::none());
cast<GetElementPtrInst>(I)->setNoWrapFlags(GEPFlags);
break;
case OperationType::FPMathOp:
I->setHasAllowReassoc(FMFs.AllowReassoc);
Expand All @@ -1155,11 +1147,7 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
return CmpPredicate;
}

bool isInBounds() const {
assert(OpType == OperationType::GEPOp &&
"recipe doesn't have inbounds flag");
return GEPFlags.IsInBounds;
}
/// Returns the GEP no-wrap flags stored in this recipe's GEPFlags member.
/// NOTE(review): no check is made here that OpType is OperationType::GEPOp;
/// presumably the result is only meaningful for GEP recipes -- confirm that
/// callers never query it on other operation types.
GEPNoWrapFlags getGEPNoWrapFlags() const { return GEPFlags; }

/// Returns true if the recipe has fast-math flags.
bool hasFastMathFlags() const { return OpType == OperationType::FPMathOp; }
Expand Down Expand Up @@ -1306,7 +1294,7 @@ class VPInstruction : public VPRecipeWithIRFlags,
assert(Opcode == Instruction::Or && "only OR opcodes can be disjoint");
}

VPInstruction(VPValue *Ptr, VPValue *Offset, GEPFlagsTy Flags,
VPInstruction(VPValue *Ptr, VPValue *Offset, GEPNoWrapFlags Flags,
DebugLoc DL = {}, const Twine &Name = "")
: VPRecipeWithIRFlags(VPDef::VPInstructionSC,
ArrayRef<VPValue *>({Ptr, Offset}), Flags, DL),
Expand Down Expand Up @@ -1914,10 +1902,9 @@ class VPReverseVectorPointerRecipe : public VPRecipeWithIRFlags,

public:
VPReverseVectorPointerRecipe(VPValue *Ptr, VPValue *VF, Type *IndexedTy,
bool IsInBounds, DebugLoc DL)
GEPNoWrapFlags GEPFlags, DebugLoc DL)
: VPRecipeWithIRFlags(VPDef::VPReverseVectorPointerSC,
ArrayRef<VPValue *>({Ptr, VF}),
GEPFlagsTy(IsInBounds), DL),
ArrayRef<VPValue *>({Ptr, VF}), GEPFlags, DL),
IndexedTy(IndexedTy) {}

VP_CLASSOF_IMPL(VPDef::VPReverseVectorPointerSC)
Expand Down Expand Up @@ -1949,8 +1936,9 @@ class VPReverseVectorPointerRecipe : public VPRecipeWithIRFlags,
}

VPReverseVectorPointerRecipe *clone() override {
return new VPReverseVectorPointerRecipe(
getOperand(0), getVFValue(), IndexedTy, isInBounds(), getDebugLoc());
return new VPReverseVectorPointerRecipe(getOperand(0), getVFValue(),
IndexedTy, getGEPNoWrapFlags(),
getDebugLoc());
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
Expand All @@ -1966,10 +1954,10 @@ class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
Type *IndexedTy;

public:
VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsInBounds,
VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, GEPNoWrapFlags GEPFlags,
DebugLoc DL)
: VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
GEPFlagsTy(IsInBounds), DL),
GEPFlags, DL),
IndexedTy(IndexedTy) {}

VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
Expand All @@ -1991,8 +1979,8 @@ class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
}

VPVectorPointerRecipe *clone() override {
return new VPVectorPointerRecipe(getOperand(0), IndexedTy, isInBounds(),
getDebugLoc());
return new VPVectorPointerRecipe(getOperand(0), IndexedTy,
getGEPNoWrapFlags(), getDebugLoc());
}

/// Return the cost of this VPVectorPointerRecipe.
Expand Down
34 changes: 19 additions & 15 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -621,8 +621,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
"can only generate first lane for PtrAdd");
Value *Ptr = State.get(getOperand(0), VPLane(0));
Value *Addend = State.get(getOperand(1), VPLane(0));
return isInBounds() ? Builder.CreateInBoundsPtrAdd(Ptr, Addend, Name)
: Builder.CreatePtrAdd(Ptr, Addend, Name);
return Builder.CreatePtrAdd(Ptr, Addend, Name, getGEPNoWrapFlags());
}
case VPInstruction::ResumePhi: {
Value *IncomingFromVPlanPred =
Expand Down Expand Up @@ -1276,8 +1275,12 @@ void VPRecipeWithIRFlags::printFlags(raw_ostream &O) const {
getFastMathFlags().print(O);
break;
case OperationType::GEPOp:
if (GEPFlags.IsInBounds)
if (GEPFlags.isInBounds())
O << " inbounds";
else if (GEPFlags.hasNoUnsignedSignedWrap())
O << " nusw";
if (GEPFlags.hasNoUnsignedWrap())
O << " nuw";
break;
case OperationType::NonNegOp:
if (NonNegFlags.NonNeg)
Expand Down Expand Up @@ -1906,9 +1909,9 @@ void VPWidenGEPRecipe::execute(VPTransformState &State) {
for (unsigned I = 0, E = getNumOperands(); I != E; I++)
Ops.push_back(State.get(getOperand(I), VPLane(0)));

auto *NewGEP =
State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0],
ArrayRef(Ops).drop_front(), "", isInBounds());
auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0],
ArrayRef(Ops).drop_front(), "",
getGEPNoWrapFlags());
Value *Splat = State.Builder.CreateVectorSplat(State.VF, NewGEP);
State.set(this, Splat);
State.addMetadata(Splat, GEP);
Expand All @@ -1934,7 +1937,7 @@ void VPWidenGEPRecipe::execute(VPTransformState &State) {
// Create the new GEP. Note that this GEP may be a scalar if VF == 1,
// but it should be a vector, otherwise.
auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
Indices, "", isInBounds());
Indices, "", getGEPNoWrapFlags());
assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
"NewGEP is not a pointer vector");
State.set(this, NewGEP);
Expand Down Expand Up @@ -1985,9 +1988,10 @@ void VPReverseVectorPointerRecipe::execute(VPTransformState &State) {
// LastLane = 1 - RunTimeVF
Value *LastLane = Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
Value *Ptr = State.get(getOperand(0), VPLane(0));
bool InBounds = isInBounds();
Value *ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds);
ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "", InBounds);
Value *ResultPtr =
Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", getGEPNoWrapFlags());
ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "",
getGEPNoWrapFlags());

State.set(this, ResultPtr, /*IsScalar*/ true);
}
Expand All @@ -1997,9 +2001,9 @@ void VPReverseVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent;
printAsOperand(O, SlotTracker);
O << " = reverse-vector-pointer ";
if (isInBounds())
O << "inbounds ";
O << " = reverse-vector-pointer";
printFlags(O);
O << " ";
printOperands(O, SlotTracker);
}
#endif
Expand All @@ -2011,10 +2015,10 @@ void VPVectorPointerRecipe::execute(VPTransformState &State) {
Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ false,
CurrentPart, Builder);
Value *Ptr = State.get(getOperand(0), VPLane(0));
bool InBounds = isInBounds();

Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart);
Value *ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds);
Value *ResultPtr =
Builder.CreateGEP(IndexedTy, Ptr, Increment, "", getGEPNoWrapFlags());

State.set(this, ResultPtr, /*IsScalar*/ true);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,8 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef
; CHECK-VS1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VS1-NEXT: [[TMP20:%.*]] = add i64 [[TMP0]], [[INDEX]]
; CHECK-VS1-NEXT: [[TMP21:%.*]] = add i64 [[TMP20]], 0
; CHECK-VS1-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[V]], i64 [[TMP21]]
; CHECK-VS1-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[TMP22]], i32 0
; CHECK-VS1-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 [[TMP21]]
; CHECK-VS1-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP22]], i32 0
; CHECK-VS1-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[TMP23]], align 1
; CHECK-VS1-NEXT: [[TMP24:%.*]] = add <vscale x 16 x i8> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-VS1-NEXT: store <vscale x 16 x i8> [[TMP24]], ptr [[TMP23]], align 1
Expand Down Expand Up @@ -115,8 +115,8 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef
; CHECK-VS1-NEXT: [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-VS1-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[TMP0]], [[INDEX5]]
; CHECK-VS1-NEXT: [[TMP32:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-VS1-NEXT: [[TMP33:%.*]] = getelementptr inbounds i8, ptr [[V]], i64 [[TMP32]]
; CHECK-VS1-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[TMP33]], i32 0
; CHECK-VS1-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 [[TMP32]]
; CHECK-VS1-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP33]], i32 0
; CHECK-VS1-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 8 x i8>, ptr [[TMP34]], align 1
; CHECK-VS1-NEXT: [[TMP35:%.*]] = add <vscale x 8 x i8> [[WIDE_LOAD6]], [[BROADCAST_SPLAT8]]
; CHECK-VS1-NEXT: store <vscale x 8 x i8> [[TMP35]], ptr [[TMP34]], align 1
Expand Down Expand Up @@ -189,8 +189,8 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef
; CHECK-VS2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VS2-NEXT: [[TMP20:%.*]] = add i64 [[TMP0]], [[INDEX]]
; CHECK-VS2-NEXT: [[TMP21:%.*]] = add i64 [[TMP20]], 0
; CHECK-VS2-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[V]], i64 [[TMP21]]
; CHECK-VS2-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[TMP22]], i32 0
; CHECK-VS2-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 [[TMP21]]
; CHECK-VS2-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP22]], i32 0
; CHECK-VS2-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[TMP23]], align 1
; CHECK-VS2-NEXT: [[TMP24:%.*]] = add <vscale x 8 x i8> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-VS2-NEXT: store <vscale x 8 x i8> [[TMP24]], ptr [[TMP23]], align 1
Expand Down Expand Up @@ -223,8 +223,8 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef
; CHECK-VS2-NEXT: [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-VS2-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[TMP0]], [[INDEX5]]
; CHECK-VS2-NEXT: [[TMP32:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-VS2-NEXT: [[TMP33:%.*]] = getelementptr inbounds i8, ptr [[V]], i64 [[TMP32]]
; CHECK-VS2-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[TMP33]], i32 0
; CHECK-VS2-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 [[TMP32]]
; CHECK-VS2-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP33]], i32 0
; CHECK-VS2-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 4 x i8>, ptr [[TMP34]], align 1
; CHECK-VS2-NEXT: [[TMP35:%.*]] = add <vscale x 4 x i8> [[WIDE_LOAD6]], [[BROADCAST_SPLAT8]]
; CHECK-VS2-NEXT: store <vscale x 4 x i8> [[TMP35]], ptr [[TMP34]], align 1
Expand Down Expand Up @@ -279,7 +279,7 @@ while.end:

define void @trip_count_too_small(ptr nocapture noundef %p, i32 noundef %tc, i16 noundef %val) {
; CHECK-LABEL: define void @trip_count_too_small(
; CHECK-SAME: ptr nocapture noundef [[P:%.*]], i32 noundef [[TC:%.*]], i16 noundef [[VAL:%.*]]) #[[ATTR0]] {
; CHECK-SAME: ptr nocapture noundef [[P:%.*]], i32 noundef [[TC:%.*]], i16 noundef [[VAL:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[CMP7:%.*]] = icmp ult i32 [[TC]], 3
; CHECK-NEXT: br i1 [[CMP7]], label %[[WHILE_PREHEADER:.*]], label %[[WHILE_END:.*]]
Expand Down Expand Up @@ -440,8 +440,8 @@ define void @overflow_indvar_known_false(ptr nocapture noundef %p, i32 noundef %
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 16 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[TMP0]], [[INDEX]]
; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[V]], i64 [[TMP12]]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 [[TMP12]]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP13]], i32 0
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP14]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], <vscale x 16 x i8> poison)
; CHECK-NEXT: [[TMP15:%.*]] = add <vscale x 16 x i8> [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP15]], ptr [[TMP14]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK]])
Expand Down
Loading

0 comments on commit 1157187

Please sign in to comment.