
Commit

Reapply "[msan] Add handlers for AVX masked load/store intrinsics (#1…
Browse files Browse the repository at this point in the history
…23857)"

This reverts commit b9d301c, i.e., relands db79fb2.

I had mistakenly thought this caused a buildbot breakage (the actual culprit was my other patch, #123980, which landed at the same time) and thus had reverted it even though AFAIK it is not broken.
thurstond committed Jan 28, 2025
1 parent 17d1523 commit 063db51
Showing 5 changed files with 489 additions and 289 deletions.
154 changes: 153 additions & 1 deletion llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -3046,7 +3046,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (maybeHandleSimpleNomemIntrinsic(I))
return true;

// FIXME: detect and handle SSE maskstore/maskload
// FIXME: detect and handle SSE maskstore/maskload?
// Some cases are now handled in handleAVXMasked{Load,Store}.
return false;
}

@@ -3683,6 +3684,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// TODO: Store origin.
}

// Intrinsic::masked_store
//
// Note: handleAVXMaskedStore handles AVX/AVX2 variants, though AVX512 masked
// stores are lowered to Intrinsic::masked_store.
void handleMaskedStore(IntrinsicInst &I) {
IRBuilder<> IRB(&I);
Value *V = I.getArgOperand(0);
@@ -3713,6 +3718,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
std::max(Alignment, kMinOriginAlignment));
}

// Intrinsic::masked_load
//
// Note: handleAVXMaskedLoad handles AVX/AVX2 variants, though AVX512 masked
// loads are lowered to Intrinsic::masked_load.
void handleMaskedLoad(IntrinsicInst &I) {
IRBuilder<> IRB(&I);
Value *Ptr = I.getArgOperand(0);
@@ -3754,6 +3763,125 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setOrigin(&I, Origin);
}

// e.g., void @llvm.x86.avx.maskstore.ps.256(ptr, <8 x i32>, <8 x float>)
//                                           dst  mask       src
//
// AVX512 masked stores are lowered to Intrinsic::masked_store and are handled
// by handleMaskedStore.
//
// This function handles AVX and AVX2 masked stores; these use the MSBs of a
// vector of integers, unlike the LLVM masked intrinsics, which require a
// vector of booleans. X86InstCombineIntrinsic.cpp::simplifyX86MaskedLoad
// mentions that the x86 backend does not know how to efficiently convert
// from a vector of booleans back into the AVX mask format; therefore, they
// (and we) do not reduce AVX/AVX2 masked intrinsics into LLVM masked
// intrinsics.
void handleAVXMaskedStore(IntrinsicInst &I) {
IRBuilder<> IRB(&I);

Value *Dst = I.getArgOperand(0);
assert(Dst->getType()->isPointerTy() && "Destination is not a pointer!");

Value *Mask = I.getArgOperand(1);
assert(isa<VectorType>(Mask->getType()) && "Mask is not a vector!");

Value *Src = I.getArgOperand(2);
assert(isa<VectorType>(Src->getType()) && "Source is not a vector!");

const Align Alignment = Align(1);

Value *SrcShadow = getShadow(Src);

if (ClCheckAccessAddress) {
insertShadowCheck(Dst, &I);
insertShadowCheck(Mask, &I);
}

Value *DstShadowPtr;
Value *DstOriginPtr;
std::tie(DstShadowPtr, DstOriginPtr) = getShadowOriginPtr(
Dst, IRB, SrcShadow->getType(), Alignment, /*isStore*/ true);

SmallVector<Value *, 2> ShadowArgs;
ShadowArgs.append(1, DstShadowPtr);
ShadowArgs.append(1, Mask);
// The intrinsic may require floating-point but shadows can be arbitrary
// bit patterns, of which some would be interpreted as "invalid"
// floating-point values (NaN etc.); we assume the intrinsic will happily
// copy them.
ShadowArgs.append(1, IRB.CreateBitCast(SrcShadow, Src->getType()));

CallInst *CI =
IRB.CreateIntrinsic(IRB.getVoidTy(), I.getIntrinsicID(), ShadowArgs);
setShadow(&I, CI);

if (!MS.TrackOrigins)
return;

// Approximation only
auto &DL = F.getDataLayout();
paintOrigin(IRB, getOrigin(Src), DstOriginPtr,
DL.getTypeStoreSize(SrcShadow->getType()),
std::max(Alignment, kMinOriginAlignment));
}
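// Illustrative sketch, not part of this patch: a standalone scalar model of
// the AVX/AVX2 maskstore semantics described above. Only the most significant
// bit of each 32-bit mask element selects a lane, and masked-off lanes of the
// destination are left untouched. The helper name is invented for this sketch.
#include <cstdint>

static void avx2MaskStoreModel(float *Dst, const int32_t Mask[8],
                               const float Src[8]) {
  for (int I = 0; I != 8; ++I)
    if (Mask[I] < 0)   // MSB set <=> negative when read as a signed i32
      Dst[I] = Src[I]; // masked-off lanes keep their previous contents
}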

// e.g., <8 x float> @llvm.x86.avx.maskload.ps.256(ptr, <8 x i32>)
//       return                                    src  mask
//
// Masked-off values are replaced with 0, which conveniently also represents
// initialized memory.
//
// AVX512 masked loads are lowered to Intrinsic::masked_load and are handled
// by handleMaskedLoad.
//
// We do not combine this with handleMaskedLoad; see comment in
// handleAVXMaskedStore for the rationale.
//
// This is subtly different from handleIntrinsicByApplyingToShadow(I, 1)
// because we need to apply getShadowOriginPtr, not getShadow, to the first
// parameter.
void handleAVXMaskedLoad(IntrinsicInst &I) {
IRBuilder<> IRB(&I);

Value *Src = I.getArgOperand(0);
assert(Src->getType()->isPointerTy() && "Source is not a pointer!");

Value *Mask = I.getArgOperand(1);
assert(isa<VectorType>(Mask->getType()) && "Mask is not a vector!");

const Align Alignment = Align(1);

if (ClCheckAccessAddress) {
insertShadowCheck(Mask, &I);
}

Type *SrcShadowTy = getShadowTy(Src);
Value *SrcShadowPtr, *SrcOriginPtr;
std::tie(SrcShadowPtr, SrcOriginPtr) =
getShadowOriginPtr(Src, IRB, SrcShadowTy, Alignment, /*isStore*/ false);

SmallVector<Value *, 2> ShadowArgs;
ShadowArgs.append(1, SrcShadowPtr);
ShadowArgs.append(1, Mask);

CallInst *CI =
IRB.CreateIntrinsic(I.getType(), I.getIntrinsicID(), ShadowArgs);
// The intrinsic may require floating-point but shadows can be arbitrary
// bit patterns, of which some would be interpreted as "invalid"
// floating-point values (NaN etc.); we assume the intrinsic will happily
// copy them.
setShadow(&I, IRB.CreateBitCast(CI, getShadowTy(&I)));

if (!MS.TrackOrigins)
return;

// The "pass-through" value is always zero (initialized). To the extent
// that that results in initialized aligned 4-byte chunks, the origin value
// is ignored. It is therefore correct to simply copy the origin from src.
Value *PtrSrcOrigin = IRB.CreateLoad(MS.OriginTy, SrcOriginPtr);
setOrigin(&I, PtrSrcOrigin);
}
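// Illustrative sketch, not part of this patch: a standalone scalar model of
// the AVX/AVX2 maskload semantics described above. Masked-off lanes read as 0,
// which is why the zero "pass-through" value is also a fully initialized
// shadow. The helper name is invented for this sketch.
#include <cstdint>

static void avx2MaskLoadModel(float Out[8], const float *Src,
                              const int32_t Mask[8]) {
  for (int I = 0; I != 8; ++I)
    Out[I] = (Mask[I] < 0) ? Src[I] : 0.0f; // MSB of Mask[I] selects the lane
}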

// Instrument BMI / BMI2 intrinsics.
// All of these intrinsics are Z = I(X, Y)
// where the types of all operands and the result match, and are either i32 or
@@ -4466,6 +4594,30 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
break;
}

case Intrinsic::x86_avx_maskstore_ps:
case Intrinsic::x86_avx_maskstore_pd:
case Intrinsic::x86_avx_maskstore_ps_256:
case Intrinsic::x86_avx_maskstore_pd_256:
case Intrinsic::x86_avx2_maskstore_d:
case Intrinsic::x86_avx2_maskstore_q:
case Intrinsic::x86_avx2_maskstore_d_256:
case Intrinsic::x86_avx2_maskstore_q_256: {
handleAVXMaskedStore(I);
break;
}

case Intrinsic::x86_avx_maskload_ps:
case Intrinsic::x86_avx_maskload_pd:
case Intrinsic::x86_avx_maskload_ps_256:
case Intrinsic::x86_avx_maskload_pd_256:
case Intrinsic::x86_avx2_maskload_d:
case Intrinsic::x86_avx2_maskload_q:
case Intrinsic::x86_avx2_maskload_d_256:
case Intrinsic::x86_avx2_maskload_q_256: {
handleAVXMaskedLoad(I);
break;
}

case Intrinsic::fshl:
case Intrinsic::fshr:
handleFunnelShift(I);
