From 997d61a44eb07fb09391ec5f79c51d37f11ca180 Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Tue, 6 Feb 2024 12:07:27 -0800 Subject: [PATCH] [LoopIdiom] Baseline tests for no_preserve_cheri_tags --- .../Transforms/Scalar/LoopIdiomRecognize.cpp | 25 ++++++++----- .../LoopIdiom/cheri-preserve-tags-memcpy.ll | 8 ++--- .../LoopIdiom/cheri-preserve-tags-store.ll | 36 +++++++++---------- 3 files changed, 39 insertions(+), 30 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 6edd44853fc8..f794b2a3167d 100644 --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -229,7 +229,8 @@ class LoopIdiomRecognize { Instruction *TheLoad, const SCEVAddRecExpr *StoreEv, const SCEVAddRecExpr *LoadEv, - const SCEV *BECount); + const SCEV *BECount, + PreserveCheriTags PreserveTags); bool avoidLIRForMultiBlockLoop(bool IsMemset = false, bool IsLoopMemset = false); @@ -888,7 +889,7 @@ bool LoopIdiomRecognize::processLoopMemCpy(MemCpyInst *MCI, return processLoopStoreOfLoopLoad( Dest, Source, SE->getConstant(Dest->getType(), SizeInBytes), MCI->getDestAlign(), MCI->getSourceAlign(), MCI, MCI, StoreEv, LoadEv, - BECount); + BECount, MCI->shouldPreserveCheriTags()); } /// processLoopMemSet - See if this memset can be promoted to a large memset. 
@@ -1255,8 +1256,17 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI, Value *StorePtr = SI->getPointerOperand(); const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr)); - unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType()); - + Type *StoreType = SI->getValueOperand()->getType(); + unsigned StoreSize = DL->getTypeStoreSize(StoreType); + auto PreserveTags = PreserveCheriTags::Unknown; + if (DL->isFatPointer(StoreType->getScalarType())) { + // Capabilities and vectors of capabilities need to preserve tags + PreserveTags = PreserveCheriTags::Required; + } else if (StoreType->isSingleValueType()) { + // But all stores of simple types (i.e. non-struct, non-array) never copy + // CHERI tag bits, so we can mark the memcpy as non-tag-preserving. + PreserveTags = PreserveCheriTags::Unnecessary; + } // The store must be feeding a non-volatile load. LoadInst *LI = cast<LoadInst>(SI->getValueOperand()); assert(LI->isUnordered() && "Expected only non-volatile non-ordered loads."); @@ -1270,7 +1280,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI, const SCEV *StoreSizeSCEV = SE->getConstant(StorePtr->getType(), StoreSize); return processLoopStoreOfLoopLoad(StorePtr, LoadPtr, StoreSizeSCEV, SI->getAlign(), LI->getAlign(), SI, LI, - StoreEv, LoadEv, BECount); + StoreEv, LoadEv, BECount, PreserveTags); } class MemmoveVerifier { @@ -1321,7 +1331,8 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad( Value *DestPtr, Value *SourcePtr, const SCEV *StoreSizeSCEV, MaybeAlign StoreAlign, MaybeAlign LoadAlign, Instruction *TheStore, Instruction *TheLoad, const SCEVAddRecExpr *StoreEv, - const SCEVAddRecExpr *LoadEv, const SCEV *BECount) { + const SCEVAddRecExpr *LoadEv, const SCEV *BECount, + PreserveCheriTags PreserveTags) { // FIXME: until llvm.memcpy.inline supports dynamic sizes, we need to // conservatively bail here, since otherwise we may have to transform @@ -1462,8 +1473,6 @@ bool
LoopIdiomRecognize::processLoopStoreOfLoopLoad( AATags = AATags.extendTo(-1); CallInst *NewCall = nullptr; - // https://github.com/CTSRD-CHERI/llvm-project/pull/612 - PreserveCheriTags PreserveTags = PreserveCheriTags::TODO; // Check whether to generate an unordered atomic memcpy: // If the load or store are atomic, then they must necessarily be unordered // by previous checks. diff --git a/llvm/test/Transforms/LoopIdiom/cheri-preserve-tags-memcpy.ll b/llvm/test/Transforms/LoopIdiom/cheri-preserve-tags-memcpy.ll index f51b1f825de5..7f69b7cd4a14 100644 --- a/llvm/test/Transforms/LoopIdiom/cheri-preserve-tags-memcpy.ll +++ b/llvm/test/Transforms/LoopIdiom/cheri-preserve-tags-memcpy.ll @@ -33,7 +33,7 @@ define void @no_preserve(ptr addrspace(200) noalias writeonly %dst, ptr addrspac ; HYBRID: bb17.preheader: ; HYBRID-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 16 ; HYBRID-NEXT: [[TMP1:%.*]] = shl nuw i64 [[COUNT]], 4 -; HYBRID-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[DST:%.*]], ptr align 4 [[UGLYGEP]], i64 [[TMP1]], i1 false) +; HYBRID-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[DST:%.*]], ptr align 4 [[UGLYGEP]], i64 [[TMP1]], i1 false) #[[ATTR2:[0-9]+]] ; HYBRID-NEXT: br label [[BB30]] ; HYBRID: bb30: ; HYBRID-NEXT: ret void @@ -81,7 +81,7 @@ define void @must_preserve(ptr addrspace(200) noalias writeonly %dst, ptr addrsp ; HYBRID: bb17.preheader: ; HYBRID-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 16 ; HYBRID-NEXT: [[TMP1:%.*]] = shl nuw i64 [[COUNT]], 4 -; HYBRID-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[DST:%.*]], ptr align 4 [[UGLYGEP]], i64 [[TMP1]], i1 false) +; HYBRID-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[DST:%.*]], ptr align 4 [[UGLYGEP]], i64 [[TMP1]], i1 false) #[[ATTR3:[0-9]+]] ; HYBRID-NEXT: br label [[BB30]] ; HYBRID: bb30: ; HYBRID-NEXT: ret void @@ -130,5 +130,5 @@ attributes #2 = { must_preserve_cheri_tags } ; UTC_ARGS: --disable ; CHECK: attributes #1 = { 
argmemonly nocallback nofree nounwind willreturn } -; TODO-CHECK: attributes #[[ATTR2]] = { no_preserve_cheri_tags } -; TODO-CHECK: attributes #[[ATTR3]] = { must_preserve_cheri_tags } +; CHECK: attributes #[[ATTR2]] = { no_preserve_cheri_tags } +; CHECK: attributes #[[ATTR3]] = { must_preserve_cheri_tags } diff --git a/llvm/test/Transforms/LoopIdiom/cheri-preserve-tags-store.ll b/llvm/test/Transforms/LoopIdiom/cheri-preserve-tags-store.ll index 31415a821fe6..48451a1f4967 100644 --- a/llvm/test/Transforms/LoopIdiom/cheri-preserve-tags-store.ll +++ b/llvm/test/Transforms/LoopIdiom/cheri-preserve-tags-store.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --force-update ; We should be setting no_preserve_cheri_tags for loops that copy integers, and must_preserve_cheri_tags for capability copies. ; FIXME: this does not work with addrspace(200) pointers yet since we need SCEV. -; RUN: sed -e 's/addrspace(200)/addrspace(0)/g' -e 's/-A200-P200-G200//g' %s | \ +; RUN: sed -e 's/-A200-P200-G200//g' %s | \ ; RUN: opt --passes='require<aa>,loop(loop-idiom,loop-deletion),simplifycfg' -aa-pipeline=basic-aa -S | \ ; RUN: FileCheck %s --check-prefixes=HYBRID ; RUN: opt --passes='require<aa>,loop(loop-idiom,loop-deletion),simplifycfg' -aa-pipeline=basic-aa -S < %s | \ @@ -11,14 +11,14 @@ target datalayout = "e-m:e-pf200:128:128:128:64-i8:8:32-i16:16:32-i64:64-i128:12 %struct.state = type { [25 x i32], i32 } %struct.capstate = type { [25 x ptr addrspace(200)], i32 } -@nocap = unnamed_addr addrspace(200) global [25 x i32] zeroinitializer, align 4 -@cap = unnamed_addr addrspace(200) global [25 x ptr addrspace(200)] zeroinitializer, align 4 -@k = unnamed_addr addrspace(200) global i32 0, align 4 -define void
@get_state(ptr addrspace(200) nocapture noalias %state) local_unnamed_addr addrspace(200) #0 { +define void @get_state(ptr addrspace("A") nocapture noalias %state) local_unnamed_addr addrspace("P") #0 { ; HYBRID-LABEL: @get_state( ; HYBRID-NEXT: entry: -; HYBRID-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[STATE:%.*]], ptr align 4 @nocap, i64 100, i1 false) +; HYBRID-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[STATE:%.*]], ptr align 4 @nocap, i64 100, i1 false) #[[ATTR1:[0-9]+]] ; HYBRID-NEXT: ret void ; ; PURECAP-LABEL: @get_state( @@ -44,10 +44,10 @@ for.body.preheader: ; preds = %0 for.body: ; preds = %entry, %for.body %i.08 = phi i64 [ 0, %for.body.preheader ], [ %inc, %for.body ] - %arrayidx = getelementptr inbounds [25 x i32], ptr addrspace(200) @nocap, i64 0, i64 %i.08 - %0 = load i32, ptr addrspace(200) %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds %struct.state, ptr addrspace(200) %state, i64 0, i32 0, i64 %i.08 - store i32 %0, ptr addrspace(200) %arrayidx2, align 4 + %arrayidx = getelementptr inbounds [25 x i32], ptr addrspace("G") @nocap, i64 0, i64 %i.08 + %0 = load i32, ptr addrspace("A") %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds %struct.state, ptr addrspace("A") %state, i64 0, i32 0, i64 %i.08 + store i32 %0, ptr addrspace("A") %arrayidx2, align 4 %inc = add nuw nsw i64 %i.08, 1 %exitcond = icmp ne i64 %inc, 25 br i1 %exitcond, label %for.body, label %for.end @@ -56,10 +56,10 @@ for.end: ; preds = %for.body ret void } -define void @get_cap_state(ptr addrspace(200) nocapture noalias %state) local_unnamed_addr addrspace(200) #0 { +define void @get_cap_state(ptr addrspace("A") nocapture noalias %state) local_unnamed_addr addrspace("P") #0 { ; HYBRID-LABEL: @get_cap_state( ; HYBRID-NEXT: entry: -; HYBRID-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[STATE:%.*]], ptr align 16 @cap, i64 200, i1 false) +; HYBRID-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[STATE:%.*]], ptr align 16 @cap, i64 400, i1 
false) #[[ATTR2:[0-9]+]] ; HYBRID-NEXT: ret void ; ; PURECAP-LABEL: @get_cap_state( @@ -82,10 +82,10 @@ entry: for.body: ; preds = %entry, %for.body %i.08 = phi i64 [ 0, %entry ], [ %inc, %for.body ] - %arrayidx = getelementptr inbounds [25 x ptr addrspace(200)], ptr addrspace(200) @cap, i64 0, i64 %i.08 - %0 = load ptr addrspace(200), ptr addrspace(200) %arrayidx, align 16 - %arrayidx2 = getelementptr inbounds %struct.capstate, ptr addrspace(200) %state, i64 0, i32 0, i64 %i.08 - store ptr addrspace(200) %0, ptr addrspace(200) %arrayidx2, align 16 + %arrayidx = getelementptr inbounds [25 x ptr addrspace(200)], ptr addrspace("G") @cap, i64 0, i64 %i.08 + %0 = load ptr addrspace(200), ptr addrspace("G") %arrayidx, align 16 + %arrayidx2 = getelementptr inbounds %struct.capstate, ptr addrspace("A") %state, i64 0, i32 0, i64 %i.08 + store ptr addrspace(200) %0, ptr addrspace("A") %arrayidx2, align 16 %inc = add nuw nsw i64 %i.08, 1 %exitcond = icmp ne i64 %inc, 25 br i1 %exitcond, label %for.body, label %for.end @@ -95,5 +95,5 @@ for.end: ; preds = %for.body } ; UTC_ARGS: --disable -; HYBRID-TODO: attributes #[[ATTR1]] = { no_preserve_cheri_tags } -; HYBRID-TODO: attributes #[[ATTR2]] = { must_preserve_cheri_tags } +; HYBRID: attributes #[[ATTR1]] = { no_preserve_cheri_tags } +; HYBRID: attributes #[[ATTR2]] = { must_preserve_cheri_tags }