From d98519715617a462c3ebadc778558b717354b6d2 Mon Sep 17 00:00:00 2001 From: Jinsong Ji Date: Tue, 22 Oct 2024 17:39:05 -0400 Subject: [PATCH 01/68] [NFC][Fuzzer] Refactor to avoid a false warning from gcc (#112944) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is one of the many PRs to fix errors with LLVM_ENABLE_WERROR=on. Built by GCC 11. Refactor the code to avoid the false warning llvm-project/llvm/tools/llvm-isel-fuzzer/llvm-isel-fuzzer.cpp llvm-project/llvm/tools/llvm-isel-fuzzer/llvm-isel-fuzzer.cpp: In function ‘int LLVMFuzzerInitialize(int*, char***)’: llvm-project/llvm/tools/llvm-isel-fuzzer/llvm-isel-fuzzer.cpp:141:43: error: ISO C++ forbids zero-size array ‘argv’ [-Werror=pedantic] 141 | ExitOnError ExitOnErr(std::string(*argv[0]) + ": error:"); | --- llvm/tools/llvm-isel-fuzzer/llvm-isel-fuzzer.cpp | 9 +++++---- llvm/tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp | 12 ++++++------ 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/llvm/tools/llvm-isel-fuzzer/llvm-isel-fuzzer.cpp b/llvm/tools/llvm-isel-fuzzer/llvm-isel-fuzzer.cpp index 742f7b94e116f3..cc7d0869da05ef 100644 --- a/llvm/tools/llvm-isel-fuzzer/llvm-isel-fuzzer.cpp +++ b/llvm/tools/llvm-isel-fuzzer/llvm-isel-fuzzer.cpp @@ -115,17 +115,18 @@ static void handleLLVMFatalError(void *, const char *Message, bool) { extern "C" LLVM_ATTRIBUTE_USED int LLVMFuzzerInitialize(int *argc, char ***argv) { EnableDebugBuffering = true; + StringRef ExecName = *argv[0]; InitializeAllTargets(); InitializeAllTargetMCs(); InitializeAllAsmPrinters(); InitializeAllAsmParsers(); - handleExecNameEncodedBEOpts(*argv[0]); + handleExecNameEncodedBEOpts(ExecName); parseFuzzerCLOpts(*argc, *argv); if (TargetTriple.empty()) { - errs() << *argv[0] << ": -mtriple must be specified\n"; + errs() << ExecName << ": -mtriple must be specified\n"; exit(1); } @@ -135,10 +136,10 @@ extern "C" LLVM_ATTRIBUTE_USED int LLVMFuzzerInitialize(int *argc, if (auto Level = 
CodeGenOpt::parseLevel(OptLevel)) { OLvl = *Level; } else { - errs() << argv[0] << ": invalid optimization level.\n"; + errs() << ExecName << ": invalid optimization level.\n"; return 1; } - ExitOnError ExitOnErr(std::string(*argv[0]) + ": error:"); + ExitOnError ExitOnErr(std::string(ExecName) + ": error:"); TM = ExitOnErr(codegen::createTargetMachineForTriple( Triple::normalize(TargetTriple), OLvl)); assert(TM && "Could not allocate target machine!"); diff --git a/llvm/tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp b/llvm/tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp index fcccf0e07ef8d2..a4af4b4a118fd4 100644 --- a/llvm/tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp +++ b/llvm/tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp @@ -175,6 +175,7 @@ static void handleLLVMFatalError(void *, const char *Message, bool) { extern "C" LLVM_ATTRIBUTE_USED int LLVMFuzzerInitialize(int *argc, char ***argv) { EnableDebugBuffering = true; + StringRef ExecName = *argv[0]; // Make sure we print the summary and the current unit when LLVM errors out. 
install_fatal_error_handler(handleLLVMFatalError, nullptr); @@ -188,17 +189,16 @@ extern "C" LLVM_ATTRIBUTE_USED int LLVMFuzzerInitialize(int *argc, // Parse input options // - handleExecNameEncodedOptimizerOpts(*argv[0]); + handleExecNameEncodedOptimizerOpts(ExecName); parseFuzzerCLOpts(*argc, *argv); // Create TargetMachine // - if (TargetTripleStr.empty()) { - errs() << *argv[0] << ": -mtriple must be specified\n"; + errs() << ExecName << ": -mtriple must be specified\n"; exit(1); } - ExitOnError ExitOnErr(std::string(*argv[0]) + ": error:"); + ExitOnError ExitOnErr(std::string(ExecName) + ": error:"); TM = ExitOnErr(codegen::createTargetMachineForTriple( Triple::normalize(TargetTripleStr))); @@ -206,14 +206,14 @@ extern "C" LLVM_ATTRIBUTE_USED int LLVMFuzzerInitialize(int *argc, // if (PassPipeline.empty()) { - errs() << *argv[0] << ": at least one pass should be specified\n"; + errs() << ExecName << ": at least one pass should be specified\n"; exit(1); } PassBuilder PB(TM.get()); ModulePassManager MPM; if (auto Err = PB.parsePassPipeline(MPM, PassPipeline)) { - errs() << *argv[0] << ": " << toString(std::move(Err)) << "\n"; + errs() << ExecName << ": " << toString(std::move(Err)) << "\n"; exit(1); } From 0ffa29fe8152e247eea87017e8c5aeedc6329c15 Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Tue, 22 Oct 2024 15:08:02 -0700 Subject: [PATCH 02/68] [clang][modules] Timestamp PCM files when writing (#112452) Clang uses timestamp files to track the last time an implicitly-built PCM file was verified to be up-to-date with regard to its inputs. With `-fbuild-session-{file,timestamp}=` and `-fmodules-validate-once-per-build-session` this reduces the number of times a PCM file is checked per "build session". The behavior I'm seeing with the current scheme is that when lots of Clang instances wait for the same PCM to be built, they race to validate it as soon as the file lock gets released, causing lots of concurrent IO. 
This patch makes it so that the timestamp is written by the same Clang instance responsible for building the PCM while still holding the lock. This makes it so that whenever a PCM file gets compiled, it's never re-validated in the same build session. I believe this is as sound as the current scheme. One thing to be aware of is that there might be a time interval between accessing input file N and writing the timestamp file, where changes to input files 0..(D); } + +void serialization::updateModuleTimestamp(StringRef ModuleFilename) { + // Overwrite the timestamp file contents so that file's mtime changes. + std::error_code EC; + llvm::raw_fd_ostream OS(ModuleFile::getTimestampFilename(ModuleFilename), EC, + llvm::sys::fs::OF_TextWithCRLF); + if (EC) + return; + OS << "Timestamp file\n"; + OS.close(); + OS.clear_error(); // Avoid triggering a fatal error. +} diff --git a/clang/lib/Serialization/ASTCommon.h b/clang/lib/Serialization/ASTCommon.h index 0230908d3e0528..2a765eafe08951 100644 --- a/clang/lib/Serialization/ASTCommon.h +++ b/clang/lib/Serialization/ASTCommon.h @@ -15,6 +15,7 @@ #include "clang/AST/ASTContext.h" #include "clang/AST/DeclFriend.h" +#include "clang/Basic/LLVM.h" #include "clang/Serialization/ASTBitCodes.h" namespace clang { @@ -100,6 +101,8 @@ inline bool isPartOfPerModuleInitializer(const Decl *D) { return false; } +void updateModuleTimestamp(StringRef ModuleFilename); + } // namespace serialization } // namespace clang diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 60b708067dc597..7d9170e7f0b479 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -4416,19 +4416,6 @@ bool ASTReader::isGlobalIndexUnavailable() const { !hasGlobalIndex() && TriedLoadingGlobalIndex; } -static void updateModuleTimestamp(ModuleFile &MF) { - // Overwrite the timestamp file contents so that file's mtime changes. 
- std::string TimestampFilename = MF.getTimestampFilename(); - std::error_code EC; - llvm::raw_fd_ostream OS(TimestampFilename, EC, - llvm::sys::fs::OF_TextWithCRLF); - if (EC) - return; - OS << "Timestamp file\n"; - OS.close(); - OS.clear_error(); // Avoid triggering a fatal error. -} - /// Given a cursor at the start of an AST file, scan ahead and drop the /// cursor into the start of the given block ID, returning false on success and /// true on failure. @@ -4707,7 +4694,7 @@ ASTReader::ASTReadResult ASTReader::ReadAST(StringRef FileName, ModuleKind Type, ImportedModule &M = Loaded[I]; if (M.Mod->Kind == MK_ImplicitModule && M.Mod->InputFilesValidationTimestamp < HSOpts.BuildSessionTimestamp) - updateModuleTimestamp(*M.Mod); + updateModuleTimestamp(M.Mod->FileName); } } diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 938d7b525cb959..494890284d2f2c 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -4905,6 +4905,12 @@ ASTFileSignature ASTWriter::WriteAST(Sema &SemaRef, StringRef OutputFile, this->BaseDirectory.clear(); WritingAST = false; + + if (WritingModule && SemaRef.PP.getHeaderSearchInfo() + .getHeaderSearchOpts() + .ModulesValidateOncePerBuildSession) + updateModuleTimestamp(OutputFile); + if (ShouldCacheASTInMemory) { // Construct MemoryBuffer and update buffer manager. 
ModuleCache.addBuiltPCM(OutputFile, diff --git a/clang/lib/Serialization/ModuleManager.cpp b/clang/lib/Serialization/ModuleManager.cpp index e74a16b6368028..ba78c9ef5af67f 100644 --- a/clang/lib/Serialization/ModuleManager.cpp +++ b/clang/lib/Serialization/ModuleManager.cpp @@ -170,7 +170,8 @@ ModuleManager::addModule(StringRef FileName, ModuleKind Type, NewModule->InputFilesValidationTimestamp = 0; if (NewModule->Kind == MK_ImplicitModule) { - std::string TimestampFilename = NewModule->getTimestampFilename(); + std::string TimestampFilename = + ModuleFile::getTimestampFilename(NewModule->FileName); llvm::vfs::Status Status; // A cached stat value would be fine as well. if (!FileMgr.getNoncachedStatValue(TimestampFilename, Status)) From fe480cf9232c91d4fad883b4d2748dcc5a6fc0c5 Mon Sep 17 00:00:00 2001 From: jofrn Date: Tue, 22 Oct 2024 15:17:52 -0700 Subject: [PATCH 03/68] [ARM] Use proper types for these records. (#113370) llvm#112904 will add typechecking to submulticlass arguments, and these ones are currently mistyped. 
--- llvm/lib/Target/ARM/ARMInstrMVE.td | 16 ++++++++-------- llvm/lib/Target/ARM/ARMInstrNEON.td | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index 12c3968b9cecea..04d5d00eef10e6 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -1998,7 +1998,7 @@ class MVE_VQxDMULH_Base size, bit rounding, def MVEvqdmulh : SDNode<"ARMISD::VQDMULH", SDTIntBinOp>; multiclass MVE_VQxDMULH_m { def "" : MVE_VQxDMULH_Base; defvar Inst = !cast(NAME); @@ -2199,7 +2199,7 @@ def subnsw : PatFrag<(ops node:$lhs, node:$rhs), }]>; multiclass MVE_VRHADD_m { + SDPatternOperator unpred_op, Intrinsic PredInt> { def "" : MVE_VRHADD_Base; defvar Inst = !cast(NAME); defm : MVE_TwoOpPattern(NAME)>; @@ -2303,7 +2303,7 @@ class MVE_VHSUB_ size, : MVE_VHADDSUB<"vhsub", suffix, U, 0b1, size, pattern>; multiclass MVE_VHADD_m { def "" : MVE_VHADD_; defvar Inst = !cast(NAME); @@ -2335,7 +2335,7 @@ defm MVE_VHADDu16 : MVE_VHADD; defm MVE_VHADDu32 : MVE_VHADD; multiclass MVE_VHSUB_m { def "" : MVE_VHSUB_; defvar Inst = !cast(NAME); @@ -4794,7 +4794,7 @@ class MVE_VxMULH size, bit round, let validForTailPredication = 1; } -multiclass MVE_VxMULH_m { def "" : MVE_VxMULH; defvar Inst = !cast(NAME); @@ -5370,8 +5370,8 @@ class MVE_VxADDSUB_qr { +multiclass MVE_VHADDSUB_qr_m { def "" : MVE_VxADDSUB_qr; defm : MVE_TwoOpPatternDup(NAME)>; defm : MVE_vec_scalar_int_pat_m(NAME), @@ -5576,7 +5576,7 @@ class MVE_VxxMUL_qr { + SDPatternOperator Op, Intrinsic int_unpred, Intrinsic int_pred> { def "" : MVE_VxxMUL_qr; let Predicates = [HasMVEInt] in { diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td index 48dcbdb137123a..20c52206fd3cd6 100644 --- a/llvm/lib/Target/ARM/ARMInstrNEON.td +++ b/llvm/lib/Target/ARM/ARMInstrNEON.td @@ -4906,7 +4906,7 @@ let Predicates = [HasMatMulInt8] in { } multiclass SUDOTLane - : N3VMixedDotLane { + : N3VMixedDotLane { def 
: Pat< (AccumTy (int_arm_neon_usdot (AccumTy RegTy:$Vd), (InputTy (bitconvert (AccumTy From 2e0506f83bfde6db93454bdf28e4a71c160d4f5b Mon Sep 17 00:00:00 2001 From: Florian Mayer Date: Tue, 22 Oct 2024 15:23:13 -0700 Subject: [PATCH 04/68] [NFC] [MTE] Remove useless yaml2obj from test (#113374) We already have the .o, there is no reason to go .o -> YAML -> .o --- llvm/test/MC/AArch64/global-tagging.ll | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/test/MC/AArch64/global-tagging.ll b/llvm/test/MC/AArch64/global-tagging.ll index 4961ec04c5fba4..c8b3f52401dc11 100644 --- a/llvm/test/MC/AArch64/global-tagging.ll +++ b/llvm/test/MC/AArch64/global-tagging.ll @@ -11,7 +11,6 @@ ; RUN: obj2yaml %t.o -o %t.yaml ; RUN: FileCheck %s --input-file=%t.yaml --check-prefix=CHECK-YAML -; RUN: yaml2obj %t.yaml -o %t.o ; RUN: llvm-readelf -r %t.o | FileCheck %s --check-prefix=CHECK-RELOCS ;; Check we don't create relocations referencing a section symbol for sanitize_memtag globals. From b4fcaa137f057e68a9011b26c11627a16a8c9374 Mon Sep 17 00:00:00 2001 From: Michael O'Farrell Date: Tue, 22 Oct 2024 16:01:13 -0700 Subject: [PATCH 05/68] [PGO][SampledInstr] Correct off by 1s and allow 100% sampling (#113350) This corrects a couple of off-by-one errors related to the sampling of **instrumented** counters, and enables setting 100% rates for burst sampling (burst duration = period). Off by ones: Prior to this change it was impossible to set a period of 65535 because this was converted to fast sampling which rolls over at USHRT_MAX + 1 (65536). Similarly the burst durations would collect burst duration + 1 counts as they used a ULE comparison. 100% sampling: Although this is not useful for a productionized use case, it does allow for more deterministic testing with the sampling checks in place. After all the off-by-one errors are fixed, allowing for 100% sampling is a matter of letting burst duration = period. 
--- .../Instrumentation/InstrProfiling.cpp | 95 +++++++++++-------- .../PGOProfile/counter_promo_sampling.ll | 2 +- .../Transforms/PGOProfile/cspgo_sample.ll | 4 +- .../instrprof_burst_sampling_fast.ll | 7 +- .../instrprof_burst_sampling_full.ll | 12 ++- .../instrprof_burst_sampling_full_intsize.ll | 6 +- .../PGOProfile/instrprof_simple_sampling.ll | 12 +-- 7 files changed, 81 insertions(+), 57 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index d7d809dfdd5f65..2a6bda839d36ed 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -181,24 +181,52 @@ static cl::opt SampledInstr("sampled-instrumentation", cl::ZeroOrMore, static cl::opt SampledInstrPeriod( "sampled-instr-period", - cl::desc("Set the profile instrumentation sample period. For each sample " - "period, a fixed number of consecutive samples will be recorded. " - "The number is controlled by 'sampled-instr-burst-duration' flag. " - "The default sample period of 65535 is optimized for generating " - "efficient code that leverages unsigned integer wrapping in " - "overflow."), - cl::init(65535)); + cl::desc("Set the profile instrumentation sample period. A sample period " + "of 0 is invalid. For each sample period, a fixed number of " + "consecutive samples will be recorded. The number is controlled " + "by 'sampled-instr-burst-duration' flag. The default sample " + "period of 65536 is optimized for generating efficient code that " + "leverages unsigned short integer wrapping in overflow, but this " + "is disabled under simple sampling (burst duration = 1)."), + cl::init(USHRT_MAX + 1)); static cl::opt SampledInstrBurstDuration( "sampled-instr-burst-duration", cl::desc("Set the profile instrumentation burst duration, which can range " - "from 0 to one less than the value of 'sampled-instr-period'. 
" + "from 1 to the value of 'sampled-instr-period' (0 is invalid). " "This number of samples will be recorded for each " - "'sampled-instr-period' count update. Setting to 1 enables " - "simple sampling, in which case it is recommended to set " + "'sampled-instr-period' count update. Setting to 1 enables simple " + "sampling, in which case it is recommended to set " "'sampled-instr-period' to a prime number."), cl::init(200)); +struct SampledInstrumentationConfig { + unsigned BurstDuration; + unsigned Period; + bool UseShort; + bool IsSimpleSampling; + bool IsFastSampling; +}; + +static SampledInstrumentationConfig getSampledInstrumentationConfig() { + SampledInstrumentationConfig config; + config.BurstDuration = SampledInstrBurstDuration.getValue(); + config.Period = SampledInstrPeriod.getValue(); + if (config.BurstDuration > config.Period) + report_fatal_error( + "SampledBurstDuration must be less than or equal to SampledPeriod"); + if (config.Period == 0 || config.BurstDuration == 0) + report_fatal_error( + "SampledPeriod and SampledBurstDuration must be greater than 0"); + config.IsSimpleSampling = (config.BurstDuration == 1); + // If (BurstDuration == 1 && Period == 65536), generate the simple sampling + // style code. 
+ config.IsFastSampling = + (!config.IsSimpleSampling && config.Period == USHRT_MAX + 1); + config.UseShort = (config.Period <= USHRT_MAX) || config.IsFastSampling; + return config; +} + using LoadStorePair = std::pair; static uint64_t getIntModuleFlagOrZero(const Module &M, StringRef Flag) { @@ -665,7 +693,7 @@ PreservedAnalyses InstrProfilingLoweringPass::run(Module &M, // (1) Full burst sampling: We transform: // Increment_Instruction; // to: -// if (__llvm_profile_sampling__ < SampledInstrBurstDuration) { +// if (__llvm_profile_sampling__ <= SampledInstrBurstDuration - 1) { // Increment_Instruction; // } // __llvm_profile_sampling__ += 1; @@ -680,14 +708,14 @@ PreservedAnalyses InstrProfilingLoweringPass::run(Module &M, // "__llvm_profile_sampling__" variable is an unsigned type, meaning it will // wrap around to zero when overflows. In this case, the second check is // unnecessary, so we won't generate check2 when the SampledInstrPeriod is -// set to 65535 (64K - 1). The code after: -// if (__llvm_profile_sampling__ < SampledInstrBurstDuration) { +// set to 65536 (64K). 
The code after: +// if (__llvm_profile_sampling__ <= SampledInstrBurstDuration - 1) { // Increment_Instruction; // } // __llvm_profile_sampling__ += 1; // // (3) Simple sampling: -// When SampledInstrBurstDuration sets to 1, we do a simple sampling: +// When SampledInstrBurstDuration is set to 1, we do a simple sampling: // __llvm_profile_sampling__ += 1; // if (__llvm_profile_sampling__ >= SampledInstrPeriod) { // __llvm_profile_sampling__ = 0; @@ -706,27 +734,16 @@ void InstrLowerer::doSampling(Instruction *I) { if (!isSamplingEnabled()) return; - unsigned SampledBurstDuration = SampledInstrBurstDuration.getValue(); - unsigned SampledPeriod = SampledInstrPeriod.getValue(); - if (SampledBurstDuration >= SampledPeriod) { - report_fatal_error( - "SampledPeriod needs to be greater than SampledBurstDuration"); - } - bool UseShort = (SampledPeriod <= USHRT_MAX); - bool IsSimpleSampling = (SampledBurstDuration == 1); - // If (SampledBurstDuration == 1 && SampledPeriod == 65535), generate - // the simple sampling style code. - bool IsFastSampling = (!IsSimpleSampling && SampledPeriod == 65535); - - auto GetConstant = [UseShort](IRBuilder<> &Builder, uint32_t C) { - if (UseShort) + SampledInstrumentationConfig config = getSampledInstrumentationConfig(); + auto GetConstant = [&config](IRBuilder<> &Builder, uint32_t C) { + if (config.UseShort) return Builder.getInt16(C); else return Builder.getInt32(C); }; IntegerType *SamplingVarTy; - if (UseShort) + if (config.UseShort) SamplingVarTy = Type::getInt16Ty(M.getContext()); else SamplingVarTy = Type::getInt32Ty(M.getContext()); @@ -741,18 +758,18 @@ void InstrLowerer::doSampling(Instruction *I) { MDNode *BranchWeight; IRBuilder<> CondBuilder(I); auto *LoadSamplingVar = CondBuilder.CreateLoad(SamplingVarTy, SamplingVar); - if (IsSimpleSampling) { + if (config.IsSimpleSampling) { // For the simple sampling, just create the load and increments. 
IRBuilder<> IncBuilder(I); NewSamplingVarVal = IncBuilder.CreateAdd(LoadSamplingVar, GetConstant(IncBuilder, 1)); SamplingVarIncr = IncBuilder.CreateStore(NewSamplingVarVal, SamplingVar); } else { - // For the bust-sampling, create the conditonal update. + // For the burst-sampling, create the conditional update. auto *DurationCond = CondBuilder.CreateICmpULE( - LoadSamplingVar, GetConstant(CondBuilder, SampledBurstDuration)); + LoadSamplingVar, GetConstant(CondBuilder, config.BurstDuration - 1)); BranchWeight = MDB.createBranchWeights( - SampledBurstDuration, SampledPeriod + 1 - SampledBurstDuration); + config.BurstDuration, config.Period - config.BurstDuration); Instruction *ThenTerm = SplitBlockAndInsertIfThen( DurationCond, I, /* Unreachable */ false, BranchWeight); IRBuilder<> IncBuilder(I); @@ -762,20 +779,20 @@ void InstrLowerer::doSampling(Instruction *I) { I->moveBefore(ThenTerm); } - if (IsFastSampling) + if (config.IsFastSampling) return; - // Create the condtion for checking the period. + // Create the condition for checking the period. Instruction *ThenTerm, *ElseTerm; IRBuilder<> PeriodCondBuilder(SamplingVarIncr); auto *PeriodCond = PeriodCondBuilder.CreateICmpUGE( - NewSamplingVarVal, GetConstant(PeriodCondBuilder, SampledPeriod)); - BranchWeight = MDB.createBranchWeights(1, SampledPeriod); + NewSamplingVarVal, GetConstant(PeriodCondBuilder, config.Period)); + BranchWeight = MDB.createBranchWeights(1, config.Period - 1); SplitBlockAndInsertIfThenElse(PeriodCond, SamplingVarIncr, &ThenTerm, &ElseTerm, BranchWeight); // For the simple sampling, the counter update happens in sampling var reset. 
- if (IsSimpleSampling) + if (config.IsSimpleSampling) I->moveBefore(ThenTerm); IRBuilder<> ResetBuilder(ThenTerm); @@ -2138,7 +2155,7 @@ void createProfileSamplingVar(Module &M) { const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_SAMPLING_VAR)); IntegerType *SamplingVarTy; Constant *ValueZero; - if (SampledInstrPeriod.getValue() <= USHRT_MAX) { + if (getSampledInstrumentationConfig().UseShort) { SamplingVarTy = Type::getInt16Ty(M.getContext()); ValueZero = Constant::getIntegerValue(SamplingVarTy, APInt(16, 0)); } else { diff --git a/llvm/test/Transforms/PGOProfile/counter_promo_sampling.ll b/llvm/test/Transforms/PGOProfile/counter_promo_sampling.ll index 9d083fe04015e6..43377f695be675 100644 --- a/llvm/test/Transforms/PGOProfile/counter_promo_sampling.ll +++ b/llvm/test/Transforms/PGOProfile/counter_promo_sampling.ll @@ -6,7 +6,7 @@ define void @foo(i32 %n, i32 %N) { ; SAMPLING-LABEL: @foo ; SAMPLING: %[[VV0:[0-9]+]] = load i16, ptr @__llvm_profile_sampling, align 2 -; SAMPLING: %[[VV1:[0-9]+]] = icmp ule i16 %[[VV0]], 200 +; SAMPLING: %[[VV1:[0-9]+]] = icmp ule i16 %[[VV0]], 199 ; SAMPLING: br i1 %[[VV1]], label {{.*}}, label {{.*}}, !prof !0 ; SAMPLING: {{.*}} = load {{.*}} @__profc_foo{{.*}} 3) ; SAMPLING-NEXT: add diff --git a/llvm/test/Transforms/PGOProfile/cspgo_sample.ll b/llvm/test/Transforms/PGOProfile/cspgo_sample.ll index 97ad4d00c9d9c0..07f1e2d8a09ee6 100644 --- a/llvm/test/Transforms/PGOProfile/cspgo_sample.ll +++ b/llvm/test/Transforms/PGOProfile/cspgo_sample.ll @@ -53,7 +53,7 @@ for.end: ; CSGEN-LABEL: @foo ; CSGEN: [[TMP0:%.*]] = load i16, ptr @__llvm_profile_sampling, align 2 -; CSGEN-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP0]], 201 +; CSGEN-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP0]], 200 ; CSGEN-NEXT: br i1 [[TMP1]], label %{{.*}}, label %{{.*}}, !prof [[PROF:![0-9]+]] ; CSGEN: [[TMP2:%.*]] = add i16 {{.*}}, 1 ; CSGEN-NEXT: store i16 [[TMP2]], ptr @__llvm_profile_sampling, align 2 @@ -67,7 +67,7 @@ entry: } ; CSGEN-LABEL: @main ; CSGEN: 
[[TMP0:%.*]] = load i16, ptr @__llvm_profile_sampling, align 2 -; CSGEN-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP0]], 201 +; CSGEN-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP0]], 200 ; CSGEN-NEXT: br i1 [[TMP1]], label %{{.*}}, label %{{.*}}, !prof [[PROF:![0-9]+]] ; CSGEN: [[TMP2:%.*]] = add i16 {{.*}}, 1 ; CSGEN-NEXT: store i16 [[TMP2]], ptr @__llvm_profile_sampling, align 2 diff --git a/llvm/test/Transforms/PGOProfile/instrprof_burst_sampling_fast.ll b/llvm/test/Transforms/PGOProfile/instrprof_burst_sampling_fast.ll index dcc1e805ba6f64..56d8364d8f5431 100644 --- a/llvm/test/Transforms/PGOProfile/instrprof_burst_sampling_fast.ll +++ b/llvm/test/Transforms/PGOProfile/instrprof_burst_sampling_fast.ll @@ -1,5 +1,6 @@ ; RUN: opt < %s --passes=instrprof --sampled-instrumentation -S | FileCheck %s --check-prefixes=SAMPLE-VAR,SAMPLE-CODE,SAMPLE-DURATION,SAMPLE-WEIGHT ; RUN: opt < %s --passes=instrprof --sampled-instrumentation --sampled-instr-burst-duration=100 -S | FileCheck %s --check-prefixes=SAMPLE-VAR,SAMPLE-CODE,SAMPLE-DURATION100,SAMPLE-WEIGHT100 +; RUN: opt < %s --passes=instrprof --sampled-instrumentation --sampled-instr-burst-duration=65536 -S | FileCheck %s --check-prefixes=SAMPLE-VAR,SAMPLE-CODE,UNSAMPLED-DURATION,UNSAMPLED-WEIGHT target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -23,8 +24,9 @@ define void @f() { ; SAMPLE-CODE-LABEL: @f( ; SAMPLE-CODE: entry: ; SAMPLE-CODE-NEXT: [[TMP0:%.*]] = load i16, ptr @__llvm_profile_sampling, align 2 -; SAMPLE-DURATION: [[TMP1:%.*]] = icmp ule i16 [[TMP0]], 200 -; SAMPLE-DURATION100: [[TMP1:%.*]] = icmp ule i16 [[TMP0]], 100 +; SAMPLE-DURATION: [[TMP1:%.*]] = icmp ule i16 [[TMP0]], 199 +; SAMPLE-DURATION100: [[TMP1:%.*]] = icmp ule i16 [[TMP0]], 99 +; UNSAMPLED-DURATION: [[TMP1:%.*]] = icmp ule i16 [[TMP0]], -1 ; SAMPLE-CODE: br i1 [[TMP1]], label %[[TMP2:.*]], label %[[TMP4:.*]], !prof !0 ; SAMPLE-CODE: [[TMP2]]: ; SAMPLE-CODE-NEXT: [[PGOCOUNT:%.*]] = load i64, ptr 
@__profc_f @@ -43,5 +45,6 @@ entry: ; SAMPLE-WEIGHT: !0 = !{!"branch_weights", i32 200, i32 65336} ; SAMPLE-WEIGHT100: !0 = !{!"branch_weights", i32 100, i32 65436} +; UNSAMPLED-WEIGHT: !0 = !{!"branch_weights", i32 65536, i32 0} declare void @llvm.instrprof.increment(i8*, i64, i32, i32) diff --git a/llvm/test/Transforms/PGOProfile/instrprof_burst_sampling_full.ll b/llvm/test/Transforms/PGOProfile/instrprof_burst_sampling_full.ll index 57d1a0cd33fbe8..726df2886ca840 100644 --- a/llvm/test/Transforms/PGOProfile/instrprof_burst_sampling_full.ll +++ b/llvm/test/Transforms/PGOProfile/instrprof_burst_sampling_full.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt < %s --passes=instrprof -sampled-instrumentation --sampled-instr-period=1009 --sampled-instr-burst-duration=32 -S | FileCheck %s +; RUN: opt < %s --passes=instrprof -sampled-instrumentation --sampled-instr-period=1009 --sampled-instr-burst-duration=32 -S | FileCheck %s --check-prefixes=CHECK,CHECK-32 +; RUN: opt < %s --passes=instrprof -sampled-instrumentation --sampled-instr-period=1009 --sampled-instr-burst-duration=1009 -S | FileCheck %s --check-prefixes=CHECK,CHECK-UNSAMPLED target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -13,7 +14,8 @@ define void @f() { ; CHECK-LABEL: define void @f() { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr @__llvm_profile_sampling, align 2 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i16 [[TMP0]], 32 +; CHECK-32-NEXT: [[TMP1:%.*]] = icmp ule i16 [[TMP0]], 31 +; CHECK-UNSAMPLED-NEXT: [[TMP1:%.*]] = icmp ule i16 [[TMP0]], 1008 ; CHECK-NEXT: br i1 [[TMP1]], label %[[BB2:.*]], label %[[BB4:.*]], !prof [[PROF0:![0-9]+]] ; CHECK: [[BB2]]: ; CHECK-NEXT: [[PGOCOUNT:%.*]] = load i64, ptr @__profc_f, align 8 @@ -40,6 +42,8 @@ entry: declare void @llvm.instrprof.increment(i8*, i64, i32, i32) ;. 
-; CHECK: [[PROF0]] = !{!"branch_weights", i32 32, i32 978} -; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1009} +; CHECK-32: [[PROF0]] = !{!"branch_weights", i32 32, i32 977} +; CHECK-32: [[PROF1]] = !{!"branch_weights", i32 1, i32 1008} +; CHECK-UNSAMPLED: [[PROF0]] = !{!"branch_weights", i32 1009, i32 0} +; CHECK-UNSAMPLED: [[PROF1]] = !{!"branch_weights", i32 1, i32 1008} ;. diff --git a/llvm/test/Transforms/PGOProfile/instrprof_burst_sampling_full_intsize.ll b/llvm/test/Transforms/PGOProfile/instrprof_burst_sampling_full_intsize.ll index 1ad889524bc6a8..2d6323c1034715 100644 --- a/llvm/test/Transforms/PGOProfile/instrprof_burst_sampling_full_intsize.ll +++ b/llvm/test/Transforms/PGOProfile/instrprof_burst_sampling_full_intsize.ll @@ -13,7 +13,7 @@ define void @f() { ; CHECK-LABEL: define void @f() { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @__llvm_profile_sampling, align 4 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i32 [[TMP0]], 3000 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i32 [[TMP0]], 2999 ; CHECK-NEXT: br i1 [[TMP1]], label %[[BB2:.*]], label %[[BB4:.*]], !prof [[PROF0:![0-9]+]] ; CHECK: [[BB2]]: ; CHECK-NEXT: [[PGOCOUNT:%.*]] = load i64, ptr @__profc_f, align 8 @@ -40,6 +40,6 @@ entry: declare void @llvm.instrprof.increment(i8*, i64, i32, i32) ;. -; CHECK: [[PROF0]] = !{!"branch_weights", i32 3000, i32 997020} -; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1000019} +; CHECK: [[PROF0]] = !{!"branch_weights", i32 3000, i32 997019} +; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1000018} ;. 
diff --git a/llvm/test/Transforms/PGOProfile/instrprof_simple_sampling.ll b/llvm/test/Transforms/PGOProfile/instrprof_simple_sampling.ll index 8e846bbf1d9821..5ef93af881c4b3 100644 --- a/llvm/test/Transforms/PGOProfile/instrprof_simple_sampling.ll +++ b/llvm/test/Transforms/PGOProfile/instrprof_simple_sampling.ll @@ -31,18 +31,18 @@ define void @f() { ; ; DEFAULTPERIOD-LABEL: define void @f() { ; DEFAULTPERIOD-NEXT: [[ENTRY:.*:]] -; DEFAULTPERIOD-NEXT: [[TMP0:%.*]] = load i16, ptr @__llvm_profile_sampling, align 2 -; DEFAULTPERIOD-NEXT: [[TMP1:%.*]] = add i16 [[TMP0]], 1 -; DEFAULTPERIOD-NEXT: [[TMP2:%.*]] = icmp uge i16 [[TMP1]], -1 +; DEFAULTPERIOD-NEXT: [[TMP0:%.*]] = load i32, ptr @__llvm_profile_sampling, align 4 +; DEFAULTPERIOD-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 1 +; DEFAULTPERIOD-NEXT: [[TMP2:%.*]] = icmp uge i32 [[TMP1]], 65536 ; DEFAULTPERIOD-NEXT: br i1 [[TMP2]], label %[[BB3:.*]], label %[[BB5:.*]], !prof [[PROF0:![0-9]+]] ; DEFAULTPERIOD: [[BB3]]: ; DEFAULTPERIOD-NEXT: [[PGOCOUNT:%.*]] = load i64, ptr @__profc_f, align 8 ; DEFAULTPERIOD-NEXT: [[TMP4:%.*]] = add i64 [[PGOCOUNT]], 1 ; DEFAULTPERIOD-NEXT: store i64 [[TMP4]], ptr @__profc_f, align 8 -; DEFAULTPERIOD-NEXT: store i16 0, ptr @__llvm_profile_sampling, align 2 +; DEFAULTPERIOD-NEXT: store i32 0, ptr @__llvm_profile_sampling, align 4 ; DEFAULTPERIOD-NEXT: br label %[[BB6:.*]] ; DEFAULTPERIOD: [[BB5]]: -; DEFAULTPERIOD-NEXT: store i16 [[TMP1]], ptr @__llvm_profile_sampling, align 2 +; DEFAULTPERIOD-NEXT: store i32 [[TMP1]], ptr @__llvm_profile_sampling, align 4 ; DEFAULTPERIOD-NEXT: br label %[[BB6]] ; DEFAULTPERIOD: [[BB6]]: ; DEFAULTPERIOD-NEXT: ret void @@ -54,7 +54,7 @@ entry: declare void @llvm.instrprof.increment(i8*, i64, i32, i32) ;. -; PERIOD1009: [[PROF0]] = !{!"branch_weights", i32 1, i32 1009} +; PERIOD1009: [[PROF0]] = !{!"branch_weights", i32 1, i32 1008} ;. ; DEFAULTPERIOD: [[PROF0]] = !{!"branch_weights", i32 1, i32 65535} ;. 
From 8a12e0131f3d84b470fac63af042aa96a1b19f56 Mon Sep 17 00:00:00 2001 From: Justin Fargnoli Date: Tue, 22 Oct 2024 16:01:32 -0700 Subject: [PATCH 06/68] Revert "[LLVM] Add IRNormalizer Pass" (#113392) Reverts llvm/llvm-project#68176 Introduced BuildBot failure: https://github.com/llvm/llvm-project/pull/68176#issuecomment-2428243474 --- llvm/docs/Passes.rst | 8 - llvm/docs/ReleaseNotes.md | 5 - .../llvm/Transforms/Utils/IRNormalizer.h | 15 - llvm/lib/Passes/PassBuilder.cpp | 1 - llvm/lib/Passes/PassRegistry.def | 1 - llvm/lib/Transforms/Utils/CMakeLists.txt | 1 - llvm/lib/Transforms/Utils/IRNormalizer.cpp | 695 ------------------ .../IRNormalizer/naming-args-instr-blocks.ll | 18 - .../IRNormalizer/naming-arguments.ll | 13 - llvm/test/Transforms/IRNormalizer/naming.ll | 30 - .../regression-convergence-tokens.ll | 27 - .../regression-coro-elide-musttail.ll | 21 - .../IRNormalizer/regression-deoptimize.ll | 18 - .../regression-dont-hoist-deoptimize.ll | 20 - .../IRNormalizer/regression-infinite-loop.ll | 195 ----- .../IRNormalizer/reordering-basic.ll | 58 -- .../Transforms/IRNormalizer/reordering.ll | 163 ---- 17 files changed, 1289 deletions(-) delete mode 100644 llvm/include/llvm/Transforms/Utils/IRNormalizer.h delete mode 100644 llvm/lib/Transforms/Utils/IRNormalizer.cpp delete mode 100644 llvm/test/Transforms/IRNormalizer/naming-args-instr-blocks.ll delete mode 100644 llvm/test/Transforms/IRNormalizer/naming-arguments.ll delete mode 100644 llvm/test/Transforms/IRNormalizer/naming.ll delete mode 100644 llvm/test/Transforms/IRNormalizer/regression-convergence-tokens.ll delete mode 100644 llvm/test/Transforms/IRNormalizer/regression-coro-elide-musttail.ll delete mode 100644 llvm/test/Transforms/IRNormalizer/regression-deoptimize.ll delete mode 100644 llvm/test/Transforms/IRNormalizer/regression-dont-hoist-deoptimize.ll delete mode 100644 llvm/test/Transforms/IRNormalizer/regression-infinite-loop.ll delete mode 100644 
llvm/test/Transforms/IRNormalizer/reordering-basic.ll delete mode 100644 llvm/test/Transforms/IRNormalizer/reordering.ll diff --git a/llvm/docs/Passes.rst b/llvm/docs/Passes.rst index 5e436db62be3a1..49f633e98d16fe 100644 --- a/llvm/docs/Passes.rst +++ b/llvm/docs/Passes.rst @@ -543,14 +543,6 @@ variables with initializers are marked as internal. An interprocedural variant of :ref:`Sparse Conditional Constant Propagation <passes-sccp>`. -``ir-normalizer``: Transforms IR into a normal form that's easier to diff ----------------------------------------------------------------------------- - -This pass aims to transform LLVM Modules into a normal form by reordering and -renaming instructions while preserving the same semantics. The normalizer makes -it easier to spot semantic differences while diffing two modules which have -undergone two different passes. - ``jump-threading``: Jump Threading ---------------------------------- diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index 0c4cd437dac0b4..c8f5d22c15472a 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -42,11 +42,6 @@ point (e.g. maybe you would like to give an example of the functionality, or simply have a lot to talk about), see the comment below for adding a new subsection. --> -* Added a new IRNormalizer pass which aims to transform LLVM modules into - a normal form by reordering and renaming instructions while preserving the - same semantics. The normalizer makes it easier to spot semantic differences - when diffing two modules which have undergone different passes. - * ... 
Op1 != 0 diff --git a/llvm/test/Transforms/InstCombine/and-or-icmps.ll b/llvm/test/Transforms/InstCombine/and-or-icmps.ll index eb4723c86542de..2b5d430e295757 100644 --- a/llvm/test/Transforms/InstCombine/and-or-icmps.ll +++ b/llvm/test/Transforms/InstCombine/and-or-icmps.ll @@ -3335,8 +3335,7 @@ define i1 @icmp_eq_or_z_or_pow2orz_fail_bad_pred2(i8 %x, i8 %y) { define i1 @and_slt_to_mask(i8 %x) { ; CHECK-LABEL: @and_slt_to_mask( -; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X:%.*]], -2 -; CHECK-NEXT: [[AND2:%.*]] = icmp eq i8 [[TMP1]], -128 +; CHECK-NEXT: [[AND2:%.*]] = icmp slt i8 [[X:%.*]], -126 ; CHECK-NEXT: ret i1 [[AND2]] ; %cmp = icmp slt i8 %x, -124 diff --git a/llvm/test/Transforms/InstCombine/icmp-signmask.ll b/llvm/test/Transforms/InstCombine/icmp-signmask.ll index bea8da2074ab0b..5424f7d7e8021f 100644 --- a/llvm/test/Transforms/InstCombine/icmp-signmask.ll +++ b/llvm/test/Transforms/InstCombine/icmp-signmask.ll @@ -3,8 +3,7 @@ define i1 @cmp_x_and_negp2_with_eq(i8 %x) { ; CHECK-LABEL: @cmp_x_and_negp2_with_eq( -; CHECK-NEXT: [[ANDX:%.*]] = and i8 [[X:%.*]], -2 -; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[ANDX]], -128 +; CHECK-NEXT: [[R:%.*]] = icmp slt i8 [[X:%.*]], -126 ; CHECK-NEXT: ret i1 [[R]] ; %andx = and i8 %x, -2 @@ -25,8 +24,7 @@ define i1 @cmp_x_and_negp2_with_eq_fail_not_signmask(i8 %x) { define <2 x i1> @cmp_x_and_negp2_with_ne(<2 x i8> %x) { ; CHECK-LABEL: @cmp_x_and_negp2_with_ne( -; CHECK-NEXT: [[ANDX:%.*]] = and <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i8> [[ANDX]], +; CHECK-NEXT: [[R:%.*]] = icmp sgt <2 x i8> [[X:%.*]], ; CHECK-NEXT: ret <2 x i1> [[R]] ; %andx = and <2 x i8> %x, @@ -36,8 +34,7 @@ define <2 x i1> @cmp_x_and_negp2_with_ne(<2 x i8> %x) { define <2 x i1> @cmp_x_and_negp2_with_ne_or_z(<2 x i8> %x) { ; CHECK-LABEL: @cmp_x_and_negp2_with_ne_or_z( -; CHECK-NEXT: [[ANDX:%.*]] = and <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i8> [[ANDX]], +; CHECK-NEXT: [[R:%.*]] = icmp sge <2 x i8> [[X:%.*]], ; CHECK-NEXT: 
ret <2 x i1> [[R]] ; %andx = and <2 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll index c695dc1cd69c8b..d52a0b76979373 100644 --- a/llvm/test/Transforms/InstCombine/icmp.ll +++ b/llvm/test/Transforms/InstCombine/icmp.ll @@ -1116,8 +1116,7 @@ define i1 @test53(i32 %a, i32 %b) { define i1 @test54(i8 %a) { ; CHECK-LABEL: @test54( -; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[A:%.*]], -64 -; CHECK-NEXT: [[RET:%.*]] = icmp eq i8 [[TMP1]], -128 +; CHECK-NEXT: [[RET:%.*]] = icmp slt i8 [[A:%.*]], -64 ; CHECK-NEXT: ret i1 [[RET]] ; %ext = zext i8 %a to i32 From 8a9921f5692ab33451d11454b40a023ca0965a69 Mon Sep 17 00:00:00 2001 From: Ricardo Jesus Date: Wed, 23 Oct 2024 15:20:33 +0100 Subject: [PATCH 68/68] [AArch64] Use INDEX for constant Neon step vectors (#113424) When compiling for an SVE target we can use INDEX to generate constant fixed-length step vectors, e.g.: ``` uint32x4_t foo() { return (uint32x4_t){0, 1, 2, 3}; } ``` Currently: ``` foo(): adrp x8, .LCPI1_0 ldr q0, [x8, :lo12:.LCPI1_0] ret ``` With INDEX: ``` foo(): index z0.s, #0, #1 ret ``` The logic for this was already in `LowerBUILD_VECTOR`, though it was hidden under a check for `!Subtarget->isNeonAvailable()`. This patch refactors this to enable the corresponding code path unconditionally for constant step vectors (as long as we can use SVE for them). 
--- .../Target/AArch64/AArch64ISelLowering.cpp | 4 +- llvm/test/CodeGen/AArch64/active_lane_mask.ll | 20 ++- .../AArch64/sve-index-const-step-vector.ll | 135 ++++++++++++++++++ 3 files changed, 146 insertions(+), 13 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/sve-index-const-step-vector.ll diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 927c057adc00df..9adb11292376ce 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -14520,7 +14520,9 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); - if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable())) + bool OverrideNEON = !Subtarget->isNeonAvailable() || + cast<BuildVectorSDNode>(Op)->isConstantSequence(); + if (useSVEForFixedLengthVectorVT(VT, OverrideNEON)) return LowerFixedLengthBuildVectorToSVE(Op, DAG); // Try to build a simple constant vector. 
diff --git a/llvm/test/CodeGen/AArch64/active_lane_mask.ll b/llvm/test/CodeGen/AArch64/active_lane_mask.ll index bd5d076d1ba82e..025bbf749fc71b 100644 --- a/llvm/test/CodeGen/AArch64/active_lane_mask.ll +++ b/llvm/test/CodeGen/AArch64/active_lane_mask.ll @@ -430,10 +430,9 @@ define <2 x i1> @lane_mask_v2i1_i64(i64 %index, i64 %TC) { define <16 x i1> @lane_mask_v16i1_i8(i8 %index, i8 %TC) { ; CHECK-LABEL: lane_mask_v16i1_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI24_0 -; CHECK-NEXT: dup v0.16b, w0 -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI24_0] -; CHECK-NEXT: uqadd v0.16b, v0.16b, v1.16b +; CHECK-NEXT: index z0.b, #0, #1 +; CHECK-NEXT: dup v1.16b, w0 +; CHECK-NEXT: uqadd v0.16b, v1.16b, v0.16b ; CHECK-NEXT: dup v1.16b, w1 ; CHECK-NEXT: cmhi v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret @@ -444,10 +443,9 @@ define <16 x i1> @lane_mask_v16i1_i8(i8 %index, i8 %TC) { define <8 x i1> @lane_mask_v8i1_i8(i8 %index, i8 %TC) { ; CHECK-LABEL: lane_mask_v8i1_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: dup v0.8b, w0 -; CHECK-NEXT: adrp x8, .LCPI25_0 -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI25_0] -; CHECK-NEXT: uqadd v0.8b, v0.8b, v1.8b +; CHECK-NEXT: index z0.b, #0, #1 +; CHECK-NEXT: dup v1.8b, w0 +; CHECK-NEXT: uqadd v0.8b, v1.8b, v0.8b ; CHECK-NEXT: dup v1.8b, w1 ; CHECK-NEXT: cmhi v0.8b, v1.8b, v0.8b ; CHECK-NEXT: ret @@ -459,9 +457,8 @@ define <4 x i1> @lane_mask_v4i1_i8(i8 %index, i8 %TC) { ; CHECK-LABEL: lane_mask_v4i1_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: dup v0.4h, w0 -; CHECK-NEXT: adrp x8, .LCPI26_0 +; CHECK-NEXT: index z1.h, #0, #1 ; CHECK-NEXT: movi d2, #0xff00ff00ff00ff -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI26_0] ; CHECK-NEXT: dup v3.4h, w1 ; CHECK-NEXT: bic v0.4h, #255, lsl #8 ; CHECK-NEXT: bic v3.4h, #255, lsl #8 @@ -478,8 +475,7 @@ define <2 x i1> @lane_mask_v2i1_i8(i8 %index, i8 %TC) { ; CHECK: // %bb.0: ; CHECK-NEXT: movi d0, #0x0000ff000000ff ; CHECK-NEXT: dup v1.2s, w0 -; CHECK-NEXT: adrp x8, .LCPI27_0 -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI27_0] +; CHECK-NEXT: 
index z2.s, #0, #1 ; CHECK-NEXT: dup v3.2s, w1 ; CHECK-NEXT: and v1.8b, v1.8b, v0.8b ; CHECK-NEXT: add v1.2s, v1.2s, v2.2s diff --git a/llvm/test/CodeGen/AArch64/sve-index-const-step-vector.ll b/llvm/test/CodeGen/AArch64/sve-index-const-step-vector.ll new file mode 100644 index 00000000000000..433ddbd4a261b2 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-index-const-step-vector.ll @@ -0,0 +1,135 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +; 128-bit vectors + +define <16 x i8> @v16i8() #0 { +; CHECK-LABEL: v16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: index z0.b, #0, #1 +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ret + ret <16 x i8> +} + +define <8 x i16> @v8i16() #0 { +; CHECK-LABEL: v8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: index z0.h, #0, #1 +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ret + ret <8 x i16> +} + +define <4 x i32> @v4i32() #0 { +; CHECK-LABEL: v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: index z0.s, #0, #1 +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ret + ret <4 x i32> +} + +define <2 x i64> @v2i64() #0 { +; CHECK-LABEL: v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: index z0.d, #0, #1 +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ret + ret <2 x i64> +} + +; 64-bit vectors + +define <8 x i8> @v8i8() #0 { +; CHECK-LABEL: v8i8: +; CHECK: // %bb.0: +; CHECK-NEXT: index z0.b, #0, #1 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-NEXT: ret + ret <8 x i8> +} + +define <4 x i16> @v4i16() #0 { +; CHECK-LABEL: v4i16: +; CHECK: // %bb.0: +; CHECK-NEXT: index z0.h, #0, #1 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-NEXT: ret + ret <4 x i16> +} + +define <2 x i32> @v2i32() #0 { +; CHECK-LABEL: v2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: index z0.s, #0, #1 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; 
CHECK-NEXT: ret + ret <2 x i32> +} + +; Positive test, non-zero start and non-unitary step. +; Note: This should be INDEX z0.s, #1, #2 (without the ORR). +define <4 x i32> @v4i32_non_zero_non_one() #0 { +; CHECK-LABEL: v4i32_non_zero_non_one: +; CHECK: // %bb.0: +; CHECK-NEXT: index z0.s, #0, #2 +; CHECK-NEXT: orr z0.s, z0.s, #0x1 +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ret + ret <4 x i32> +} + +; Positive test, same as above but negative immediates. +define <4 x i32> @v4i32_neg_immediates() #0 { +; CHECK-LABEL: v4i32_neg_immediates: +; CHECK: // %bb.0: +; CHECK-NEXT: index z0.s, #-1, #-2 +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ret + ret <4 x i32> +} + +; Positive test, out of imm range start. +define <4 x i32> @v4i32_out_range_start() #0 { +; CHECK-LABEL: v4i32_out_range_start: +; CHECK: // %bb.0: +; CHECK-NEXT: index z0.s, #0, #1 +; CHECK-NEXT: add z0.s, z0.s, #16 // =0x10 +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ret + ret <4 x i32> +} + +; Positive test, out of imm range step. +define <4 x i32> @v4i32_out_range_step() #0 { +; CHECK-LABEL: v4i32_out_range_step: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #16 // =0x10 +; CHECK-NEXT: index z0.s, #0, w8 +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ret + ret <4 x i32> +} + +; Positive test, out of imm range start and step. +define <4 x i32> @v4i32_out_range_start_step() #0 { +; CHECK-LABEL: v4i32_out_range_start_step: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #16 // =0x10 +; CHECK-NEXT: index z0.s, #0, w8 +; CHECK-NEXT: add z0.s, z0.s, #16 // =0x10 +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ret + ret <4 x i32> +} + +; Negative test, non sequential. +define <4 x i32> @v4i32_non_sequential() #0 { +; CHECK-LABEL: v4i32_non_sequential: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI12_0 +; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI12_0] +; CHECK-NEXT: ret + ret <4 x i32> +}