From 075a7ec71b6c964c386ff73b7df458c004bbd833 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Wed, 13 Nov 2024 18:10:38 -0800 Subject: [PATCH 01/17] JIT: enable cloning of try regions Add a utility to clone a try region and all associated regions. Test this by enabling duplication of loops with try regions. --- src/coreclr/jit/compiler.h | 7 +- src/coreclr/jit/compmemkind.h | 1 + src/coreclr/jit/fgehopt.cpp | 652 ++++++++++++++++++++++++++++++ src/coreclr/jit/flowgraph.cpp | 267 +++++++++++- src/coreclr/jit/jitconfigvalues.h | 35 +- src/coreclr/jit/jiteh.cpp | 148 +++++-- src/coreclr/jit/loopcloning.cpp | 11 +- src/coreclr/jit/optimizer.cpp | 12 +- 8 files changed, 1047 insertions(+), 86 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 740182cdf2b7e5..770b3ea6590899 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -3002,7 +3002,7 @@ class Compiler void fgRemoveEHTableEntry(unsigned XTnum); - EHblkDsc* fgAddEHTableEntry(unsigned XTnum); + EHblkDsc* fgAddEHTableEntries(unsigned XTnum, unsigned count = 1, bool deferAdding = false); void fgSortEHTable(); @@ -5418,6 +5418,11 @@ class Compiler PhaseStatus fgCloneFinally(); + bool fgCanCloneTryRegion(BasicBlock* tryEntry); + + BasicBlock* fgCloneTryRegion(BasicBlock* tryEntry, BlockSet& visited, BlockToBlockMap* map, bool addEdges, + weight_t profileScale, BasicBlock** insertAfter, unsigned* ehRegionShift); + void fgCleanupContinuation(BasicBlock* continuation); PhaseStatus fgTailMergeThrows(); diff --git a/src/coreclr/jit/compmemkind.h b/src/coreclr/jit/compmemkind.h index 959176dcc965ad..6de55d070a6e9e 100644 --- a/src/coreclr/jit/compmemkind.h +++ b/src/coreclr/jit/compmemkind.h @@ -65,6 +65,7 @@ CompMemKindMacro(EarlyProp) CompMemKindMacro(ZeroInit) CompMemKindMacro(Pgo) CompMemKindMacro(MaskConversionOpt) +CompMemKindMacro(TryRegionClone) //clang-format on #undef CompMemKindMacro diff --git a/src/coreclr/jit/fgehopt.cpp b/src/coreclr/jit/fgehopt.cpp index 4d1fe732234eae..2d366ba095f203 100644 --- a/src/coreclr/jit/fgehopt.cpp +++ b/src/coreclr/jit/fgehopt.cpp @@ -2097,3 +2097,655 @@ PhaseStatus Compiler::fgTailMergeThrows() fgModified = false; return PhaseStatus::MODIFIED_EVERYTHING; } + +//------------------------------------------------------------------------ +// fgCloneTryRegion: clone a try region +// +// Arguments: +// tryEntry -- try entry block +// visited -- [in, out] blocks visited during cloning +// map -- [in, out] block mapping original try region blocks to their clones +// addEdges -- if true, add flow edges to the cloned blocks +// profileScale -- scale profile weight by this factor +// insertAfter -- [in, out] pointer to block to insert new blocks after +// ehIndexShift -- [out]amount by which indices of enclosing EH regions have shifted +// +// Returns: +// If map == nullptr, check if cloning is possible +// return nullptr if not, tryEntry if so +// else +// Return the cloned try entry, or nullptr if cloning failed +// map will be modified to contain keys and for the blocks to cloned. +// insertionPoint will point at the lexcially last block cloned +// ehIndexShift will indicate the amount by whcih enclosing EH region indices have changed +// +// Notes: +// * If map is nullptr, this call only checks if cloning is possible +// +// * If map is not nullptr, it is not modified unless cloning succeeds +// When cloning succeeds, entries for the try blocks and related blocks +// (handler, filter, callfinally) will be updated; other map entries will +// be left as they were +// +// * insertAfter can be nullptr, if map is nullptr +// The insertion point must be lexically after the original try region +// and be in the enclosing region for the original try +// +// * ehRegionShift can be nullptr, if map is nullptr +// +// * If cloning and adding edges, +// The new try region entry will not be reachable by any uncloned block. +// The new try region exits will target the same uncloned blocks as the original, +// or as directed by pre-existing map entries. +// +BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, + BlockSet& visited, + BlockToBlockMap* map, + bool addEdges, + weight_t profileScale, + BasicBlock** insertAfter, + unsigned* ehRegionShift) +{ + assert(bbIsTryBeg(tryEntry)); + bool const deferCloning = (map == nullptr); + assert(deferCloning || (insertAfter != nullptr) && (*insertAfter != nullptr)); + INDEBUG(const char* msg = deferCloning ? "Checking if it is possible" : "Attempting";) + JITDUMP("%s to clone the try region EH#%02u headed by " FMT_BB "\n", msg, tryEntry->getTryIndex(), tryEntry->bbNum); + + // Determine the extent of cloning. + // + // We need to clone to the entire try region plus any + // enclosed regions and any enclosing mutual protect regions, + // plus all the the associated handlers and filters and any + // regions they enclose, plus any callfinallies that follow. + // + // This is necessary because try regions can't have multiple entries, or + // share parts in any meaningful way. + // + CompAllocator alloc = getAllocator(CMK_TryRegionClone); + ArrayStack regionsToProcess(alloc); + unsigned const tryIndex = tryEntry->getTryIndex(); + jitstd::vector blocks(alloc); + unsigned regionCount = 0; + + auto addBlockToClone = [=, &blocks, &visited](BasicBlock* block, const char* msg) { + if (!BlockSetOps::TryAddElemD(this, visited, block->bbNum)) + { + return false; + } + + JITDUMP(" %s block " FMT_BB "\n", msg, block->bbNum); + blocks.push_back(block); + return true; + }; + + JITDUMP("==> try EH#%02u\n", tryIndex); + regionsToProcess.Push(tryIndex); + + // Walk through each try region + // + while (regionsToProcess.Height() > 0) + { + regionCount++; + unsigned const regionIndex = regionsToProcess.Pop(); + EHblkDsc* const ebd = ehGetDsc(regionIndex); + JITDUMP("== processing try EH#%02u\n", regionIndex); + + // Walk the try region + // + BasicBlock* const firstTryBlock = ebd->ebdTryBeg; + BasicBlock* const lastTryBlock = ebd->ebdTryLast; + + if (BlockSetOps::IsMember(this, visited, firstTryBlock->bbNum)) + { + JITDUMP("already walked try region for EH#%02u\n", regionIndex); + assert(BlockSetOps::IsMember(this, visited, lastTryBlock->bbNum)); + } + else + { + JITDUMP("walking try region for EH#%02u\n", regionIndex); + for (BasicBlock* const block : Blocks(firstTryBlock, lastTryBlock)) + { + bool added = addBlockToClone(block, "try region"); + if (bbIsTryBeg(block) && (block != ebd->ebdTryBeg)) + { + assert(added); + JITDUMP("==> found try EH#%02u nested in try EH#%02u region at " FMT_BB "\n", block->getTryIndex(), + regionIndex, block->bbNum); + regionsToProcess.Push(block->getTryIndex()); + } + } + } + + // Walk the callfinally region + // + if (ebd->HasFinallyHandler()) + { + BasicBlock* firstCallFinallyRangeBlock = nullptr; + BasicBlock* lastCallFinallyRangeBlock = nullptr; + ehGetCallFinallyBlockRange(regionIndex, &firstCallFinallyRangeBlock, &lastCallFinallyRangeBlock); + + // Note this range is potentially quite broad... + // Instead perhaps just walk preds of the handler? + // + JITDUMP("walking callfinally region for EH#%02u [" FMT_BB " ... " FMT_BB "]\n", regionIndex, + firstCallFinallyRangeBlock->bbNum, lastCallFinallyRangeBlock->bbNum); + + for (BasicBlock* const block : Blocks(firstCallFinallyRangeBlock, lastCallFinallyRangeBlock)) + { + if (block->KindIs(BBJ_CALLFINALLY) && block->TargetIs(ebd->ebdHndBeg)) + { + addBlockToClone(block, "callfinally"); + } + else if (block->KindIs(BBJ_CALLFINALLYRET) && block->Prev()->TargetIs(ebd->ebdHndBeg)) + { + addBlockToClone(block, "callfinallyret"); + } + } + } + + // Walk the filter region + // + if (ebd->HasFilter()) + { + BasicBlock* const firstFltBlock = ebd->ebdFilter; + BasicBlock* const lastFltBlock = ebd->BBFilterLast(); + + if (BlockSetOps::IsMember(this, visited, firstFltBlock->bbNum)) + { + JITDUMP("already walked filter region for EH#%02u\n", regionIndex); + assert(BlockSetOps::IsMember(this, visited, lastFltBlock->bbNum)); + } + else + { + JITDUMP("walking filter region for EH#%02u\n", regionIndex); + for (BasicBlock* const block : Blocks(firstFltBlock, lastFltBlock)) + { + // A filter cannot enclose another EH region + // + assert(!bbIsTryBeg(block)); + addBlockToClone(block, "filter region"); + } + } + } + + // Walk the handler region + // + BasicBlock* const firstHndBlock = ebd->ebdHndBeg; + BasicBlock* const lastHndBlock = ebd->ebdHndLast; + + if (BlockSetOps::IsMember(this, visited, firstHndBlock->bbNum)) + { + JITDUMP("already walked handler region for EH#%02u\n", regionIndex); + assert(BlockSetOps::IsMember(this, visited, lastHndBlock->bbNum)); + } + else + { + JITDUMP("walking handler region for EH#%02u\n", regionIndex); + for (BasicBlock* const block : Blocks(firstHndBlock, lastHndBlock)) + { + bool added = addBlockToClone(block, "handler region"); + if (bbIsTryBeg(block)) + { + assert(added); + JITDUMP("==> found try entry for EH#%02u nested in handler at " FMT_BB "\n", block->bbNum, + block->getTryIndex()); + regionsToProcess.Push(block->getTryIndex()); + } + } + } + + // If there is an enclosing mutual-protect region, process it as well + // + unsigned const enclosingTryIndex = ebd->ebdEnclosingTryIndex; + if (enclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX) + { + EHblkDsc* const enclosingTryEbd = ehGetDsc(enclosingTryIndex); + + if (EHblkDsc::ebdIsSameTry(ebd, enclosingTryEbd)) + { + JITDUMP("==> found mutual-protect try EH#%02u for EH#%02u\n", enclosingTryIndex, regionIndex); + regionsToProcess.Push(enclosingTryIndex); + } + } + + JITDUMP("<== finished try EH#%02u\n", regionIndex); + } + + // Find the outermost mututal-protect try egion that begins at tryEntry + // + EHblkDsc* const tryEbd = ehGetDsc(tryIndex); + unsigned outermostTryIndex = tryIndex; + unsigned enclosingTryIndex = EHblkDsc::NO_ENCLOSING_INDEX; + { + EHblkDsc* outermostEbd = ehGetDsc(outermostTryIndex); + while (true) + { + enclosingTryIndex = outermostEbd->ebdEnclosingTryIndex; + if (enclosingTryIndex == EHblkDsc::NO_ENCLOSING_INDEX) + { + break; + } + outermostEbd = ehGetDsc(enclosingTryIndex); + if (!EHblkDsc::ebdIsSameILTry(outermostEbd, tryEbd)) + { + break; + } + outermostTryIndex = enclosingTryIndex; + } + } + + unsigned enclosingHndIndex = EHblkDsc::NO_ENCLOSING_INDEX; + if (tryEntry->hasHndIndex()) + { + enclosingHndIndex = tryEntry->getHndIndex(); + } + + // Now blocks contains an entry for each block to clone, + // and maps (if non-null) contains a key for each block to clone, with nullptr value + // + JITDUMP("Will need to clone %u EH regions (outermost: EH#%02u) and %u blocks\n", regionCount, outermostTryIndex, + blocks.size()); + + // Allocate the new EH clauses. First, find the enclosing EH clause, if any... + // we will want to allocate the new clauses just "before" this point. + // + // If the region we're cloning is not enclosed, we put it at the end of the table; + // this is cheaper than any other insertion point, as no existing regions get renumbered. + // + unsigned insertBeforeIndex = enclosingTryIndex; + if (insertBeforeIndex == EHblkDsc::NO_ENCLOSING_INDEX) + { + JITDUMP("Cloned EH clauses will go at the end of the EH table\n"); + insertBeforeIndex = compHndBBtabCount; + } + else + { + JITDUMP("Cloned EH clauses will go before enclosing region EH#%02u\n", enclosingTryIndex); + } + + // Once we call fgAddEHTableEntries, all the EH indicies at or above insertBeforeIndex + // will shift, and the EH table may reallocate. + // + // This addition may also fail, if the table would become too large... + // + EHblkDsc* const clonedOutermostEbd = + fgAddEHTableEntries(insertBeforeIndex, regionCount, /* deferAdding */ deferCloning); + + if (clonedOutermostEbd == nullptr) + { + JITDUMP("fgCloneTryRegion: unable to expand EH table\n"); + return nullptr; + } + + if (deferCloning) + { + JITDUMP("fgCloneTryRegion: cloning is possible\n"); + return tryEntry; + } + + // None of the EH regions we're cloning should have been renumbered, + // though their clauses may have been moved to a new table.. + // + EHblkDsc* const oldTryEbd = ehGetDsc(outermostTryIndex); + assert(oldTryEbd->ebdTryBeg == tryEntry); + + // Callers will see enclosing EH region indices shift by this much + // + *ehRegionShift = regionCount; + + // The EH table now looks like the following, for a middle insertion: + // + // =================== + // EH 0 -- unrelated regions + // ... + // --------------- + // EH x -- innermost region to clone + // ... + // EH x + regionCount - 1 -- outermost region to clone + // --------------- + // --------------- + // EH x + regionCount -- innermost cloned region + // ... + // EH x + 2*regionCount - 1 -- outermost cloned region + // --------------- + // ... + // EH k -- enclosing try / hnd regions (if any), or other regions + // + // =================== + // + // And like this, for an end insertion: + // + // =================== + // EH 0 -- unrelated regions + // ... + // --------------- + // EH x -- innermost region to clone + // ... + // EH x + regionCount - 1 -- outermost region to clone + // --------------- + // ... + // EH k -- unrelated regions + // ... + // --------------- + // EH x + regionCount -- innermost cloned region + // ... + // EH x + 2*regionCount - 1 -- outermost cloned region + // --------------- + // =================== + // + // So the cloned clauses will have higher indices, and each cloned clause + // should be the same distance from its original, but that distance + // depends on the kind of insertion. + // + // Compute that distance as `indexShift`. + // + unsigned const clonedOutermostRegionIndex = ehGetIndex(clonedOutermostEbd); + assert(clonedOutermostRegionIndex > outermostTryIndex); + unsigned const indexShift = clonedOutermostRegionIndex - outermostTryIndex; + + // Copy over the EH table entries and adjust their enclosing indicies. + // We will adjust the block references below. + // + unsigned const clonedLowestRegionIndex = clonedOutermostRegionIndex - regionCount + 1; + JITDUMP("New EH regions are EH#%02u ... EH#%02u\n", clonedLowestRegionIndex, clonedOutermostRegionIndex); + for (unsigned XTnum = clonedLowestRegionIndex; XTnum <= clonedOutermostRegionIndex; XTnum++) + { + unsigned originalXTnum = XTnum - indexShift; + compHndBBtab[XTnum] = compHndBBtab[originalXTnum]; + EHblkDsc* const ebd = &compHndBBtab[XTnum]; + + // Note the outermost region enclosing indices stay the same, because the original + // clause entries got adjusted when we inserted the new clauses. + // + if (ebd->ebdEnclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX) + { + if (XTnum < clonedOutermostRegionIndex) + { + ebd->ebdEnclosingTryIndex += (unsigned short)indexShift; + } + JITDUMP("EH#%02u now enclosed in try EH#%02u\n", XTnum, ebd->ebdEnclosingTryIndex); + } + else + { + JITDUMP("EH#%02u not enclosed in any try\n", XTnum); + } + + if (ebd->ebdEnclosingHndIndex != EHblkDsc::NO_ENCLOSING_INDEX) + { + if (XTnum < clonedOutermostRegionIndex) + { + ebd->ebdEnclosingHndIndex += (unsigned short)indexShift; + } + JITDUMP("EH#%02u now enclosed in handler EH#%02u\n", XTnum, ebd->ebdEnclosingHndIndex); + } + else + { + JITDUMP("EH#%02u not enclosed in any handler\n", XTnum); + } + } + + // Clone the blocks. + // + // All blocks are initially put into the enclosing EH region, and it is not + // extended to cover them all. The step below puts the blocks into the + // appropriate cloned region and fixes up region extents. + // + JITDUMP("Cloning blocks for try...\n"); + for (BasicBlock* const block : blocks) + { + BasicBlock* const newBlock = fgNewBBafter(BBJ_ALWAYS, *insertAfter, /* extendRegion */ false); + JITDUMP("Adding " FMT_BB " (copy of " FMT_BB ") after " FMT_BB "\n", newBlock->bbNum, block->bbNum, + (*insertAfter)->bbNum); + map->Set(block, newBlock, BlockToBlockMap::SetKind::Overwrite); + BasicBlock::CloneBlockState(this, newBlock, block); + newBlock->scaleBBWeight(profileScale); + *insertAfter = newBlock; + } + JITDUMP("Done cloning blocks for try...\n"); + + // Update the cloned block regions and impacted EH clauses + // + // Here we are assuming that the cloned try is always placed lexically *after* thge + // original, so that if the original try ended at the same point as an enclosing try, + // the new end point of the enclosing try is in the cloned try. + // + JITDUMP("Fixing region indices...\n"); + for (BasicBlock* const block : blocks) + { + BasicBlock* newBlock = nullptr; + bool found = map->Lookup(block, &newBlock); + assert(found); + + // Update block references in the EH table + // + // `region` is the index of a cloned EH clause that may still refer to `block`. + // Update these block references and those of enclosing regions to refer to `newBlock`. + // + auto updateBlockReferences = [=](unsigned region) { + while (true) + { + EHblkDsc* const ebd = ehGetDsc(region); + + if (ebd->ebdTryBeg == block) + { + ebd->ebdTryBeg = newBlock; + JITDUMP("Try begin for EH#%02u is " FMT_BB "\n", region, newBlock->bbNum); + } + + if (ebd->ebdTryLast == block) + { + fgSetTryEnd(ebd, newBlock); + } + + if (ebd->ebdHndBeg == block) + { + ebd->ebdHndBeg = newBlock; + JITDUMP("Handler begin for EH#%02u is " FMT_BB "\n", region, newBlock->bbNum); + } + + if (ebd->ebdHndLast == block) + { + fgSetHndEnd(ebd, newBlock); + } + + if (ebd->HasFilter() && (ebd->ebdFilter == block)) + { + ebd->ebdFilter = newBlock; + JITDUMP("Filter begin for EH#%02u is " FMT_BB "\n", region, newBlock->bbNum); + } + + bool inTry = false; + region = ehGetEnclosingRegionIndex(region, &inTry); + + if (region == EHblkDsc::NO_ENCLOSING_INDEX) + { + break; + } + } + }; + + // Fix the EH regions for each cloned block, and the block + // references in the EH table entries. + // + // If the block's try index was outside of the original try region + // (say a handler for the try) then it is already properly adjusted. + // + if (block->hasTryIndex()) + { + const unsigned originalTryIndex = block->getTryIndex(); + unsigned cloneTryIndex = originalTryIndex; + + if (originalTryIndex <= outermostTryIndex) + { + cloneTryIndex += indexShift; + } + + EHblkDsc* const originalEbd = ehGetDsc(originalTryIndex); + EHblkDsc* const clonedEbd = ehGetDsc(cloneTryIndex); + newBlock->setTryIndex(cloneTryIndex); + updateBlockReferences(cloneTryIndex); + } + + if (block->hasHndIndex()) + { + const unsigned originalHndIndex = block->getHndIndex(); + + // if (originalHndIndex == + const unsigned cloneHndIndex = originalHndIndex + indexShift; + EHblkDsc* const originalEbd = ehGetDsc(originalHndIndex); + EHblkDsc* const clonedEbd = ehGetDsc(cloneHndIndex); + newBlock->setHndIndex(cloneHndIndex); + updateBlockReferences(cloneHndIndex); + } + } + JITDUMP("Done fixing region indices\n"); + + // Redirect any branches within the newly-cloned blocks or + // from cloned blocks to non-cloned blocks + // + if (addEdges) + { + JITDUMP("Adding edges in the newly cloned try\n"); + for (BasicBlock* const block : BlockToBlockMap::KeyIteration(map)) + { + BasicBlock* newBlock = (*map)[block]; + // Jump kind/target should not be set yet + assert(newBlock->KindIs(BBJ_ALWAYS)); + assert(!newBlock->HasInitializedTarget()); + optSetMappedBlockTargets(block, newBlock, map); + } + } + else + { + JITDUMP("Not adding edges in the newly cloned try\n"); + } + + // If the original regions had any ACDs, create equivalent + // ones for the cloned regions + // + if (fgHasAddCodeDscMap()) + { + AddCodeDscMap* const map = fgGetAddCodeDscMap(); + ArrayStack cloned(getAllocator(CMK_TryRegionClone)); + + assert(clonedLowestRegionIndex >= indexShift); + assert(clonedOutermostRegionIndex >= indexShift); + + unsigned const originalLowestRegionIndex = clonedLowestRegionIndex - indexShift; + unsigned const originalOutermostRegionIndex = clonedOutermostRegionIndex - indexShift; + + for (AddCodeDsc* const add : AddCodeDscMap::ValueIteration(map)) + { + bool needsCloningForTry = false; + bool needsCloningForHnd = false; + bool inTry = add->acdTryIndex > 0; + bool inHnd = add->acdHndIndex > 0; + + // acd region numbers are shifted up by one so + // that a value of zero means "not in an EH region" + // + if (inTry) + { + unsigned const trueAcdTryIndex = add->acdTryIndex - 1; + + if ((trueAcdTryIndex >= originalLowestRegionIndex) && (trueAcdTryIndex <= originalOutermostRegionIndex)) + { + needsCloningForTry = true; + } + } + + if (inHnd) + { + unsigned const trueAcdHndIndex = add->acdHndIndex - 1; + + if ((trueAcdHndIndex >= originalLowestRegionIndex) && (trueAcdHndIndex <= originalOutermostRegionIndex)) + { + needsCloningForHnd = true; + } + } + + if (!needsCloningForTry && !needsCloningForHnd) + { + continue; + } + + JITDUMP("Will need to clone: "); + JITDUMPEXEC(add->Dump()); + + AddCodeDsc* clone = new (this, CMK_Unknown) AddCodeDsc; + clone->acdDstBlk = nullptr; + + if (needsCloningForTry) + { + clone->acdTryIndex = (unsigned short)(add->acdTryIndex + indexShift); + } + else if (inTry) + { + clone->acdTryIndex = add->acdTryIndex; + } + else + { + clone->acdTryIndex = 0; + } + + if (needsCloningForHnd) + { + clone->acdHndIndex = (unsigned short)(add->acdHndIndex + indexShift); + } + else if (inHnd) + { + clone->acdHndIndex = add->acdHndIndex; + } + else + { + clone->acdHndIndex = 0; + } + + clone->acdKeyDsg = add->acdKeyDsg; + clone->acdKind = add->acdKind; + clone->acdUsed = false; + +#if !FEATURE_FIXED_OUT_ARGS + clone->acdStkLvl = 0; + clone->acdStkLvlInit = false; +#endif // !FEATURE_FIXED_OUT_ARGS + INDEBUG(clone->acdNum = acdCount++); + cloned.Push(clone); + } + + while (cloned.Height() > 0) + { + AddCodeDsc* const clone = cloned.Pop(); + AddCodeDscKey key(clone); + map->Set(key, clone); + JITDUMP("Added clone: "); + JITDUMPEXEC(clone->Dump()); + } + } + + BasicBlock* const clonedTryEntry = (*map)[tryEntry]; + JITDUMP("Done cloning, cloned try entry is " FMT_BB "\n", clonedTryEntry->bbNum); + return clonedTryEntry; +} + +//------------------------------------------------------------------------ +// fgCanCloneTryRegion: see if a try region can be cloned +// +// Arguments: +// tryEntry - try entry block +// +// Returns: +// true if try region is clonable +// +bool Compiler::fgCanCloneTryRegion(BasicBlock* tryEntry) +{ + assert(bbIsTryBeg(tryEntry)); + EnsureBasicBlockEpoch(); + BlockSet visited(BlockSetOps::MakeEmpty(this)); + BasicBlock* const result = + fgCloneTryRegion(tryEntry, visited, /* map */ nullptr, /* addEdges */ false, /* scaleProfile */ 0.0, + /* insertAfter */ nullptr, /* ehRegionShift */ nullptr); + + return result != nullptr; +} diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 10f929a9cd74c0..65b91dd723da3b 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -1420,10 +1420,15 @@ void Compiler::fgAddSyncMethodEnterExit() // Add the new EH region at the end, since it is the least nested, // and thus should be last. - EHblkDsc* newEntry; - unsigned XTnew = compHndBBtabCount; + EHblkDsc* newEntry = nullptr; + unsigned XTnew = compHndBBtabCount; - newEntry = fgAddEHTableEntry(XTnew); + newEntry = fgAddEHTableEntries(XTnew); + + if (newEntry == nullptr) + { + IMPL_LIMITATION("too many exception clauses"); + } // Initialize the new entry @@ -5972,17 +5977,74 @@ bool FlowGraphNaturalLoop::CanDuplicate(INDEBUG(const char** reason)) } #endif - Compiler* comp = m_dfsTree->GetCompiler(); - BasicBlockVisit result = VisitLoopBlocks([=](BasicBlock* block) { - if (!BasicBlock::sameEHRegion(block, GetHeader())) + Compiler* comp = m_dfsTree->GetCompiler(); + BasicBlock* const header = GetHeader(); + + const bool allowEHCloning = JitConfig.JitCloneLoopsWithEH() > 0; + CompAllocator alloc = comp->getAllocator(CMK_LoopClone); + ArrayStack tryRegionsToClone(alloc); + + BasicBlockVisit result = VisitLoopBlocks([=, &tryRegionsToClone](BasicBlock* block) { + const bool inSameRegionAsHeader = BasicBlock::sameEHRegion(block, header); + + if (inSameRegionAsHeader) { - INDEBUG(*reason = "Loop not entirely within one EH region"); - return BasicBlockVisit::Abort; + return BasicBlockVisit::Continue; } - return BasicBlockVisit::Continue; + if (allowEHCloning) + { + if (comp->bbIsTryBeg(block)) + { + // Check if this is an "outermost" try within the loop. + // If so, we have more checking to do later on. + // + const bool headerInTry = header->hasTryIndex(); + unsigned blockIndex = block->getTryIndex(); + unsigned outermostBlockIndex = comp->ehTrueEnclosingTryIndexIL(blockIndex); + + if ((headerInTry && (outermostBlockIndex == header->getTryIndex())) || + !headerInTry && (outermostBlockIndex == EHblkDsc::NO_ENCLOSING_INDEX)) + { + tryRegionsToClone.Push(block); + } + } + + return BasicBlockVisit::Continue; + } + + INDEBUG(*reason = "Loop not entirely within one EH region"); + return BasicBlockVisit::Abort; }); + // Check any enclosed try regions to make sure they can be cloned + // (note this is potentially misleading with multiple trys as + // we are considering cloning each in isolation). + // + const unsigned numberOfTryRegions = tryRegionsToClone.Height(); + if ((result != BasicBlockVisit::Abort) && (numberOfTryRegions > 0)) + { + assert(allowEHCloning); + + // Possibly limit to just 1 region...? + // + JITDUMP(FMT_LP " contains %u top-level try region%s\n", GetIndex(), numberOfTryRegions, + numberOfTryRegions > 1 ? "s" : ""); + + while (tryRegionsToClone.Height() > 0) + { + BasicBlock* const tryEntry = tryRegionsToClone.Pop(); + bool const canCloneTry = comp->fgCanCloneTryRegion(tryEntry); + + if (!canCloneTry) + { + INDEBUG(*reason = "Loop contains uncloneable try region"); + result = BasicBlockVisit::Abort; + break; + } + } + } + return result != BasicBlockVisit::Abort; } @@ -5998,38 +6060,182 @@ void FlowGraphNaturalLoop::Duplicate(BasicBlock** insertAfter, BlockToBlockMap* { assert(CanDuplicate(nullptr)); - Compiler* comp = m_dfsTree->GetCompiler(); + Compiler* comp = m_dfsTree->GetCompiler(); + const bool canCloneTry = JitConfig.JitCloneLoopsWithEH() > 0; + bool clonedTry = false; + BasicBlock* const insertionPoint = *insertAfter; + + // If the insertion point is within an EH region, remember all the EH regions + // current that end at the insertion point, so we can properly extend them + // when we're done cloning. + // + struct RegionEnd + { + RegionEnd(unsigned regionIndex, BasicBlock* block, bool isTryEnd) + : m_regionIndex(regionIndex) + , m_block(block) + , m_isTryEnd(isTryEnd) + { + } + unsigned m_regionIndex; + BasicBlock* m_block; + bool m_isTryEnd; + }; + + ArrayStack regionEnds(comp->getAllocator(CMK_LoopClone)); + + // Record enclosing EH region block references, + // so we can keep track of what the "before" picture looked like. + // + if (insertionPoint->hasTryIndex() || insertionPoint->hasHndIndex()) + { + bool inTry = false; + unsigned region = comp->ehGetMostNestedRegionIndex(insertionPoint, &inTry); + + if (region != 0) + { + // Convert to true region index + region--; + + while (true) + { + EHblkDsc* const ebd = comp->ehGetDsc(region); + + if (inTry) + { + JITDUMP("Noting that enclosing try EH#%02u ends at " FMT_BB "\n", region, ebd->ebdTryLast->bbNum); + regionEnds.Emplace(region, ebd->ebdTryLast, true); + } + else + { + JITDUMP("Noting that enclsoing handler EH#%02u ends at " FMT_BB "\n", region, + ebd->ebdHndLast->bbNum); + regionEnds.Emplace(region, ebd->ebdHndLast, false); + } + + region = comp->ehGetEnclosingRegionIndex(region, &inTry); + + if (region == EHblkDsc::NO_ENCLOSING_INDEX) + { + break; + } + } + } + } + + // Keep track of how much the region end EH indices change because of EH region cloning. + // + unsigned ehRegionShift = 0; + comp->EnsureBasicBlockEpoch(); + BlockSet visited(BlockSetOps::MakeEmpty(comp)); - BasicBlock* bottom = GetLexicallyBottomMostBlock(); + VisitLoopBlocksLexical([=, &visited, &clonedTry, &ehRegionShift](BasicBlock* blk) { + if (canCloneTry) + { + // If we allow cloning loops with EH, we may have already handled + // this loop block as part of a containing try region. + // + if (BlockSetOps::IsMember(comp, visited, blk->bbNum)) + { + return BasicBlockVisit::Continue; + } + + // If this is a try region entry, clone the entire region now. + // Defer adding edges and extending EH regions until later. + // + // Updates map, visited, and insertAfter. + // + if (comp->bbIsTryBeg(blk)) + { + unsigned regionShift = 0; + BasicBlock* const clonedBlock = comp->fgCloneTryRegion(blk, visited, map, /* addEdges */ false, + weightScale, insertAfter, ®ionShift); + assert(clonedBlock != nullptr); + ehRegionShift += regionShift; + clonedTry = true; + return BasicBlockVisit::Continue; + } + } + else + { + // We're not expecting to find enclosed EH regions + // + assert(!comp->bbIsTryBeg(blk)); + assert(!comp->bbIsHandlerBeg(blk)); + assert(!BlockSetOps::IsMember(comp, visited, blk->bbNum)); + } - VisitLoopBlocksLexical([=](BasicBlock* blk) { - // Initialize newBlk as BBJ_ALWAYS without jump target, and fix up jump target later - // with BasicBlock::CopyTarget(). - BasicBlock* newBlk = comp->fgNewBBafter(BBJ_ALWAYS, *insertAfter, /*extendRegion*/ true); + // `blk` was not in loop-enclosed try region or companion region. + // + // Initialize newBlk as BBJ_ALWAYS without jump target; these are fixed up subsequently. + // + // CloneBlockState puts newBlk in the proper EH region. We will fix enclosing region extents + // once cloning is done. + // + BasicBlock* newBlk = comp->fgNewBBafter(BBJ_ALWAYS, *insertAfter, /* extendRegion */ false); JITDUMP("Adding " FMT_BB " (copy of " FMT_BB ") after " FMT_BB "\n", newBlk->bbNum, blk->bbNum, (*insertAfter)->bbNum); - BasicBlock::CloneBlockState(comp, newBlk, blk); - // We're going to create the preds below, which will set the bbRefs properly, - // so clear out the cloned bbRefs field. - newBlk->bbRefs = 0; - + assert(newBlk->bbRefs == 0); newBlk->scaleBBWeight(weightScale); - - *insertAfter = newBlk; map->Set(blk, newBlk, BlockToBlockMap::Overwrite); + *insertAfter = newBlk; return BasicBlockVisit::Continue; }); + // Note the EH table may have grown, if we cloned try regions. If there was + // an enclosing EH entry, then its EH table entries will have shifted to + // higher index values. + // + // Update the enclosing EH region ends to reflect the new blocks we added. + // (here we assume cloned blocks are placed lexically after their originals, so if a + // region-ending block was cloned, the new region end is the last block cloned). + // + // Note we don't consult the block references in EH table here, since they + // may reflect interim updates to the region endpoints. + // + BasicBlock* const lastClonedBlock = *insertAfter; + while (regionEnds.Height() > 0) + { + RegionEnd r = regionEnds.Pop(); + EHblkDsc* const ebd = comp->ehGetDsc(r.m_regionIndex + ehRegionShift); + + if (r.m_block == insertionPoint) + { + if (r.m_isTryEnd) + { + comp->fgSetTryEnd(ebd, lastClonedBlock); + } + else + { + comp->fgSetHndEnd(ebd, lastClonedBlock); + } + } + else + { + if (r.m_isTryEnd) + { + comp->fgSetTryEnd(ebd, r.m_block); + } + else + { + comp->fgSetHndEnd(ebd, r.m_block); + } + } + } + // Now go through the new blocks, remapping their jump targets within the loop // and updating the preds lists. + // VisitLoopBlocks([=](BasicBlock* blk) { BasicBlock* newBlk = nullptr; bool b = map->Lookup(blk, &newBlk); assert(b && newBlk != nullptr); + JITDUMP("Updating targets: " FMT_BB " mapped to " FMT_BB "\n", blk->bbNum, newBlk->bbNum); + // Jump target should not be set yet assert(!newBlk->HasInitializedTarget()); @@ -6039,6 +6245,23 @@ void FlowGraphNaturalLoop::Duplicate(BasicBlock** insertAfter, BlockToBlockMap* return BasicBlockVisit::Continue; }); + + // If we cloned any EH regions, we may have some non-loop blocks to process as well. + // + if (clonedTry) + { + for (BasicBlock* const blk : BlockToBlockMap::KeyIteration(map)) + { + if (!this->ContainsBlock(blk)) + { + BasicBlock* newBlk = nullptr; + bool b = map->Lookup(blk, &newBlk); + assert(b && newBlk != nullptr); + assert(!newBlk->HasInitializedTarget()); + comp->optSetMappedBlockTargets(blk, newBlk, map); + } + } + } } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index d63762eef2ec2b..00be822692d7ba 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -50,23 +50,24 @@ CONFIG_INTEGER(DisplayLoopHoistStats, "JitLoopHoistStats", 0) // Display JIT loo // Recommended to use with JitStdOutFile flag. CONFIG_INTEGER(DisplayLsraStats, "JitLsraStats", 0) -CONFIG_STRING(JitLsraOrdering, "JitLsraOrdering") // LSRA heuristics ordering -CONFIG_INTEGER(EnablePCRelAddr, "JitEnablePCRelAddr", 1) // Whether absolute addr be encoded as PC-rel offset by - // RyuJIT where possible -CONFIG_INTEGER(JitAssertOnMaxRAPasses, "JitAssertOnMaxRAPasses", 0) -CONFIG_INTEGER(JitBreakEmitOutputInstr, "JitBreakEmitOutputInstr", -1) -CONFIG_INTEGER(JitBreakMorphTree, "JitBreakMorphTree", 0xffffffff) -CONFIG_INTEGER(JitBreakOnBadCode, "JitBreakOnBadCode", 0) -CONFIG_INTEGER(JitBreakOnMinOpts, "JITBreakOnMinOpts", 0) // Halt if jit switches to MinOpts -CONFIG_INTEGER(JitCloneLoops, "JitCloneLoops", 1) // If 0, don't clone. Otherwise clone loops for optimizations. -CONFIG_INTEGER(JitCloneLoopsWithGdvTests, "JitCloneLoopsWithGdvTests", 1) // If 0, don't clone loops based on - // invariant type/method address tests -RELEASE_CONFIG_INTEGER(JitCloneLoopsSizeLimit, "JitCloneLoopsSizeLimit", 400) // limit cloning to loops with less - // than this many tree nodes -CONFIG_INTEGER(JitDebugLogLoopCloning, "JitDebugLogLoopCloning", 0) // In debug builds log places where loop cloning - // optimizations are performed on the fast path. -CONFIG_INTEGER(JitDefaultFill, "JitDefaultFill", 0xdd) // In debug builds, initialize the memory allocated by the nra - // with this byte. +CONFIG_STRING(JitLsraOrdering, W("JitLsraOrdering")) // LSRA heuristics ordering +CONFIG_INTEGER(EnablePCRelAddr, W("JitEnablePCRelAddr"), 1) // Whether absolute addr be encoded as PC-rel offset by + // RyuJIT where possible +CONFIG_INTEGER(JitAssertOnMaxRAPasses, W("JitAssertOnMaxRAPasses"), 0) +CONFIG_INTEGER(JitBreakEmitOutputInstr, W("JitBreakEmitOutputInstr"), -1) +CONFIG_INTEGER(JitBreakMorphTree, W("JitBreakMorphTree"), 0xffffffff) +CONFIG_INTEGER(JitBreakOnBadCode, W("JitBreakOnBadCode"), 0) +CONFIG_INTEGER(JitBreakOnMinOpts, W("JITBreakOnMinOpts"), 0) // Halt if jit switches to MinOpts +CONFIG_INTEGER(JitCloneLoops, W("JitCloneLoops"), 1) // If 0, don't clone. Otherwise clone loops for optimizations. +CONFIG_INTEGER(JitCloneLoopsWithGdvTests, W("JitCloneLoopsWithGdvTests"), 1) // If 0, don't clone loops based on + // invariant type/method address tests +CONFIG_INTEGER(JitCloneLoopsWithEH, W("JitCloneLoopsWithEH"), 1) // If 0, don't clone loops containing EH regions +RELEASE_CONFIG_INTEGER(JitCloneLoopsSizeLimit, W("JitCloneLoopsSizeLimit"), 400) // limit cloning to loops with less + // than this many tree nodes +CONFIG_INTEGER(JitDebugLogLoopCloning, W("JitDebugLogLoopCloning"), 0) // In debug builds log places where loop cloning + // optimizations are performed on the fast path. +CONFIG_INTEGER(JitDefaultFill, W("JitDefaultFill"), 0xdd) // In debug builds, initialize the memory allocated by the nra + // with this byte. // Minimum weight needed for the first block of a loop to make it a candidate for alignment. CONFIG_INTEGER(JitAlignLoopMinBlockWeight, "JitAlignLoopMinBlockWeight", DEFAULT_ALIGN_LOOP_MIN_BLOCK_WEIGHT) diff --git a/src/coreclr/jit/jiteh.cpp b/src/coreclr/jit/jiteh.cpp index e3d17147024046..1461706abf9e65 100644 --- a/src/coreclr/jit/jiteh.cpp +++ b/src/coreclr/jit/jiteh.cpp @@ -1463,7 +1463,7 @@ void Compiler::fgAllocEHTable() // the maximum number of clauses we will need might be very large. We allocate // twice the number of EH clauses in the IL, which should be good in practice. // In extreme cases, we might need to abandon this and reallocate. See - // fgAddEHTableEntry() for more details. + // fgAddEHTableEntries() for more details. #ifdef DEBUG compHndBBtabAllocCount = info.compXcptnsCount; // force the resizing code to hit more frequently in DEBUG @@ -1676,20 +1676,61 @@ void Compiler::fgRemoveEHTableEntry(unsigned XTnum) } } -/***************************************************************************** - * - * Add a single exception table entry at index 'XTnum', [0 <= XTnum <= compHndBBtabCount]. - * If 'XTnum' is compHndBBtabCount, then add the entry at the end. - * Note that this changes the size of the exception table. - * All the blocks referring to the various index values are updated. - * The table entry itself is not filled in. - * Returns a pointer to the new entry. - */ -EHblkDsc* Compiler::fgAddEHTableEntry(unsigned XTnum) +//------------------------------------------------------------------------ +// fgAddEHTableEntries: add new EH table entries +// +// Arguments: +// XTnum -- new entries will be added before this entry +// (use compHndBBtabCount to add at end) +// count -- number of entries to add +// deferAdding -- if true, don't actually add new entries, just check +// if they can be added; return nullptr if not. +// +// Returns: +// A pointer to the new entry with the highest index, or +// nullptr if the table cannot be expanded to hold the new entries +// +// Notes: +// +// Note that changes the size of the exception table. +// All the blocks referring to the various index values are updated. +// The new table entries are not filled in. +// +// Note mid-table insertions can be expensive as they must walk +// all blocks to update block EH region indices. +// +// If there are active ACDs, these are updated as needed. Callers who +// are making room for cloned EH must take pains to find and clone these +// as well... +// +EHblkDsc* Compiler::fgAddEHTableEntries(unsigned XTnum, unsigned count, bool deferAdding) { - assert(UsesFunclets()); + bool reallocate = false; + bool const insert = (XTnum != compHndBBtabCount); + unsigned const newCount = compHndBBtabCount + count; + + if (newCount > MAX_XCPTN_INDEX) + { + // We have run out of indices. Fail. + // + return nullptr; + } + + if (deferAdding) + { + // We can add count entries... + // + return compHndBBtab; + } + + if (newCount > compHndBBtabAllocCount) + { + // We need to reallocate the table + // + reallocate = true; + } - if (XTnum != compHndBBtabCount) + if (insert) { // Update all enclosing links that will get invalidated by inserting an entry at 'XTnum' @@ -1698,12 +1739,12 @@ EHblkDsc* Compiler::fgAddEHTableEntry(unsigned XTnum) if ((xtab->ebdEnclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX) && (xtab->ebdEnclosingTryIndex >= XTnum)) { // Update the enclosing scope link - xtab->ebdEnclosingTryIndex++; + xtab->ebdEnclosingTryIndex += (unsigned short)count; } if ((xtab->ebdEnclosingHndIndex != EHblkDsc::NO_ENCLOSING_INDEX) && (xtab->ebdEnclosingHndIndex >= XTnum)) { // Update the enclosing scope link - xtab->ebdEnclosingHndIndex++; + xtab->ebdEnclosingHndIndex += (unsigned short)count; } } @@ -1713,31 +1754,68 @@ EHblkDsc* Compiler::fgAddEHTableEntry(unsigned XTnum) { if (blk->hasTryIndex() && (blk->getTryIndex() >= XTnum)) { - blk->setTryIndex(blk->getTryIndex() + 1); + blk->setTryIndex(blk->getTryIndex() + count); } if (blk->hasHndIndex() && (blk->getHndIndex() >= XTnum)) { - blk->setHndIndex(blk->getHndIndex() + 1); + blk->setHndIndex(blk->getHndIndex() + count); } } - } - // Increase the number of entries in the EH table by one + // Update impacted ACDs + // + if (fgHasAddCodeDscMap()) + { + AddCodeDscMap* const map = fgGetAddCodeDscMap(); + ArrayStack modified(getAllocator(CMK_Unknown)); - if (compHndBBtabCount == compHndBBtabAllocCount) - { - // We need to reallocate the table + for (AddCodeDsc* const add : AddCodeDscMap::ValueIteration(map)) + { + bool isModified = false; + AddCodeDscKey oldKey(add); + + if (add->acdTryIndex > XTnum) + { + add->acdTryIndex += (unsigned short)count; + isModified = true; + } - if (compHndBBtabAllocCount == MAX_XCPTN_INDEX) - { // We're already at the max size for indices to be unsigned short - IMPL_LIMITATION("too many exception clauses"); + if (add->acdHndIndex > XTnum) + { + isModified = true; + add->acdHndIndex += (unsigned short)count; + } + + if (isModified) + { + add->UpdateKeyDesignator(this); + bool const removed = map->Remove(oldKey); + assert(removed); + modified.Push(add); + } + } + + while (modified.Height() > 0) + { + AddCodeDsc* const add = modified.Pop(); + AddCodeDscKey newKey(add); + JITDUMP("ACD%u updated\n", add->acdNum); + map->Set(newKey, add); + JITDUMPEXEC(add->Dump()); + } } + } - // Double the table size. For stress, we could use +1. Note that if the table isn't allocated + // If necessary, increase the number of entries in the EH table + // + if (reallocate) + { + // Roughly double the table size. Note that if the table isn't allocated // yet, such as when we add an EH region for synchronized methods that don't already have one, // we start at zero, so we need to make sure the new table has at least one entry. - unsigned newHndBBtabAllocCount = max(1u, compHndBBtabAllocCount * 2); + // + unsigned newHndBBtabAllocCount = max(1u, compHndBBtabAllocCount + newCount); noway_assert(compHndBBtabAllocCount < newHndBBtabAllocCount); // check for overflow if (newHndBBtabAllocCount > MAX_XCPTN_INDEX) @@ -1745,21 +1823,21 @@ EHblkDsc* Compiler::fgAddEHTableEntry(unsigned XTnum) newHndBBtabAllocCount = MAX_XCPTN_INDEX; // increase to the maximum size we allow } - JITDUMP("*********** fgAddEHTableEntry: increasing EH table size from %d to %d\n", compHndBBtabAllocCount, + JITDUMP("*********** fgAddEHTableEntries: increasing EH table size from %d to %d\n", compHndBBtabAllocCount, newHndBBtabAllocCount); compHndBBtabAllocCount = newHndBBtabAllocCount; EHblkDsc* newTable = new (this, CMK_BasicBlock) EHblkDsc[compHndBBtabAllocCount]; - // Move over the stuff before the new entry + // Move over the stuff before the new entries memcpy_s(newTable, compHndBBtabAllocCount * sizeof(*compHndBBtab), compHndBBtab, XTnum * sizeof(*compHndBBtab)); if (XTnum != compHndBBtabCount) { // Move over the stuff after the new entry - memcpy_s(newTable + XTnum + 1, (compHndBBtabAllocCount - XTnum - 1) * sizeof(*compHndBBtab), + memcpy_s(newTable + XTnum + count, (compHndBBtabAllocCount - XTnum - 1) * sizeof(*compHndBBtab), compHndBBtab + XTnum, (compHndBBtabCount - XTnum) * sizeof(*compHndBBtab)); } @@ -1770,18 +1848,18 @@ EHblkDsc* Compiler::fgAddEHTableEntry(unsigned XTnum) } else if (XTnum != compHndBBtabCount) { - // Leave the elements before the new element alone. Move the ones after it, to make space. + // Leave the elements before the new elements alone. Move the ones after it, to make space. EHblkDsc* HBtab = compHndBBtab + XTnum; - memmove_s(HBtab + 1, (compHndBBtabAllocCount - XTnum - 1) * sizeof(*compHndBBtab), HBtab, + memmove_s(HBtab + count, (compHndBBtabAllocCount - XTnum - 1) * sizeof(*compHndBBtab), HBtab, (compHndBBtabCount - XTnum) * sizeof(*compHndBBtab)); } // Now the entry is there, but not filled in - - compHndBBtabCount++; - return compHndBBtab + XTnum; + // + compHndBBtabCount = newCount; + return compHndBBtab + XTnum + (count - 1); } /***************************************************************************** diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index 2901c0bb2a55bd..98aa551d534188 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -2094,12 +2094,21 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex // loop itself. All failed conditions will branch to the slow preheader. // The slow preheader will unconditionally branch to the slow loop header. // This puts the slow loop in the canonical loop form. + // + // The slow preheader needs to go in the same EH region as the preheader. + // If we don't extend, we will fix up region extents when the loop is duplicated. + // JITDUMP("Create unique preheader for slow path loop\n"); - BasicBlock* slowPreheader = fgNewBBafter(BBJ_ALWAYS, newPred, /*extendRegion*/ true); + const bool extendRegion = BasicBlock::sameEHRegion(bottom, preheader); + BasicBlock* slowPreheader = fgNewBBafter(BBJ_ALWAYS, newPred, extendRegion); JITDUMP("Adding " FMT_BB " after " FMT_BB "\n", slowPreheader->bbNum, newPred->bbNum); slowPreheader->bbWeight = newPred->isRunRarely() ? BB_ZERO_WEIGHT : ambientWeight; slowPreheader->CopyFlags(newPred, (BBF_PROF_WEIGHT | BBF_RUN_RARELY)); slowPreheader->scaleBBWeight(LoopCloneContext::slowPathWeightScaleFactor); + if (!extendRegion) + { + slowPreheader->copyEHRegion(preheader); + } newPred = slowPreheader; // Now we'll clone the blocks of the loop body. These cloned blocks will be the slow path. diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 28a19fe53dbe80..86a744b3e35375 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -585,6 +585,8 @@ void Compiler::optSetMappedBlockTargets(BasicBlock* blk, BasicBlock* newBlk, Blo case BBJ_CALLFINALLY: case BBJ_CALLFINALLYRET: case BBJ_LEAVE: + case BBJ_EHCATCHRET: + case BBJ_EHFILTERRET: { FlowEdge* newEdge; @@ -706,16 +708,6 @@ void Compiler::optSetMappedBlockTargets(BasicBlock* blk, BasicBlock* newBlk, Blo break; } - case BBJ_EHCATCHRET: - case BBJ_EHFILTERRET: - { - // newBlk's jump target should not need to be redirected - assert(!redirectMap->Lookup(blk->GetTarget(), &newTarget)); - FlowEdge* newEdge = fgAddRefPred(newBlk->GetTarget(), newBlk); - newBlk->SetKindAndTargetEdge(blk->GetKind(), newEdge); - break; - } - default: // blk doesn't have a jump destination assert(blk->NumSucc() == 0); From b63eb143bf01fcb78fc2c6b8cbf1a375af46a80d Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Wed, 20 Nov 2024 13:06:05 -0800 Subject: [PATCH 02/17] fixes --- src/coreclr/jit/flowgraph.cpp | 9 ++++++-- src/coreclr/jit/jitconfigvalues.h | 36 +++++++++++++++---------------- 2 files changed, 25 insertions(+), 20 deletions(-) diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 65b91dd723da3b..4f4547eef5f3e9 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -5977,10 +5977,12 @@ bool FlowGraphNaturalLoop::CanDuplicate(INDEBUG(const char** reason)) } #endif + bool allowEHCloning = false; + INDEBUG(allowEHCloning = (JitConfig.JitCloneLoopsWithEH() > 0);) + Compiler* comp = m_dfsTree->GetCompiler(); BasicBlock* const header = GetHeader(); - const bool allowEHCloning = JitConfig.JitCloneLoopsWithEH() > 0; CompAllocator alloc = comp->getAllocator(CMK_LoopClone); ArrayStack tryRegionsToClone(alloc); @@ -6061,7 +6063,10 @@ void FlowGraphNaturalLoop::Duplicate(BasicBlock** insertAfter, BlockToBlockMap* assert(CanDuplicate(nullptr)); Compiler* comp = m_dfsTree->GetCompiler(); - const bool canCloneTry = JitConfig.JitCloneLoopsWithEH() > 0; + + bool canCloneTry = false; + INDEBUG(canCloneTry = (JitConfig.JitCloneLoopsWithEH() > 0);) + bool clonedTry = false; BasicBlock* const insertionPoint = *insertAfter; diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 00be822692d7ba..33ae83dce967af 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -50,24 +50,24 @@ CONFIG_INTEGER(DisplayLoopHoistStats, "JitLoopHoistStats", 0) // Display JIT loo // Recommended to use with JitStdOutFile flag. CONFIG_INTEGER(DisplayLsraStats, "JitLsraStats", 0) -CONFIG_STRING(JitLsraOrdering, W("JitLsraOrdering")) // LSRA heuristics ordering -CONFIG_INTEGER(EnablePCRelAddr, W("JitEnablePCRelAddr"), 1) // Whether absolute addr be encoded as PC-rel offset by - // RyuJIT where possible -CONFIG_INTEGER(JitAssertOnMaxRAPasses, W("JitAssertOnMaxRAPasses"), 0) -CONFIG_INTEGER(JitBreakEmitOutputInstr, W("JitBreakEmitOutputInstr"), -1) -CONFIG_INTEGER(JitBreakMorphTree, W("JitBreakMorphTree"), 0xffffffff) -CONFIG_INTEGER(JitBreakOnBadCode, W("JitBreakOnBadCode"), 0) -CONFIG_INTEGER(JitBreakOnMinOpts, W("JITBreakOnMinOpts"), 0) // Halt if jit switches to MinOpts -CONFIG_INTEGER(JitCloneLoops, W("JitCloneLoops"), 1) // If 0, don't clone. Otherwise clone loops for optimizations. -CONFIG_INTEGER(JitCloneLoopsWithGdvTests, W("JitCloneLoopsWithGdvTests"), 1) // If 0, don't clone loops based on - // invariant type/method address tests -CONFIG_INTEGER(JitCloneLoopsWithEH, W("JitCloneLoopsWithEH"), 1) // If 0, don't clone loops containing EH regions -RELEASE_CONFIG_INTEGER(JitCloneLoopsSizeLimit, W("JitCloneLoopsSizeLimit"), 400) // limit cloning to loops with less - // than this many tree nodes -CONFIG_INTEGER(JitDebugLogLoopCloning, W("JitDebugLogLoopCloning"), 0) // In debug builds log places where loop cloning - // optimizations are performed on the fast path. -CONFIG_INTEGER(JitDefaultFill, W("JitDefaultFill"), 0xdd) // In debug builds, initialize the memory allocated by the nra - // with this byte. +CONFIG_STRING(JitLsraOrdering, "JitLsraOrdering") // LSRA heuristics ordering +CONFIG_INTEGER(EnablePCRelAddr, "JitEnablePCRelAddr", 1) // Whether absolute addr be encoded as PC-rel offset by + // RyuJIT where possible +CONFIG_INTEGER(JitAssertOnMaxRAPasses, "JitAssertOnMaxRAPasses", 0) +CONFIG_INTEGER(JitBreakEmitOutputInstr, "JitBreakEmitOutputInstr", -1) +CONFIG_INTEGER(JitBreakMorphTree, "JitBreakMorphTree", 0xffffffff) +CONFIG_INTEGER(JitBreakOnBadCode, "JitBreakOnBadCode", 0) +CONFIG_INTEGER(JitBreakOnMinOpts, "JITBreakOnMinOpts", 0) // Halt if jit switches to MinOpts +CONFIG_INTEGER(JitCloneLoops, "JitCloneLoops", 1) // If 0, don't clone. Otherwise clone loops for optimizations. +CONFIG_INTEGER(JitCloneLoopsWithEH, "JitCloneLoopsWithEH", 1) // If 0, don't clone loops containing EH regions +CONFIG_INTEGER(JitCloneLoopsWithGdvTests, "JitCloneLoopsWithGdvTests", 1) // If 0, don't clone loops based on + // invariant type/method address tests +RELEASE_CONFIG_INTEGER(JitCloneLoopsSizeLimit, "JitCloneLoopsSizeLimit", 400) // limit cloning to loops with less + // than this many tree nodes +CONFIG_INTEGER(JitDebugLogLoopCloning, "JitDebugLogLoopCloning", 0) // In debug builds log places where loop cloning + // optimizations are performed on the fast path. +CONFIG_INTEGER(JitDefaultFill, "JitDefaultFill", 0xdd) // In debug builds, initialize the memory allocated by the nra + // with this byte. // Minimum weight needed for the first block of a loop to make it a candidate for alignment. CONFIG_INTEGER(JitAlignLoopMinBlockWeight, "JitAlignLoopMinBlockWeight", DEFAULT_ALIGN_LOOP_MIN_BLOCK_WEIGHT) From 4274199b3f279323cc94876d8fe074efea99f973 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Wed, 20 Nov 2024 13:58:13 -0800 Subject: [PATCH 03/17] fixes --- src/coreclr/jit/flowgraph.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 4f4547eef5f3e9..3d64a61b1e32b2 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -6006,7 +6006,7 @@ bool FlowGraphNaturalLoop::CanDuplicate(INDEBUG(const char** reason)) unsigned outermostBlockIndex = comp->ehTrueEnclosingTryIndexIL(blockIndex); if ((headerInTry && (outermostBlockIndex == header->getTryIndex())) || - !headerInTry && (outermostBlockIndex == EHblkDsc::NO_ENCLOSING_INDEX)) + (!headerInTry && (outermostBlockIndex == EHblkDsc::NO_ENCLOSING_INDEX))) { tryRegionsToClone.Push(block); } From bc8ee56210e8a57e38f2fe9de3d4659ce9164773 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Wed, 20 Nov 2024 14:13:39 -0800 Subject: [PATCH 04/17] format --- src/coreclr/jit/flowgraph.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 3d64a61b1e32b2..58d8467206c008 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -5977,13 +5977,13 @@ bool FlowGraphNaturalLoop::CanDuplicate(INDEBUG(const char** reason)) } #endif - bool allowEHCloning = false; + bool allowEHCloning = false; INDEBUG(allowEHCloning = (JitConfig.JitCloneLoopsWithEH() > 0);) Compiler* comp = m_dfsTree->GetCompiler(); BasicBlock* const header = GetHeader(); - CompAllocator alloc = comp->getAllocator(CMK_LoopClone); + CompAllocator alloc = comp->getAllocator(CMK_LoopClone); ArrayStack tryRegionsToClone(alloc); BasicBlockVisit result = VisitLoopBlocks([=, &tryRegionsToClone](BasicBlock* block) { @@ -6062,9 +6062,9 @@ void FlowGraphNaturalLoop::Duplicate(BasicBlock** insertAfter, BlockToBlockMap* { assert(CanDuplicate(nullptr)); - Compiler* comp = m_dfsTree->GetCompiler(); + Compiler* comp = m_dfsTree->GetCompiler(); - bool canCloneTry = false; + bool canCloneTry = false; INDEBUG(canCloneTry = (JitConfig.JitCloneLoopsWithEH() > 0);) bool clonedTry = false; From 109595c24d3fb624f7ae216694a66006adb9e634 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Thu, 21 Nov 2024 15:05:42 -0800 Subject: [PATCH 05/17] fixes --- src/coreclr/jit/compiler.h | 7 +++++++ src/coreclr/jit/fgehopt.cpp | 2 +- src/coreclr/jit/flowgraph.cpp | 13 +++++++------ src/coreclr/jit/loopcloning.cpp | 24 +++++++++++++++++++++++- 4 files changed, 38 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 770b3ea6590899..28d8d221ba83d5 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -12333,6 +12333,13 @@ class EHClauses assert((m_begin != nullptr) || (m_begin == m_end)); } + EHClauses(Compiler* comp, EHblkDsc* begin) + : m_begin(begin) + , m_end(comp->compHndBBtab + comp->compHndBBtabCount) + { + assert((m_begin != nullptr) || (m_begin == m_end)); + } + iterator begin() const { return iterator(m_begin); diff --git a/src/coreclr/jit/fgehopt.cpp b/src/coreclr/jit/fgehopt.cpp index 2d366ba095f203..2f77afc25ceb01 100644 --- a/src/coreclr/jit/fgehopt.cpp +++ b/src/coreclr/jit/fgehopt.cpp @@ -2148,7 +2148,7 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, { assert(bbIsTryBeg(tryEntry)); bool const deferCloning = (map == nullptr); - assert(deferCloning || (insertAfter != nullptr) && (*insertAfter != nullptr)); + assert(deferCloning || ((insertAfter != nullptr) && (*insertAfter != nullptr))); INDEBUG(const char* msg = deferCloning ? "Checking if it is possible" : "Attempting";) JITDUMP("%s to clone the try region EH#%02u headed by " FMT_BB "\n", msg, tryEntry->getTryIndex(), tryEntry->bbNum); diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 58d8467206c008..34267ebf902df1 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -6062,14 +6062,13 @@ void FlowGraphNaturalLoop::Duplicate(BasicBlock** insertAfter, BlockToBlockMap* { assert(CanDuplicate(nullptr)); - Compiler* comp = m_dfsTree->GetCompiler(); - - bool canCloneTry = false; - INDEBUG(canCloneTry = (JitConfig.JitCloneLoopsWithEH() > 0);) - + Compiler* const comp = m_dfsTree->GetCompiler(); + bool canCloneTry = false; bool clonedTry = false; BasicBlock* const insertionPoint = *insertAfter; + INDEBUG(canCloneTry = (JitConfig.JitCloneLoopsWithEH() > 0);) + // If the insertion point is within an EH region, remember all the EH regions // current that end at the insertion point, so we can properly extend them // when we're done cloning. @@ -6199,9 +6198,11 @@ void FlowGraphNaturalLoop::Duplicate(BasicBlock** insertAfter, BlockToBlockMap* // region-ending block was cloned, the new region end is the last block cloned). // // Note we don't consult the block references in EH table here, since they - // may reflect interim updates to the region endpoints. + // may reflect interim updates to region endpoints (by fgCloneTry). Otherwise + // we could simply call ehUpdateLastBlocks. // BasicBlock* const lastClonedBlock = *insertAfter; + while (regionEnds.Height() > 0) { RegionEnd r = regionEnds.Pop(); diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index 98aa551d534188..3d3a2617212974 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -2096,7 +2096,6 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex // This puts the slow loop in the canonical loop form. // // The slow preheader needs to go in the same EH region as the preheader. - // If we don't extend, we will fix up region extents when the loop is duplicated. // JITDUMP("Create unique preheader for slow path loop\n"); const bool extendRegion = BasicBlock::sameEHRegion(bottom, preheader); @@ -2105,9 +2104,32 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex slowPreheader->bbWeight = newPred->isRunRarely() ? BB_ZERO_WEIGHT : ambientWeight; slowPreheader->CopyFlags(newPred, (BBF_PROF_WEIGHT | BBF_RUN_RARELY)); slowPreheader->scaleBBWeight(LoopCloneContext::slowPathWeightScaleFactor); + + // If we didn't extend the region above (because the last loop + // block was in some enclosed EH region), put the slow preheader + // into the appropriate region, and make appropriate extent updates. + // if (!extendRegion) { slowPreheader->copyEHRegion(preheader); + bool isTry = false; + unsigned enclosingRegion = ehGetMostNestedRegionIndex(slowPreheader, &isTry); + + if (enclosingRegion != 0) + { + EHblkDsc* const ebd = ehGetDsc(enclosingRegion - 1); + for (EHblkDsc* const HBtab : EHClauses(this, ebd)) + { + if (HBtab->ebdTryLast == bottom) + { + fgSetTryEnd(HBtab, slowPreheader); + } + if (HBtab->ebdHndLast == bottom) + { + fgSetHndEnd(HBtab, slowPreheader); + } + } + } } newPred = slowPreheader; From 931d177fa5c4f60388b11909160834ed89a3be4d Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Thu, 21 Nov 2024 15:23:52 -0800 Subject: [PATCH 06/17] test cases --- src/tests/JIT/opt/Cloning/loops_with_eh.cs | 941 ++++++++++++++++++ .../JIT/opt/Cloning/loops_with_eh.csproj | 17 + 2 files changed, 958 insertions(+) create mode 100644 src/tests/JIT/opt/Cloning/loops_with_eh.cs create mode 100644 src/tests/JIT/opt/Cloning/loops_with_eh.csproj diff --git a/src/tests/JIT/opt/Cloning/loops_with_eh.cs b/src/tests/JIT/opt/Cloning/loops_with_eh.cs new file mode 100644 index 00000000000000..ca0328e78bdaf4 --- /dev/null +++ b/src/tests/JIT/opt/Cloning/loops_with_eh.cs @@ -0,0 +1,941 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Runtime.CompilerServices; +using Xunit; + + +// Cheat codes +// +// L - loop +// TC - try catch (catch exits loop) +// TfC - try filter catch +// TF - try finally +// x - has padding between loop head and try entry +// c - catch continues loop +// m - multiple try exits (TF will remain a try finally) +// g - giant finally (TF will remain try finally) +// p - regions are serial, not nested +// +// x: we currently cannot clone loops where the try is the first thing +// as the header and preheader are different regions + +public class LoopsWithEH +{ + static int[] data; + static int n; + + static LoopsWithEH() + { + data = new int[100]; + for (int i = 0; i < data.Length; i++) + { + data[i] = i; + } + + n = data[20]; + } + + [Fact] + public static int Test_LTC() => Sum_LTC(data, n) - 90; + + public static int Sum_LTC(int[] data, int n) + { + int sum = 0; + for (int i = 0; i < n; i++) + { + try + { + sum += data[i]; + } + catch (Exception) + { + return -1; + } + } + return sum; + } + + [Fact] + public static int Test_LTfC() => Sum_LTfC(data, n) - 90; + + public static int Sum_LTfC(int[] data, int n) + { + int sum = 0; + for (int i = 0; i < n; i++) + { + try + { + sum += data[i]; + } + catch (Exception) when (n > 0) + { + return -1; + } + } + return sum; + } + + [Fact] + public static int Test_LxTC() => Sum_LxTC(data, n) - 110; + + public static int Sum_LxTC(int[] data, int n) + { + int sum = 0; + for (int i = 0; i < n; i++) + { + sum += 1; + try + { + sum += data[i]; + } + catch (Exception) + { + return -1; + } + } + return sum; + } + + [Fact] + public static int Test_LxTCc() => Sum_LxTCc(data, n) - 110; + + public static int Sum_LxTCc(int[] data, int n) + { + int sum = 0; + for (int i = 0; i < n; i++) + { + sum += 1; + try + { + sum += data[i]; + } + catch (Exception) + { + sum += 1; + } + } + return sum; + } + + [Fact] + public static int Test_LxTfC() => Sum_LxTfC(data, n) - 110; + + public static int Sum_LxTfC(int[] data, int n) + { + int sum = 0; + for (int i = 0; i < n; i++) + { + sum += 1; + try + { + sum += data[i]; + } + catch (Exception) when (n > 0) + { + return -1; + } + } + return sum; + } + + [Fact] + public static int Test_LxTfCc() => Sum_LxTfCc(data, n) - 110; + + public static int Sum_LxTfCc(int[] data, int n) + { + int sum = 0; + for (int i = 0; i < n; i++) + { + sum += 1; + try + { + sum += data[i]; + } + catch (Exception) when (n > 0) + { + sum += 1; + } + } + return sum; + } + + [Fact] + public static int Test_LxTCC() => Sum_LxTCC(data, n) - 110; + + public static int Sum_LxTCC(int[] data, int n) + { + int sum = 0; + for (int i = 0; i < n; i++) + { + sum += 1; + try + { + sum += data[i]; + } + catch (IndexOutOfRangeException) + { + return -1; + } + catch(Exception) + { + return -2; + } + } + return sum; + } + + [Fact] + public static int Test_LxTCcC() => Sum_LxTCcC(data, n) - 110; + + public static int Sum_LxTCcC(int[] data, int n) + { + int sum = 0; + for (int i = 0; i < n; i++) + { + sum += 1; + try + { + sum += data[i]; + } + catch (IndexOutOfRangeException) + { + sum +=1; + } + catch (Exception) + { + return -2; + } + } + return sum; + } + + [Fact] + public static int Test_LxTCCc() => Sum_LxTCCc(data, n) - 110; + + public static int Sum_LxTCCc(int[] data, int n) + { + int sum = 0; + for (int i = 0; i < n; i++) + { + sum += 1; + try + { + sum += data[i]; + } + catch (IndexOutOfRangeException) + { + return -1; + } + catch (Exception) + { + sum += 2; + } + } + return sum; + } + + [Fact] + public static int Test_LxTCcCc() => Sum_LxTCcCc(data, n) - 110; + + public static int Sum_LxTCcCc(int[] data, int n) + { + int sum = 0; + for (int i = 0; i < n; i++) + { + sum += 1; + try + { + sum += data[i]; + } + catch (IndexOutOfRangeException) + { + sum += 1; + } + catch (Exception) + { + sum += 2; + } + } + return sum; + } + + [Fact] + public static int Test_LxTCpTC() => Sum_LxTCpTC(data, n) - 300; + + public static int Sum_LxTCpTC(int[] data, int n) + { + int sum = 0; + for (int i = 0; i < n; i++) + { + sum += 1; + try + { + sum += data[i]; + } + catch (Exception) + { + return -1; + } + + try + { + sum += data[i]; + } + catch (Exception) + { + return -2; + } + + } + return sum; + } + + [Fact] + public static int Test_LxTCcpTC() => Sum_LxTCcpTC(data, n) - 300; + + public static int Sum_LxTCcpTC(int[] data, int n) + { + int sum = 0; + for (int i = 0; i < n; i++) + { + sum += 1; + try + { + sum += data[i]; + } + catch (Exception) + { + sum += 1; + } + + try + { + sum += data[i]; + } + catch (Exception) + { + return -2; + } + + } + return sum; + } + + [Fact] + public static int Test_LxTCpTCc() => Sum_LxTCpTCc(data, n) - 300; + + public static int Sum_LxTCpTCc(int[] data, int n) + { + int sum = 0; + for (int i = 0; i < n; i++) + { + sum += 1; + try + { + sum += data[i]; + } + catch (Exception) + { + return -1; + } + + try + { + sum += data[i]; + } + catch (Exception) + { + sum += 1; + } + + } + return sum; + } + + [Fact] + public static int Test_LxTCcpTCc() => Sum_LxTCcpTCc(data, n) - 300; + + public static int Sum_LxTCcpTCc(int[] data, int n) + { + int sum = 0; + for (int i = 0; i < n; i++) + { + sum += 1; + try + { + sum += data[i]; + } + catch (Exception) + { + sum += 2; + } + + try + { + sum += data[i]; + } + catch (Exception) + { + sum += 1; + } + + } + return sum; + } + + [Fact] + public static int Test_LxTF() => Sum_LxTF(data, n) - 130; + + public static int Sum_LxTF(int[] data, int n) + { + int sum = 0; + for (int i = 0; i < n; i++) + { + sum += 1; + try + { + sum += data[i]; + } + finally + { + sum += 1; + } + } + return sum; + } + + [Fact] + public static int Test_LxTFm() => Sum_LxTFm(data, n) - 1; + + public static int Sum_LxTFm(int[] data, int n) + { + int sum = 0; + for (int i = 0; i < n; i++) + { + sum += 1; + try + { + sum += data[i]; + + if (sum > 100) return 101; + } + finally + { + sum += 1; + } + } + return sum; + } + + [Fact] + public static int Test_LxTFg() => Sum_LxTFg(data, n) - 1; + + public static int Sum_LxTFg(int[] data, int n) + { + int sum = 0; + for (int i = 0; i < n; i++) + { + sum += 1; + try + { + sum += data[i]; + + if (sum > 100) return 101; + } + finally + { + sum += 1; sum *= 4; sum -= 1; sum /= 4; + sum += 1; sum *= 4; sum -= 1; sum /= 4; + sum += 1; sum *= 4; sum -= 1; sum /= 4; + sum += 1; sum *= 4; sum -= 1; sum /= 4; + sum += 1; sum *= 4; sum -= 1; sum /= 4; + sum += 1; sum *= 4; sum -= 1; sum /= 4; + sum += 1; sum *= 4; sum -= 1; sum /= 4; + sum += 1; sum *= 4; sum -= 1; sum /= 4; + } + } + return sum; + } + + [Fact] + public static int Test_TCLxTC() => Sum_TCLxTC(data, n) - 110; + + public static int Sum_TCLxTC(int[] data, int n) + { + int sum = 0; + try + { + for (int i = 0; i < n; i++) + { + sum += 1; + try + { + sum += data[i]; + } + catch (Exception) + { + return -1; + } + } + } + catch (Exception) + { + return -1; + } + return sum; + } + + [Fact] + public static int Test_TCLxTCc() => Sum_TCLxTCc(data, n) - 110; + + public static int Sum_TCLxTCc(int[] data, int n) + { + int sum = 0; + try + { + for (int i = 0; i < n; i++) + { + sum += 1; + try + { + sum += data[i]; + } + catch (Exception) + { + sum += 1; + } + } + } + catch (Exception) + { + return -1; + } + return sum; + } + + [Fact] + public static int Test_TCLxTfC() => Sum_TCLxTfC(data, n) - 110; + + public static int Sum_TCLxTfC(int[] data, int n) + { + int sum = 0; + try + { + for (int i = 0; i < n; i++) + { + sum += 1; + try + { + sum += data[i]; + } + catch (Exception) when (n > 0) + { + return -1; + } + } + } + catch (Exception) + { + return -1; + } + return sum; + } + + [Fact] + public static int Test_TfCLxTC() => Sum_TfCLxTC(data, n) - 110; + + public static int Sum_TfCLxTC(int[] data, int n) + { + int sum = 0; + try + { + for (int i = 0; i < n; i++) + { + sum += 1; + try + { + sum += data[i]; + } + catch (Exception) + { + return -1; + } + } + } + catch (Exception) when (n > 0) + { + return -1; + } + return sum; + } + + [Fact] + public static int Test_TfCLxTCc() => Sum_TfCLxTCc(data, n) - 110; + + public static int Sum_TfCLxTCc(int[] data, int n) + { + int sum = 0; + try + { + for (int i = 0; i < n; i++) + { + sum += 1; + try + { + sum += data[i]; + } + catch (Exception) + { + sum += 1; + } + } + } + catch (Exception) when (n > 0) + { + return -1; + } + return sum; + } + + [Fact] + public static int Test_TfCLxTfC() => Sum_TfCLxTfC(data, n) - 110; + + public static int Sum_TfCLxTfC(int[] data, int n) + { + int sum = 0; + try + { + for (int i = 0; i < n; i++) + { + sum += 1; + try + { + sum += data[i]; + } + catch (Exception) when (n > 0) + { + return -1; + } + } + } + catch (Exception) when (n > 0) + { + return -1; + } + return sum; + } + + [Fact] + public static int Test_TCLxTF() => Sum_TCLxTF(data, n) - 130; + + public static int Sum_TCLxTF(int[] data, int n) + { + int sum = 0; + try + { + for (int i = 0; i < n; i++) + { + sum += 1; + try + { + sum += data[i]; + } + finally + { + sum += 1; + } + } + } + catch (Exception) + { + return -1; + } + return sum; + } + + [Fact] + public static int Test_LxTCTF() => Sum_LxTCTF(data, n) - 130; + + public static int Sum_LxTCTF(int[] data, int n) + { + int sum = 0; + + for (int i = 0; i < n; i++) + { + sum += 1; + try + { + try + { + sum += data[i]; + } + finally + { + sum += 1; + } + } + catch (Exception) + { + return -1; + } + } + + return sum; + } + + [Fact] + public static int Test_LxTCcTF() => Sum_LxTCcTF(data, n) - 130; + + public static int Sum_LxTCcTF(int[] data, int n) + { + int sum = 0; + + for (int i = 0; i < n; i++) + { + sum += 1; + try + { + try + { + sum += data[i]; + } + finally + { + sum += 1; + } + } + catch (Exception) + { + sum += 2; + } + } + + return sum; + } + + [Fact] + public static int Test_LxTFTC() => Sum_LxTFTC(data, n) - 130; + + public static int Sum_LxTFTC(int[] data, int n) + { + int sum = 0; + + for (int i = 0; i < n; i++) + { + sum += 1; + try + { + try + { + sum += data[i]; + } + catch (Exception) + { + return -1; + } + } + finally + { + sum += 1; + } + } + + return sum; + } + + [Fact] + public static int Test_LxTFTCc() => Sum_LxTFTCc(data, n) - 130; + + public static int Sum_LxTFTCc(int[] data, int n) + { + int sum = 0; + + for (int i = 0; i < n; i++) + { + sum += 1; + try + { + try + { + sum += data[i]; + } + catch (Exception) + { + sum += 2; + } + } + finally + { + sum += 1; + } + } + + return sum; + } + + [Fact] + public static int Test_LxTFTF() => Sum_LxTFTF(data, n) - 110; + + public static int Sum_LxTFTF(int[] data, int n) + { + int sum = 0; + + for (int i = 0; i < n; i++) + { + sum += 1; + try + { + try + { + sum += data[i]; + } + finally + { + sum += -1; + } + } + finally + { + sum += 1; + } + } + + return sum; + } + + [Fact] + public static int Test_LxTFxTF() => Sum_LxTFTF(data, n) - 110; + + public static int Sum_TFLxTF(int[] data, int n) + { + int sum = 0; + try + { + for (int i = 0; i < n; i++) + { + sum += 1; + try + { + sum += data[i]; + } + finally + { + sum += 1; + } + } + } + finally + { + sum += 1; + } + return sum; + } + + [Fact] + public static int Test_TFTFLxTF() => Sum_TFTFLxTF(data, n) - 132; + + public static int Sum_TFTFLxTF(int[] data, int n) + { + int sum = 0; + try + { + try + { + for (int i = 0; i < n; i++) + { + sum += 1; + try + { + sum += data[i]; + } + finally + { + sum += 1; + } + } + } + finally + { + sum += 1; + } + } + finally + { + sum += 1; + } + return sum; + } + + [Fact] + public static int Test_TCTFLxTF() => Sum_TCTFLxTF(data, n) - 131; + + public static int Sum_TCTFLxTF(int[] data, int n) + { + int sum = 0; + try + { + try + { + for (int i = 0; i < n; i++) + { + sum += 1; + try + { + sum += data[i]; + } + finally + { + sum += 1; + } + } + } + finally + { + sum += 1; + } + } + catch(Exception) + { + return -1; + } + return sum; + } + + [Fact] + public static int Test_TFTCLxTF() => Sum_TCTFLxTF(data, n) - 131; + + public static int Sum_TFTCLxTF(int[] data, int n) + { + int sum = 0; + try + { + try + { + for (int i = 0; i < n; i++) + { + sum += 1; + try + { + sum += data[i]; + } + finally + { + sum += 1; + } + } + } + catch (Exception) + { + return -1; + } + } + finally + { + sum += 1; + } + return sum; + } +} + diff --git a/src/tests/JIT/opt/Cloning/loops_with_eh.csproj b/src/tests/JIT/opt/Cloning/loops_with_eh.csproj new file mode 100644 index 00000000000000..f071d60bc864f0 --- /dev/null +++ b/src/tests/JIT/opt/Cloning/loops_with_eh.csproj @@ -0,0 +1,17 @@ + + + + true + + + None + True + + + + true + + + + + From 11ba46a99644c158f3c4df899908048455507a8b Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Thu, 21 Nov 2024 15:48:07 -0800 Subject: [PATCH 07/17] refactor a bit; get rid of BlockSet --- src/coreclr/jit/compiler.h | 14 ++++++- src/coreclr/jit/fgehopt.cpp | 74 ++++++++++++++++------------------- src/coreclr/jit/flowgraph.cpp | 25 +++++++----- 3 files changed, 62 insertions(+), 51 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 28d8d221ba83d5..16a94c8108bedb 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -5418,10 +5418,20 @@ class Compiler PhaseStatus fgCloneFinally(); + struct CloneTryInfo + { + CloneTryInfo(BitVecTraits& traits, BitVec& visited) : m_traits(traits), m_visited(visited) {} + BitVecTraits m_traits; + BitVec& m_visited; + BlockToBlockMap* m_map = nullptr; + bool m_addEdges = false; + weight_t m_profileScale = 0.0; + unsigned m_ehRegionShift = 0; + }; + bool fgCanCloneTryRegion(BasicBlock* tryEntry); - BasicBlock* fgCloneTryRegion(BasicBlock* tryEntry, BlockSet& visited, BlockToBlockMap* map, bool addEdges, - weight_t profileScale, BasicBlock** insertAfter, unsigned* ehRegionShift); + BasicBlock* fgCloneTryRegion(BasicBlock* tryEntry, CloneTryInfo& info, BasicBlock** insertAfter = nullptr); void fgCleanupContinuation(BasicBlock* continuation); diff --git a/src/coreclr/jit/fgehopt.cpp b/src/coreclr/jit/fgehopt.cpp index 2f77afc25ceb01..ef9db2dd984a55 100644 --- a/src/coreclr/jit/fgehopt.cpp +++ b/src/coreclr/jit/fgehopt.cpp @@ -2103,52 +2103,44 @@ PhaseStatus Compiler::fgTailMergeThrows() // // Arguments: // tryEntry -- try entry block -// visited -- [in, out] blocks visited during cloning -// map -- [in, out] block mapping original try region blocks to their clones -// addEdges -- if true, add flow edges to the cloned blocks -// profileScale -- scale profile weight by this factor +// info -- [in, out] information about the cloning // insertAfter -- [in, out] pointer to block to insert new blocks after -// ehIndexShift -- [out]amount by which indices of enclosing EH regions have shifted // // Returns: -// If map == nullptr, check if cloning is possible +// If insertAfter == nullptr, check if cloning is possible // return nullptr if not, tryEntry if so // else // Return the cloned try entry, or nullptr if cloning failed -// map will be modified to contain keys and for the blocks to cloned. -// insertionPoint will point at the lexcially last block cloned -// ehIndexShift will indicate the amount by whcih enclosing EH region indices have changed +// cloned blocks will be created and scaled by profile weight +// and if info.m_addEdges is true have proper bbkinds and flow edges +// info data will be updated: +// m_map will be modified to contain keys and for the blocks cloned +// m_visited will include bits for each newly cloned block +// m_ehRegionShift will describe number of EH regions added +// insertAfter will point at the lexcially last block cloned // // Notes: -// * If map is nullptr, this call only checks if cloning is possible +// * if insertAfter is non null, map must also be non null // -// * If map is not nullptr, it is not modified unless cloning succeeds +// * If info.m_map is not nullptr, it is not modified unless cloning succeeds // When cloning succeeds, entries for the try blocks and related blocks // (handler, filter, callfinally) will be updated; other map entries will // be left as they were // -// * insertAfter can be nullptr, if map is nullptr -// The insertion point must be lexically after the original try region -// and be in the enclosing region for the original try -// -// * ehRegionShift can be nullptr, if map is nullptr +// * the insertion point must be lexically after the original try region +// and be a block in the enclosing region for the original try. // // * If cloning and adding edges, // The new try region entry will not be reachable by any uncloned block. // The new try region exits will target the same uncloned blocks as the original, // or as directed by pre-existing map entries. // -BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, - BlockSet& visited, - BlockToBlockMap* map, - bool addEdges, - weight_t profileScale, - BasicBlock** insertAfter, - unsigned* ehRegionShift) +BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, CloneTryInfo& info, BasicBlock** insertAfter) + { assert(bbIsTryBeg(tryEntry)); - bool const deferCloning = (map == nullptr); - assert(deferCloning || ((insertAfter != nullptr) && (*insertAfter != nullptr))); + bool const deferCloning = (insertAfter == nullptr); + assert(deferCloning || ((*insertAfter != nullptr) && (info.m_map != nullptr))); INDEBUG(const char* msg = deferCloning ? "Checking if it is possible" : "Attempting";) JITDUMP("%s to clone the try region EH#%02u headed by " FMT_BB "\n", msg, tryEntry->getTryIndex(), tryEntry->bbNum); @@ -2167,9 +2159,12 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, unsigned const tryIndex = tryEntry->getTryIndex(); jitstd::vector blocks(alloc); unsigned regionCount = 0; + BitVecTraits* const traits = &info.m_traits; + BitVec& visited = info.m_visited; + BlockToBlockMap* const map = info.m_map; auto addBlockToClone = [=, &blocks, &visited](BasicBlock* block, const char* msg) { - if (!BlockSetOps::TryAddElemD(this, visited, block->bbNum)) + if (!BitVecOps::TryAddElemD(traits, visited, block->bbNum)) { return false; } @@ -2196,10 +2191,10 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, BasicBlock* const firstTryBlock = ebd->ebdTryBeg; BasicBlock* const lastTryBlock = ebd->ebdTryLast; - if (BlockSetOps::IsMember(this, visited, firstTryBlock->bbNum)) + if (BitVecOps::IsMember(traits, visited, firstTryBlock->bbNum)) { JITDUMP("already walked try region for EH#%02u\n", regionIndex); - assert(BlockSetOps::IsMember(this, visited, lastTryBlock->bbNum)); + assert(BitVecOps::IsMember(traits, visited, lastTryBlock->bbNum)); } else { @@ -2251,10 +2246,10 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, BasicBlock* const firstFltBlock = ebd->ebdFilter; BasicBlock* const lastFltBlock = ebd->BBFilterLast(); - if (BlockSetOps::IsMember(this, visited, firstFltBlock->bbNum)) + if (BitVecOps::IsMember(traits, visited, firstFltBlock->bbNum)) { JITDUMP("already walked filter region for EH#%02u\n", regionIndex); - assert(BlockSetOps::IsMember(this, visited, lastFltBlock->bbNum)); + assert(BitVecOps::IsMember(traits, visited, lastFltBlock->bbNum)); } else { @@ -2274,10 +2269,10 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, BasicBlock* const firstHndBlock = ebd->ebdHndBeg; BasicBlock* const lastHndBlock = ebd->ebdHndLast; - if (BlockSetOps::IsMember(this, visited, firstHndBlock->bbNum)) + if (BitVecOps::IsMember(traits, visited, firstHndBlock->bbNum)) { JITDUMP("already walked handler region for EH#%02u\n", regionIndex); - assert(BlockSetOps::IsMember(this, visited, lastHndBlock->bbNum)); + assert(BitVecOps::IsMember(traits, visited, lastHndBlock->bbNum)); } else { @@ -2392,7 +2387,7 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, // Callers will see enclosing EH region indices shift by this much // - *ehRegionShift = regionCount; + info.m_ehRegionShift = regionCount; // The EH table now looks like the following, for a middle insertion: // @@ -2499,7 +2494,7 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, (*insertAfter)->bbNum); map->Set(block, newBlock, BlockToBlockMap::SetKind::Overwrite); BasicBlock::CloneBlockState(this, newBlock, block); - newBlock->scaleBBWeight(profileScale); + newBlock->scaleBBWeight(info.m_profileScale); *insertAfter = newBlock; } JITDUMP("Done cloning blocks for try...\n"); @@ -2604,7 +2599,7 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, // Redirect any branches within the newly-cloned blocks or // from cloned blocks to non-cloned blocks // - if (addEdges) + if (info.m_addEdges) { JITDUMP("Adding edges in the newly cloned try\n"); for (BasicBlock* const block : BlockToBlockMap::KeyIteration(map)) @@ -2741,11 +2736,10 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, bool Compiler::fgCanCloneTryRegion(BasicBlock* tryEntry) { assert(bbIsTryBeg(tryEntry)); - EnsureBasicBlockEpoch(); - BlockSet visited(BlockSetOps::MakeEmpty(this)); - BasicBlock* const result = - fgCloneTryRegion(tryEntry, visited, /* map */ nullptr, /* addEdges */ false, /* scaleProfile */ 0.0, - /* insertAfter */ nullptr, /* ehRegionShift */ nullptr); + BitVecTraits traits(compBasicBlockID, this); + BitVec visited = BitVecOps::MakeEmpty(&traits); + CloneTryInfo info(traits, visited); + BasicBlock* const result = fgCloneTryRegion(tryEntry, info); return result != nullptr; } diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 34267ebf902df1..8e8ae80c1f8fec 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -6130,16 +6130,19 @@ void FlowGraphNaturalLoop::Duplicate(BasicBlock** insertAfter, BlockToBlockMap* // Keep track of how much the region end EH indices change because of EH region cloning. // unsigned ehRegionShift = 0; - comp->EnsureBasicBlockEpoch(); - BlockSet visited(BlockSetOps::MakeEmpty(comp)); - VisitLoopBlocksLexical([=, &visited, &clonedTry, &ehRegionShift](BasicBlock* blk) { + // Keep track of which blocks were handled by EH region cloning + // + BitVecTraits traits(comp->compBasicBlockID, comp); + BitVec visited(BitVecOps::MakeEmpty(&traits)); + + VisitLoopBlocksLexical([=, &traits, &visited, &clonedTry, &ehRegionShift](BasicBlock* blk) { if (canCloneTry) { // If we allow cloning loops with EH, we may have already handled // this loop block as part of a containing try region. // - if (BlockSetOps::IsMember(comp, visited, blk->bbNum)) + if (BitVecOps::IsMember(&traits, visited, blk->bbNum)) { return BasicBlockVisit::Continue; } @@ -6151,11 +6154,15 @@ void FlowGraphNaturalLoop::Duplicate(BasicBlock** insertAfter, BlockToBlockMap* // if (comp->bbIsTryBeg(blk)) { - unsigned regionShift = 0; - BasicBlock* const clonedBlock = comp->fgCloneTryRegion(blk, visited, map, /* addEdges */ false, - weightScale, insertAfter, ®ionShift); + Compiler::CloneTryInfo info(traits, visited); + info.m_map = map; + info.m_addEdges = false; + info.m_profileScale = weightScale; + + BasicBlock* const clonedBlock = comp->fgCloneTryRegion(blk, info, insertAfter); + assert(clonedBlock != nullptr); - ehRegionShift += regionShift; + ehRegionShift += info.m_ehRegionShift; clonedTry = true; return BasicBlockVisit::Continue; } @@ -6166,7 +6173,7 @@ void FlowGraphNaturalLoop::Duplicate(BasicBlock** insertAfter, BlockToBlockMap* // assert(!comp->bbIsTryBeg(blk)); assert(!comp->bbIsHandlerBeg(blk)); - assert(!BlockSetOps::IsMember(comp, visited, blk->bbNum)); + assert(!BitVecOps::IsMember(&traits, visited, blk->bbNum)); } // `blk` was not in loop-enclosed try region or companion region. From 55d4b574a0024cac14118e77fc46374d1490aab8 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Fri, 22 Nov 2024 09:13:30 -0800 Subject: [PATCH 08/17] disable by default, fix up some comments --- src/coreclr/jit/fgehopt.cpp | 19 +++++++++---------- src/coreclr/jit/jitconfigvalues.h | 2 +- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/src/coreclr/jit/fgehopt.cpp b/src/coreclr/jit/fgehopt.cpp index ef9db2dd984a55..99f482adda742f 100644 --- a/src/coreclr/jit/fgehopt.cpp +++ b/src/coreclr/jit/fgehopt.cpp @@ -2307,7 +2307,7 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, CloneTryInfo& info, JITDUMP("<== finished try EH#%02u\n", regionIndex); } - // Find the outermost mututal-protect try egion that begins at tryEntry + // Find the outermost mututal-protect try region that begins at tryEntry // EHblkDsc* const tryEbd = ehGetDsc(tryIndex); unsigned outermostTryIndex = tryIndex; @@ -2336,8 +2336,7 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, CloneTryInfo& info, enclosingHndIndex = tryEntry->getHndIndex(); } - // Now blocks contains an entry for each block to clone, - // and maps (if non-null) contains a key for each block to clone, with nullptr value + // Now blocks contains an entry for each block to clone. // JITDUMP("Will need to clone %u EH regions (outermost: EH#%02u) and %u blocks\n", regionCount, outermostTryIndex, blocks.size()); @@ -2392,10 +2391,10 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, CloneTryInfo& info, // The EH table now looks like the following, for a middle insertion: // // =================== - // EH 0 -- unrelated regions + // EH 0 -- unrelated regions // ... // --------------- - // EH x -- innermost region to clone + // EH x -- innermost region to clone // ... // EH x + regionCount - 1 -- outermost region to clone // --------------- @@ -2412,20 +2411,20 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, CloneTryInfo& info, // And like this, for an end insertion: // // =================== - // EH 0 -- unrelated regions + // EH 0 -- unrelated regions // ... // --------------- - // EH x -- innermost region to clone + // EH x -- innermost region to clone // ... // EH x + regionCount - 1 -- outermost region to clone // --------------- // ... - // EH k -- unrelated regions + // EH k -- unrelated regions // ... // --------------- - // EH x + regionCount -- innermost cloned region + // EH c -- innermost cloned region // ... - // EH x + 2*regionCount - 1 -- outermost cloned region + // EH c + regionCount - 1 -- outermost cloned region // --------------- // =================== // diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index b12deda7f5b2d6..e50f419e28b745 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -59,7 +59,7 @@ CONFIG_INTEGER(JitBreakMorphTree, "JitBreakMorphTree", 0xffffffff) CONFIG_INTEGER(JitBreakOnBadCode, "JitBreakOnBadCode", 0) CONFIG_INTEGER(JitBreakOnMinOpts, "JITBreakOnMinOpts", 0) // Halt if jit switches to MinOpts CONFIG_INTEGER(JitCloneLoops, "JitCloneLoops", 1) // If 0, don't clone. Otherwise clone loops for optimizations. -CONFIG_INTEGER(JitCloneLoopsWithEH, "JitCloneLoopsWithEH", 1) // If 0, don't clone loops containing EH regions +CONFIG_INTEGER(JitCloneLoopsWithEH, "JitCloneLoopsWithEH", 0) // If 0, don't clone loops containing EH regions CONFIG_INTEGER(JitCloneLoopsWithGdvTests, "JitCloneLoopsWithGdvTests", 1) // If 0, don't clone loops based on // invariant type/method address tests RELEASE_CONFIG_INTEGER(JitCloneLoopsSizeLimit, "JitCloneLoopsSizeLimit", 400) // limit cloning to loops with less From 8ff37413cd2eb7e7d9efe9a3839f915e51cf6f52 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Tue, 26 Nov 2024 08:12:03 -0800 Subject: [PATCH 09/17] pick up changes from de-abstraction integration --- src/coreclr/jit/compiler.h | 1 + src/coreclr/jit/fgehopt.cpp | 42 ++++++++++++++++++++++++------------- 2 files changed, 28 insertions(+), 15 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 599d20d9071556..4ce8ed94445665 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -5364,6 +5364,7 @@ class Compiler BitVecTraits m_traits; BitVec& m_visited; BlockToBlockMap* m_map = nullptr; + jitstd::vector* m_blocksToClone = nullptr; bool m_addEdges = false; weight_t m_profileScale = 0.0; unsigned m_ehRegionShift = 0; diff --git a/src/coreclr/jit/fgehopt.cpp b/src/coreclr/jit/fgehopt.cpp index 99f482adda742f..a773a1b5a5a69d 100644 --- a/src/coreclr/jit/fgehopt.cpp +++ b/src/coreclr/jit/fgehopt.cpp @@ -2136,13 +2136,12 @@ PhaseStatus Compiler::fgTailMergeThrows() // or as directed by pre-existing map entries. // BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, CloneTryInfo& info, BasicBlock** insertAfter) - { assert(bbIsTryBeg(tryEntry)); bool const deferCloning = (insertAfter == nullptr); assert(deferCloning || ((*insertAfter != nullptr) && (info.m_map != nullptr))); - INDEBUG(const char* msg = deferCloning ? "Checking if it is possible" : "Attempting";) - JITDUMP("%s to clone the try region EH#%02u headed by " FMT_BB "\n", msg, tryEntry->getTryIndex(), tryEntry->bbNum); + INDEBUG(const char* msg = deferCloning ? "Checking if it is possible to clone" : "Cloning";) + JITDUMP("%s the try region EH#%02u headed by " FMT_BB "\n", msg, tryEntry->getTryIndex(), tryEntry->bbNum); // Determine the extent of cloning. // @@ -2154,14 +2153,23 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, CloneTryInfo& info, // This is necessary because try regions can't have multiple entries, or // share parts in any meaningful way. // - CompAllocator alloc = getAllocator(CMK_TryRegionClone); - ArrayStack regionsToProcess(alloc); - unsigned const tryIndex = tryEntry->getTryIndex(); - jitstd::vector blocks(alloc); - unsigned regionCount = 0; - BitVecTraits* const traits = &info.m_traits; - BitVec& visited = info.m_visited; - BlockToBlockMap* const map = info.m_map; + CompAllocator alloc = getAllocator(CMK_TryRegionClone); + ArrayStack regionsToProcess(alloc); + unsigned const tryIndex = tryEntry->getTryIndex(); + + // Track blocks to clone for caller, or if we are cloning and + // caller doesn't care. + // + jitstd::vector* blocks = info.m_blocksToClone; + if (!deferCloning && (blocks == nullptr)) + { + blocks = new (alloc) jitstd::vector(alloc); + } + + unsigned regionCount = 0; + BitVecTraits* const traits = &info.m_traits; + BitVec& visited = info.m_visited; + BlockToBlockMap* const map = info.m_map; auto addBlockToClone = [=, &blocks, &visited](BasicBlock* block, const char* msg) { if (!BitVecOps::TryAddElemD(traits, visited, block->bbNum)) @@ -2170,7 +2178,11 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, CloneTryInfo& info, } JITDUMP(" %s block " FMT_BB "\n", msg, block->bbNum); - blocks.push_back(block); + + if (blocks != nullptr) + { + blocks->push_back(block); + } return true; }; @@ -2339,7 +2351,7 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, CloneTryInfo& info, // Now blocks contains an entry for each block to clone. // JITDUMP("Will need to clone %u EH regions (outermost: EH#%02u) and %u blocks\n", regionCount, outermostTryIndex, - blocks.size()); + blocks->size()); // Allocate the new EH clauses. First, find the enclosing EH clause, if any... // we will want to allocate the new clauses just "before" this point. @@ -2486,7 +2498,7 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, CloneTryInfo& info, // appropriate cloned region and fixes up region extents. // JITDUMP("Cloning blocks for try...\n"); - for (BasicBlock* const block : blocks) + for (BasicBlock* const block : *blocks) { BasicBlock* const newBlock = fgNewBBafter(BBJ_ALWAYS, *insertAfter, /* extendRegion */ false); JITDUMP("Adding " FMT_BB " (copy of " FMT_BB ") after " FMT_BB "\n", newBlock->bbNum, block->bbNum, @@ -2505,7 +2517,7 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, CloneTryInfo& info, // the new end point of the enclosing try is in the cloned try. // JITDUMP("Fixing region indices...\n"); - for (BasicBlock* const block : blocks) + for (BasicBlock* const block : *blocks) { BasicBlock* newBlock = nullptr; bool found = map->Lookup(block, &newBlock); From ecccc18bfc040746fee35801ecd41f740810ccf1 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Tue, 26 Nov 2024 08:19:45 -0800 Subject: [PATCH 10/17] first bit of review feedback --- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/fgehopt.cpp | 13 ++++++------- src/coreclr/jit/flowgraph.cpp | 2 +- src/coreclr/jit/jiteh.cpp | 8 ++++---- 4 files changed, 12 insertions(+), 13 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 4ce8ed94445665..c842610177fe2d 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -3001,7 +3001,7 @@ class Compiler void fgRemoveEHTableEntry(unsigned XTnum); - EHblkDsc* fgAddEHTableEntries(unsigned XTnum, unsigned count = 1, bool deferAdding = false); + EHblkDsc* fgTryAddEHTableEntries(unsigned XTnum, unsigned count = 1, bool deferAdding = false); void fgSortEHTable(); diff --git a/src/coreclr/jit/fgehopt.cpp b/src/coreclr/jit/fgehopt.cpp index a773a1b5a5a69d..dd2e8a114e2e08 100644 --- a/src/coreclr/jit/fgehopt.cpp +++ b/src/coreclr/jit/fgehopt.cpp @@ -2319,7 +2319,7 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, CloneTryInfo& info, JITDUMP("<== finished try EH#%02u\n", regionIndex); } - // Find the outermost mututal-protect try region that begins at tryEntry + // Find the outermost mutual-protect try region that begins at tryEntry // EHblkDsc* const tryEbd = ehGetDsc(tryIndex); unsigned outermostTryIndex = tryIndex; @@ -2350,7 +2350,7 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, CloneTryInfo& info, // Now blocks contains an entry for each block to clone. // - JITDUMP("Will need to clone %u EH regions (outermost: EH#%02u) and %u blocks\n", regionCount, outermostTryIndex, + JITDUMP("Will need to clone %u EH regions (outermost: EH#%02u) and %zu blocks\n", regionCount, outermostTryIndex, blocks->size()); // Allocate the new EH clauses. First, find the enclosing EH clause, if any... @@ -2370,13 +2370,12 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, CloneTryInfo& info, JITDUMP("Cloned EH clauses will go before enclosing region EH#%02u\n", enclosingTryIndex); } - // Once we call fgAddEHTableEntries, all the EH indicies at or above insertBeforeIndex - // will shift, and the EH table may reallocate. - // - // This addition may also fail, if the table would become too large... + // Once we call fgTryAddEHTableEntries with deferCloning = false, + // all the EH indicies at or above insertBeforeIndex will shift, + // and the EH table may reallocate. // EHblkDsc* const clonedOutermostEbd = - fgAddEHTableEntries(insertBeforeIndex, regionCount, /* deferAdding */ deferCloning); + fgTryAddEHTableEntries(insertBeforeIndex, regionCount, /* deferAdding */ deferCloning); if (clonedOutermostEbd == nullptr) { diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 8e8ae80c1f8fec..bdefa0644ecfa7 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -1423,7 +1423,7 @@ void Compiler::fgAddSyncMethodEnterExit() EHblkDsc* newEntry = nullptr; unsigned XTnew = compHndBBtabCount; - newEntry = fgAddEHTableEntries(XTnew); + newEntry = fgTryAddEHTableEntries(XTnew); if (newEntry == nullptr) { diff --git a/src/coreclr/jit/jiteh.cpp b/src/coreclr/jit/jiteh.cpp index 1461706abf9e65..71aed4846e2f05 100644 --- a/src/coreclr/jit/jiteh.cpp +++ b/src/coreclr/jit/jiteh.cpp @@ -1463,7 +1463,7 @@ void Compiler::fgAllocEHTable() // the maximum number of clauses we will need might be very large. We allocate // twice the number of EH clauses in the IL, which should be good in practice. // In extreme cases, we might need to abandon this and reallocate. See - // fgAddEHTableEntries() for more details. + // fgTryAddEHTableEntries() for more details. #ifdef DEBUG compHndBBtabAllocCount = info.compXcptnsCount; // force the resizing code to hit more frequently in DEBUG @@ -1677,7 +1677,7 @@ void Compiler::fgRemoveEHTableEntry(unsigned XTnum) } //------------------------------------------------------------------------ -// fgAddEHTableEntries: add new EH table entries +// fgTryAddEHTableEntries: try to add new EH table entries // // Arguments: // XTnum -- new entries will be added before this entry @@ -1703,7 +1703,7 @@ void Compiler::fgRemoveEHTableEntry(unsigned XTnum) // are making room for cloned EH must take pains to find and clone these // as well... // -EHblkDsc* Compiler::fgAddEHTableEntries(unsigned XTnum, unsigned count, bool deferAdding) +EHblkDsc* Compiler::fgTryAddEHTableEntries(unsigned XTnum, unsigned count, bool deferAdding) { bool reallocate = false; bool const insert = (XTnum != compHndBBtabCount); @@ -1823,7 +1823,7 @@ EHblkDsc* Compiler::fgAddEHTableEntries(unsigned XTnum, unsigned count, bool def newHndBBtabAllocCount = MAX_XCPTN_INDEX; // increase to the maximum size we allow } - JITDUMP("*********** fgAddEHTableEntries: increasing EH table size from %d to %d\n", compHndBBtabAllocCount, + JITDUMP("*********** fgTryAddEHTableEntries: increasing EH table size from %d to %d\n", compHndBBtabAllocCount, newHndBBtabAllocCount); compHndBBtabAllocCount = newHndBBtabAllocCount; From 4ba2c9de77ea52718b5bfa2eeca0fac8f6bc4675 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Sat, 30 Nov 2024 17:35:42 -0800 Subject: [PATCH 11/17] pick up changes from de-abstraction integration --- src/coreclr/jit/compiler.h | 3 ++- src/coreclr/jit/fgehopt.cpp | 7 +++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index c842610177fe2d..4ac5ef2a6e84c8 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -5365,9 +5365,10 @@ class Compiler BitVec& m_visited; BlockToBlockMap* m_map = nullptr; jitstd::vector* m_blocksToClone = nullptr; - bool m_addEdges = false; weight_t m_profileScale = 0.0; unsigned m_ehRegionShift = 0; + bool m_addEdges = false; + bool m_scaleOriginal = false; }; bool fgCanCloneTryRegion(BasicBlock* tryEntry); diff --git a/src/coreclr/jit/fgehopt.cpp b/src/coreclr/jit/fgehopt.cpp index dd2e8a114e2e08..d97710f303c895 100644 --- a/src/coreclr/jit/fgehopt.cpp +++ b/src/coreclr/jit/fgehopt.cpp @@ -2505,6 +2505,13 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, CloneTryInfo& info, map->Set(block, newBlock, BlockToBlockMap::SetKind::Overwrite); BasicBlock::CloneBlockState(this, newBlock, block); newBlock->scaleBBWeight(info.m_profileScale); + + if (info.m_scaleOriginal) + { + weight_t originalScale = max(0.0, 1.0 - info.m_profileScale); + block->scaleBBWeight(originalScale); + } + *insertAfter = newBlock; } JITDUMP("Done cloning blocks for try...\n"); From ce9de9aa874f59d1f9d8e7dcdcaad3a5258bfd72 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Sun, 1 Dec 2024 16:55:22 -0800 Subject: [PATCH 12/17] review feedback --- src/coreclr/jit/compiler.h | 22 ++++----- src/coreclr/jit/fgehopt.cpp | 26 +++++------ src/coreclr/jit/flowgraph.cpp | 78 +++++++++++++++---------------- src/coreclr/jit/jitconfigvalues.h | 2 + src/coreclr/jit/loopcloning.cpp | 10 +++- src/coreclr/jit/optimizer.cpp | 6 ++- 6 files changed, 76 insertions(+), 68 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 4ac5ef2a6e84c8..27c91c1a9cb677 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2232,8 +2232,8 @@ class FlowGraphNaturalLoop bool HasDef(unsigned lclNum); - bool CanDuplicate(INDEBUG(const char** reason)); - void Duplicate(BasicBlock** insertAfter, BlockToBlockMap* map, weight_t weightScale); + bool CanDuplicate(bool allowEH DEBUGARG(const char** reason)); + void Duplicate(BasicBlock** insertAfter, BlockToBlockMap* map, weight_t weightScale, bool allowEH); bool MayExecuteBlockMultipleTimesPerIteration(BasicBlock* block); @@ -5360,15 +5360,15 @@ class Compiler struct CloneTryInfo { - CloneTryInfo(BitVecTraits& traits, BitVec& visited) : m_traits(traits), m_visited(visited) {} - BitVecTraits m_traits; - BitVec& m_visited; - BlockToBlockMap* m_map = nullptr; - jitstd::vector* m_blocksToClone = nullptr; - weight_t m_profileScale = 0.0; - unsigned m_ehRegionShift = 0; - bool m_addEdges = false; - bool m_scaleOriginal = false; + CloneTryInfo(BitVecTraits& traits, BitVec& visited) : Traits(traits), Visited(visited) {} + BitVecTraits Traits; + BitVec& Visited; + BlockToBlockMap* Map = nullptr; + jitstd::vector* BlocksToClone = nullptr; + weight_t ProfileScale = 0.0; + unsigned EHIndexShift = 0; + bool AddEdges = false; + bool ScaleOriginalBlockProfile = false; }; bool fgCanCloneTryRegion(BasicBlock* tryEntry); diff --git a/src/coreclr/jit/fgehopt.cpp b/src/coreclr/jit/fgehopt.cpp index d97710f303c895..b343b14c99700c 100644 --- a/src/coreclr/jit/fgehopt.cpp +++ b/src/coreclr/jit/fgehopt.cpp @@ -2114,15 +2114,15 @@ PhaseStatus Compiler::fgTailMergeThrows() // cloned blocks will be created and scaled by profile weight // and if info.m_addEdges is true have proper bbkinds and flow edges // info data will be updated: -// m_map will be modified to contain keys and for the blocks cloned -// m_visited will include bits for each newly cloned block +// Map will be modified to contain keys and for the blocks cloned +// Visited will include bits for each newly cloned block // m_ehRegionShift will describe number of EH regions added // insertAfter will point at the lexcially last block cloned // // Notes: // * if insertAfter is non null, map must also be non null // -// * If info.m_map is not nullptr, it is not modified unless cloning succeeds +// * If info.Map is not nullptr, it is not modified unless cloning succeeds // When cloning succeeds, entries for the try blocks and related blocks // (handler, filter, callfinally) will be updated; other map entries will // be left as they were @@ -2139,7 +2139,7 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, CloneTryInfo& info, { assert(bbIsTryBeg(tryEntry)); bool const deferCloning = (insertAfter == nullptr); - assert(deferCloning || ((*insertAfter != nullptr) && (info.m_map != nullptr))); + assert(deferCloning || ((*insertAfter != nullptr) && (info.Map != nullptr))); INDEBUG(const char* msg = deferCloning ? "Checking if it is possible to clone" : "Cloning";) JITDUMP("%s the try region EH#%02u headed by " FMT_BB "\n", msg, tryEntry->getTryIndex(), tryEntry->bbNum); @@ -2160,16 +2160,16 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, CloneTryInfo& info, // Track blocks to clone for caller, or if we are cloning and // caller doesn't care. // - jitstd::vector* blocks = info.m_blocksToClone; + jitstd::vector* blocks = info.BlocksToClone; if (!deferCloning && (blocks == nullptr)) { blocks = new (alloc) jitstd::vector(alloc); } unsigned regionCount = 0; - BitVecTraits* const traits = &info.m_traits; - BitVec& visited = info.m_visited; - BlockToBlockMap* const map = info.m_map; + BitVecTraits* const traits = &info.Traits; + BitVec& visited = info.Visited; + BlockToBlockMap* const map = info.Map; auto addBlockToClone = [=, &blocks, &visited](BasicBlock* block, const char* msg) { if (!BitVecOps::TryAddElemD(traits, visited, block->bbNum)) @@ -2397,7 +2397,7 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, CloneTryInfo& info, // Callers will see enclosing EH region indices shift by this much // - info.m_ehRegionShift = regionCount; + info.EHIndexShift = regionCount; // The EH table now looks like the following, for a middle insertion: // @@ -2504,11 +2504,11 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, CloneTryInfo& info, (*insertAfter)->bbNum); map->Set(block, newBlock, BlockToBlockMap::SetKind::Overwrite); BasicBlock::CloneBlockState(this, newBlock, block); - newBlock->scaleBBWeight(info.m_profileScale); + newBlock->scaleBBWeight(info.ProfileScale); - if (info.m_scaleOriginal) + if (info.ScaleOriginalBlockProfile) { - weight_t originalScale = max(0.0, 1.0 - info.m_profileScale); + weight_t originalScale = max(0.0, 1.0 - info.ProfileScale); block->scaleBBWeight(originalScale); } @@ -2616,7 +2616,7 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, CloneTryInfo& info, // Redirect any branches within the newly-cloned blocks or // from cloned blocks to non-cloned blocks // - if (info.m_addEdges) + if (info.AddEdges) { JITDUMP("Adding edges in the newly cloned try\n"); for (BasicBlock* const block : BlockToBlockMap::KeyIteration(map)) diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index bdefa0644ecfa7..c154532f197716 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -5959,15 +5959,18 @@ bool FlowGraphNaturalLoop::HasDef(unsigned lclNum) // CanDuplicate: Check if this loop can be duplicated. // // Parameters: +// allowEH - enable cloning if loop contains EH // reason - If this function returns false, the reason why. // // Returns: // True if the loop can be duplicated. // -// Remarks: -// We currently do not support duplicating loops with EH constructs in them. +// Notes: +// If allowEH is true, CanDuplicate will return true if the loop contains try entries, +// and a subsequent Duplicate() call will duplicate the loop and any other blocks needed +// to properly clone the EH. // -bool FlowGraphNaturalLoop::CanDuplicate(INDEBUG(const char** reason)) +bool FlowGraphNaturalLoop::CanDuplicate(bool allowEH DEBUGARG(const char** reason)) { #ifdef DEBUG const char* localReason; @@ -5977,9 +5980,6 @@ bool FlowGraphNaturalLoop::CanDuplicate(INDEBUG(const char** reason)) } #endif - bool allowEHCloning = false; - INDEBUG(allowEHCloning = (JitConfig.JitCloneLoopsWithEH() > 0);) - Compiler* comp = m_dfsTree->GetCompiler(); BasicBlock* const header = GetHeader(); @@ -5994,29 +5994,29 @@ bool FlowGraphNaturalLoop::CanDuplicate(INDEBUG(const char** reason)) return BasicBlockVisit::Continue; } - if (allowEHCloning) + if (!allowEH) { - if (comp->bbIsTryBeg(block)) - { - // Check if this is an "outermost" try within the loop. - // If so, we have more checking to do later on. - // - const bool headerInTry = header->hasTryIndex(); - unsigned blockIndex = block->getTryIndex(); - unsigned outermostBlockIndex = comp->ehTrueEnclosingTryIndexIL(blockIndex); + INDEBUG(*reason = "Loop not entirely within one EH region"); + return BasicBlockVisit::Abort; + } - if ((headerInTry && (outermostBlockIndex == header->getTryIndex())) || - (!headerInTry && (outermostBlockIndex == EHblkDsc::NO_ENCLOSING_INDEX))) - { - tryRegionsToClone.Push(block); - } - } + if (comp->bbIsTryBeg(block)) + { + // Check if this is an "outermost" try within the loop. + // If so, we have more checking to do later on. + // + const bool headerInTry = header->hasTryIndex(); + unsigned blockIndex = block->getTryIndex(); + unsigned outermostBlockIndex = comp->ehTrueEnclosingTryIndexIL(blockIndex); - return BasicBlockVisit::Continue; + if ((headerInTry && (outermostBlockIndex == header->getTryIndex())) || + (!headerInTry && (outermostBlockIndex == EHblkDsc::NO_ENCLOSING_INDEX))) + { + tryRegionsToClone.Push(block); + } } - INDEBUG(*reason = "Loop not entirely within one EH region"); - return BasicBlockVisit::Abort; + return BasicBlockVisit::Continue; }); // Check any enclosed try regions to make sure they can be cloned @@ -6026,7 +6026,7 @@ bool FlowGraphNaturalLoop::CanDuplicate(INDEBUG(const char** reason)) const unsigned numberOfTryRegions = tryRegionsToClone.Height(); if ((result != BasicBlockVisit::Abort) && (numberOfTryRegions > 0)) { - assert(allowEHCloning); + assert(allowEH); // Possibly limit to just 1 region...? // @@ -6057,18 +6057,16 @@ bool FlowGraphNaturalLoop::CanDuplicate(INDEBUG(const char** reason)) // insertAfter - [in, out] Block to insert duplicated blocks after; updated to last block inserted. // map - A map that will have mappings from loop blocks to duplicated blocks added to it. // weightScale - Factor to scale weight of new blocks by +// allowEH - Allow cloning loops that contain try region entries // -void FlowGraphNaturalLoop::Duplicate(BasicBlock** insertAfter, BlockToBlockMap* map, weight_t weightScale) +void FlowGraphNaturalLoop::Duplicate(BasicBlock** insertAfter, BlockToBlockMap* map, weight_t weightScale, bool allowEH) { - assert(CanDuplicate(nullptr)); + assert(CanDuplicate(allowEH, nullptr)); Compiler* const comp = m_dfsTree->GetCompiler(); - bool canCloneTry = false; bool clonedTry = false; BasicBlock* const insertionPoint = *insertAfter; - INDEBUG(canCloneTry = (JitConfig.JitCloneLoopsWithEH() > 0);) - // If the insertion point is within an EH region, remember all the EH regions // current that end at the insertion point, so we can properly extend them // when we're done cloning. @@ -6127,17 +6125,17 @@ void FlowGraphNaturalLoop::Duplicate(BasicBlock** insertAfter, BlockToBlockMap* } } - // Keep track of how much the region end EH indices change because of EH region cloning. + // Keep track of how much the EH indices change because of EH region cloning. // - unsigned ehRegionShift = 0; + unsigned ehIndexShift = 0; // Keep track of which blocks were handled by EH region cloning // BitVecTraits traits(comp->compBasicBlockID, comp); BitVec visited(BitVecOps::MakeEmpty(&traits)); - VisitLoopBlocksLexical([=, &traits, &visited, &clonedTry, &ehRegionShift](BasicBlock* blk) { - if (canCloneTry) + VisitLoopBlocksLexical([=, &traits, &visited, &clonedTry, &ehIndexShift](BasicBlock* blk) { + if (allowEH) { // If we allow cloning loops with EH, we may have already handled // this loop block as part of a containing try region. @@ -6155,14 +6153,14 @@ void FlowGraphNaturalLoop::Duplicate(BasicBlock** insertAfter, BlockToBlockMap* if (comp->bbIsTryBeg(blk)) { Compiler::CloneTryInfo info(traits, visited); - info.m_map = map; - info.m_addEdges = false; - info.m_profileScale = weightScale; + info.Map = map; + info.AddEdges = false; + info.ProfileScale = weightScale; BasicBlock* const clonedBlock = comp->fgCloneTryRegion(blk, info, insertAfter); assert(clonedBlock != nullptr); - ehRegionShift += info.m_ehRegionShift; + ehIndexShift += info.EHIndexShift; clonedTry = true; return BasicBlockVisit::Continue; } @@ -6213,7 +6211,7 @@ void FlowGraphNaturalLoop::Duplicate(BasicBlock** insertAfter, BlockToBlockMap* while (regionEnds.Height() > 0) { RegionEnd r = regionEnds.Pop(); - EHblkDsc* const ebd = comp->ehGetDsc(r.m_regionIndex + ehRegionShift); + EHblkDsc* const ebd = comp->ehGetDsc(r.m_regionIndex + ehIndexShift); if (r.m_block == insertionPoint) { @@ -6265,7 +6263,7 @@ void FlowGraphNaturalLoop::Duplicate(BasicBlock** insertAfter, BlockToBlockMap* { for (BasicBlock* const blk : BlockToBlockMap::KeyIteration(map)) { - if (!this->ContainsBlock(blk)) + if (!ContainsBlock(blk)) { BasicBlock* newBlk = nullptr; bool b = map->Lookup(blk, &newBlk); diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index e50f419e28b745..71eee5bff65e74 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -96,6 +96,8 @@ CONFIG_INTEGER(JitUnrollLoopMaxIterationCount, "JitUnrollLoopMaxIterationCount", DEFAULT_UNROLL_LOOP_MAX_ITERATION_COUNT) +CONFIG_INTEGER(JitUnrollLoopsWithEH, "JitUnrollLoopsWithEH", 0) // If 0, don't unroll loops containing EH regions + CONFIG_INTEGER(JitDirectAlloc, "JitDirectAlloc", 0) CONFIG_INTEGER(JitDoubleAlign, "JitDoubleAlign", 1) CONFIG_INTEGER(JitEmitPrintRefRegs, "JitEmitPrintRefRegs", 0) diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index 3d3a2617212974..c535aa9124dc44 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -1888,8 +1888,11 @@ bool Compiler::optIsLoopClonable(FlowGraphNaturalLoop* loop, LoopCloneContext* c return false; } + bool cloneLoopsWithEH = false; + INDEBUG(cloneLoopsWithEH = (JitConfig.JitCloneLoopsWithEH() > 0);) INDEBUG(const char* reason); - if (!loop->CanDuplicate(INDEBUG(&reason))) + + if (!loop->CanDuplicate(cloneLoopsWithEH DEBUGARG(&reason))) { JITDUMP("Loop cloning: rejecting loop " FMT_LP ": %s\n", loop->GetIndex(), reason); return false; @@ -2031,6 +2034,9 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex } #endif + bool cloneLoopsWithEH = false; + INDEBUG(cloneLoopsWithEH = (JitConfig.JitCloneLoopsWithEH() > 0);) + // Determine the depth of the loop, so we can properly weight blocks added (outside the cloned loop blocks). unsigned depth = loop->GetDepth(); weight_t ambientWeight = 1; @@ -2137,7 +2143,7 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex BlockToBlockMap* blockMap = new (getAllocator(CMK_LoopClone)) BlockToBlockMap(getAllocator(CMK_LoopClone)); - loop->Duplicate(&newPred, blockMap, LoopCloneContext::slowPathWeightScaleFactor); + loop->Duplicate(&newPred, blockMap, LoopCloneContext::slowPathWeightScaleFactor, cloneLoopsWithEH); // Scale old blocks to the fast path weight. loop->VisitLoopBlocks([=](BasicBlock* block) { diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 62c8307034cea3..3ccf6bb39b6059 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -1508,8 +1508,10 @@ bool Compiler::optTryUnrollLoop(FlowGraphNaturalLoop* loop, bool* changedIR) } // clang-format on + bool unrollLoopsWithEH = false; + INDEBUG(unrollLoopsWithEH = (JitConfig.JitUnrollLoopsWithEH() > 0);) INDEBUG(const char* reason); - if (!loop->CanDuplicate(INDEBUG(&reason))) + if (!loop->CanDuplicate(unrollLoopsWithEH DEBUGARG(&reason))) { JITDUMP("Failed to unroll loop " FMT_LP ": %s\n", loop->GetIndex(), reason); return false; @@ -1616,7 +1618,7 @@ bool Compiler::optTryUnrollLoop(FlowGraphNaturalLoop* loop, bool* changedIR) // and we might not have upscaled at all, if we had profile data. // weight_t scaleWeight = 1.0 / BB_LOOP_WEIGHT_SCALE; - loop->Duplicate(&insertAfter, &blockMap, scaleWeight); + loop->Duplicate(&insertAfter, &blockMap, scaleWeight, unrollLoopsWithEH); // Replace all uses of the loop iterator with the current value. loop->VisitLoopBlocks([=, &blockMap](BasicBlock* block) { From 04b941761ba36e8b0d6c11cb9ea2de000a0766a5 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Mon, 2 Dec 2024 08:39:56 -0800 Subject: [PATCH 13/17] split CanDuplicate/Duplicate into simple and with EH versions --- src/coreclr/jit/compiler.h | 6 +- src/coreclr/jit/flowgraph.cpp | 167 ++++++++++++++++++++++++-------- src/coreclr/jit/loopcloning.cpp | 19 +++- src/coreclr/jit/optimizer.cpp | 22 ++++- 4 files changed, 165 insertions(+), 49 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 27c91c1a9cb677..f784b8ccfb35aa 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2232,8 +2232,10 @@ class FlowGraphNaturalLoop bool HasDef(unsigned lclNum); - bool CanDuplicate(bool allowEH DEBUGARG(const char** reason)); - void Duplicate(BasicBlock** insertAfter, BlockToBlockMap* map, weight_t weightScale, bool allowEH); + bool CanDuplicate(INDEBUG(const char** reason)); + bool CanDuplicateWithEH(INDEBUG(const char** reason)); + void Duplicate(BasicBlock** insertAfter, BlockToBlockMap* map, weight_t weightScale); + void DuplicateWithEH(BasicBlock** insertAfter, BlockToBlockMap* map, weight_t weightScale); bool MayExecuteBlockMultipleTimesPerIteration(BasicBlock* block); diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index c154532f197716..3f1d6c8d016e39 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -5959,18 +5959,108 @@ bool FlowGraphNaturalLoop::HasDef(unsigned lclNum) // CanDuplicate: Check if this loop can be duplicated. // // Parameters: -// allowEH - enable cloning if loop contains EH +// reason - If this function returns false, the reason why. +// +// Returns: +// True if the loop can be duplicated. +// +// Remarks: +// Does not support duplicating loops with EH constructs in them. +// (see CanDuplicateWithEH) +// +bool FlowGraphNaturalLoop::CanDuplicate(INDEBUG(const char** reason)) +{ +#ifdef DEBUG + const char* localReason; + if (reason == nullptr) + { + reason = &localReason; + } +#endif + + Compiler* comp = m_dfsTree->GetCompiler(); + BasicBlockVisit result = VisitLoopBlocks([=](BasicBlock* block) { + if (!BasicBlock::sameEHRegion(block, GetHeader())) + { + INDEBUG(*reason = "Loop not entirely within one EH region"); + return BasicBlockVisit::Abort; + } + + return BasicBlockVisit::Continue; + }); + + return result != BasicBlockVisit::Abort; +} + +//------------------------------------------------------------------------ +// Duplicate: Duplicate the blocks of this loop, inserting them after `insertAfter`. +// +// Parameters: +// insertAfter - [in, out] Block to insert duplicated blocks after; updated to last block inserted. +// map - A map that will have mappings from loop blocks to duplicated blocks added to it. +// weightScale - Factor to scale weight of new blocks by +// +void FlowGraphNaturalLoop::Duplicate(BasicBlock** insertAfter, BlockToBlockMap* map, weight_t weightScale) +{ + assert(CanDuplicate(nullptr)); + + Compiler* comp = m_dfsTree->GetCompiler(); + + BasicBlock* bottom = GetLexicallyBottomMostBlock(); + + VisitLoopBlocksLexical([=](BasicBlock* blk) { + // Initialize newBlk as BBJ_ALWAYS without jump target, and fix up jump target later + // with BasicBlock::CopyTarget(). + BasicBlock* newBlk = comp->fgNewBBafter(BBJ_ALWAYS, *insertAfter, /*extendRegion*/ true); + JITDUMP("Adding " FMT_BB " (copy of " FMT_BB ") after " FMT_BB "\n", newBlk->bbNum, blk->bbNum, + (*insertAfter)->bbNum); + + BasicBlock::CloneBlockState(comp, newBlk, blk); + + // We're going to create the preds below, which will set the bbRefs properly, + // so clear out the cloned bbRefs field. + newBlk->bbRefs = 0; + + newBlk->scaleBBWeight(weightScale); + + *insertAfter = newBlk; + map->Set(blk, newBlk, BlockToBlockMap::Overwrite); + + return BasicBlockVisit::Continue; + }); + + // Now go through the new blocks, remapping their jump targets within the loop + // and updating the preds lists. + VisitLoopBlocks([=](BasicBlock* blk) { + BasicBlock* newBlk = nullptr; + bool b = map->Lookup(blk, &newBlk); + assert(b && newBlk != nullptr); + + // Jump target should not be set yet + assert(!newBlk->HasInitializedTarget()); + + // Redirect the new block according to "blockMap". + // optSetMappedBlockTargets will set newBlk's successors, and add pred edges for the successors. + comp->optSetMappedBlockTargets(blk, newBlk, map); + + return BasicBlockVisit::Continue; + }); +} + +//------------------------------------------------------------------------ +// CanDuplicateWithEH: Check if this loop (possibly containing try entries) +// can be duplicated. +// +// Parameters: // reason - If this function returns false, the reason why. // // Returns: // True if the loop can be duplicated. // // Notes: -// If allowEH is true, CanDuplicate will return true if the loop contains try entries, -// and a subsequent Duplicate() call will duplicate the loop and any other blocks needed -// to properly clone the EH. +// Extends CanDuplicate to cover loops with try region entries. // -bool FlowGraphNaturalLoop::CanDuplicate(bool allowEH DEBUGARG(const char** reason)) +bool FlowGraphNaturalLoop::CanDuplicateWithEH(INDEBUG(const char** reason)) { #ifdef DEBUG const char* localReason; @@ -5994,12 +6084,6 @@ bool FlowGraphNaturalLoop::CanDuplicate(bool allowEH DEBUGARG(const char** reaso return BasicBlockVisit::Continue; } - if (!allowEH) - { - INDEBUG(*reason = "Loop not entirely within one EH region"); - return BasicBlockVisit::Abort; - } - if (comp->bbIsTryBeg(block)) { // Check if this is an "outermost" try within the loop. @@ -6026,9 +6110,7 @@ bool FlowGraphNaturalLoop::CanDuplicate(bool allowEH DEBUGARG(const char** reaso const unsigned numberOfTryRegions = tryRegionsToClone.Height(); if ((result != BasicBlockVisit::Abort) && (numberOfTryRegions > 0)) { - assert(allowEH); - - // Possibly limit to just 1 region...? + // Possibly limit to just 1 region. // JITDUMP(FMT_LP " contains %u top-level try region%s\n", GetIndex(), numberOfTryRegions, numberOfTryRegions > 1 ? "s" : ""); @@ -6051,17 +6133,20 @@ bool FlowGraphNaturalLoop::CanDuplicate(bool allowEH DEBUGARG(const char** reaso } //------------------------------------------------------------------------ -// Duplicate: Duplicate the blocks of this loop, inserting them after `insertAfter`. +// DuplicateWithEH: Duplicate the blocks of this loop, inserting them after `insertAfter`, +// and also fully clone any try regions // // Parameters: // insertAfter - [in, out] Block to insert duplicated blocks after; updated to last block inserted. // map - A map that will have mappings from loop blocks to duplicated blocks added to it. // weightScale - Factor to scale weight of new blocks by -// allowEH - Allow cloning loops that contain try region entries // -void FlowGraphNaturalLoop::Duplicate(BasicBlock** insertAfter, BlockToBlockMap* map, weight_t weightScale, bool allowEH) +// Notes: +// Extends uplicate to cover loops with try region entries. +// +void FlowGraphNaturalLoop::DuplicateWithEH(BasicBlock** insertAfter, BlockToBlockMap* map, weight_t weightScale) { - assert(CanDuplicate(allowEH, nullptr)); + assert(CanDuplicateWithEH(nullptr)); Compiler* const comp = m_dfsTree->GetCompiler(); bool clonedTry = false; @@ -6135,35 +6220,31 @@ void FlowGraphNaturalLoop::Duplicate(BasicBlock** insertAfter, BlockToBlockMap* BitVec visited(BitVecOps::MakeEmpty(&traits)); VisitLoopBlocksLexical([=, &traits, &visited, &clonedTry, &ehIndexShift](BasicBlock* blk) { - if (allowEH) + // Try cloning may have already handled this block + // + if (BitVecOps::IsMember(&traits, visited, blk->bbNum)) { - // If we allow cloning loops with EH, we may have already handled - // this loop block as part of a containing try region. - // - if (BitVecOps::IsMember(&traits, visited, blk->bbNum)) - { - return BasicBlockVisit::Continue; - } + return BasicBlockVisit::Continue; + } - // If this is a try region entry, clone the entire region now. - // Defer adding edges and extending EH regions until later. - // - // Updates map, visited, and insertAfter. - // - if (comp->bbIsTryBeg(blk)) - { - Compiler::CloneTryInfo info(traits, visited); - info.Map = map; - info.AddEdges = false; - info.ProfileScale = weightScale; + // If this is a try region entry, clone the entire region now. + // Defer adding edges and extending EH regions until later. + // + // Updates map, visited, and insertAfter. + // + if (comp->bbIsTryBeg(blk)) + { + Compiler::CloneTryInfo info(traits, visited); + info.Map = map; + info.AddEdges = false; + info.ProfileScale = weightScale; - BasicBlock* const clonedBlock = comp->fgCloneTryRegion(blk, info, insertAfter); + BasicBlock* const clonedBlock = comp->fgCloneTryRegion(blk, info, insertAfter); - assert(clonedBlock != nullptr); - ehIndexShift += info.EHIndexShift; - clonedTry = true; - return BasicBlockVisit::Continue; - } + assert(clonedBlock != nullptr); + ehIndexShift += info.EHIndexShift; + clonedTry = true; + return BasicBlockVisit::Continue; } else { diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index c535aa9124dc44..982e87f832d89c 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -1892,7 +1892,15 @@ bool Compiler::optIsLoopClonable(FlowGraphNaturalLoop* loop, LoopCloneContext* c INDEBUG(cloneLoopsWithEH = (JitConfig.JitCloneLoopsWithEH() > 0);) INDEBUG(const char* reason); - if (!loop->CanDuplicate(cloneLoopsWithEH DEBUGARG(&reason))) + if (cloneLoopsWithEH) + { + if (!loop->CanDuplicateWithEH(INDEBUG(&reason))) + { + JITDUMP("Loop cloning: rejecting loop " FMT_LP ": %s\n", loop->GetIndex(), reason); + return false; + } + } + else if (!loop->CanDuplicate(INDEBUG(&reason))) { JITDUMP("Loop cloning: rejecting loop " FMT_LP ": %s\n", loop->GetIndex(), reason); return false; @@ -2143,7 +2151,14 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex BlockToBlockMap* blockMap = new (getAllocator(CMK_LoopClone)) BlockToBlockMap(getAllocator(CMK_LoopClone)); - loop->Duplicate(&newPred, blockMap, LoopCloneContext::slowPathWeightScaleFactor, cloneLoopsWithEH); + if (cloneLoopsWithEH) + { + loop->DuplicateWithEH(&newPred, blockMap, LoopCloneContext::slowPathWeightScaleFactor); + } + else + { + loop->Duplicate(&newPred, blockMap, LoopCloneContext::slowPathWeightScaleFactor); + } // Scale old blocks to the fast path weight. loop->VisitLoopBlocks([=](BasicBlock* block) { diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 3ccf6bb39b6059..a1ebae5e25eaee 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -1511,7 +1511,16 @@ bool Compiler::optTryUnrollLoop(FlowGraphNaturalLoop* loop, bool* changedIR) bool unrollLoopsWithEH = false; INDEBUG(unrollLoopsWithEH = (JitConfig.JitUnrollLoopsWithEH() > 0);) INDEBUG(const char* reason); - if (!loop->CanDuplicate(unrollLoopsWithEH DEBUGARG(&reason))) + + if (unrollLoopsWithEH) + { + if (!loop->CanDuplicateWithEH(INDEBUG(&reason))) + { + JITDUMP("Failed to unroll loop " FMT_LP ": %s\n", loop->GetIndex(), reason); + return false; + } + } + else if (!loop->CanDuplicate(INDEBUG(&reason))) { JITDUMP("Failed to unroll loop " FMT_LP ": %s\n", loop->GetIndex(), reason); return false; @@ -1522,6 +1531,7 @@ bool Compiler::optTryUnrollLoop(FlowGraphNaturalLoop* loop, bool* changedIR) *changedIR = true; // Heuristic: Estimated cost in code size of the unrolled loop. + // TODO: duplication cost is higher if there is EH... ClrSafeInt loopCostSz; // Cost is size of one iteration @@ -1618,7 +1628,15 @@ bool Compiler::optTryUnrollLoop(FlowGraphNaturalLoop* loop, bool* changedIR) // and we might not have upscaled at all, if we had profile data. // weight_t scaleWeight = 1.0 / BB_LOOP_WEIGHT_SCALE; - loop->Duplicate(&insertAfter, &blockMap, scaleWeight, unrollLoopsWithEH); + + if (unrollLoopsWithEH) + { + loop->DuplicateWithEH(&insertAfter, &blockMap, scaleWeight); + } + else + { + loop->Duplicate(&insertAfter, &blockMap, scaleWeight); + } // Replace all uses of the loop iterator with the current value. loop->VisitLoopBlocks([=, &blockMap](BasicBlock* block) { From a0797181ca61f8af89fb19c2dc0531d6ba1425f4 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Mon, 2 Dec 2024 15:38:44 -0800 Subject: [PATCH 14/17] fix accounting; add extra refs to handler entries when cloning --- src/coreclr/jit/fgehopt.cpp | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/fgehopt.cpp b/src/coreclr/jit/fgehopt.cpp index 1779fc9cc3ad53..21bb52a6e28694 100644 --- a/src/coreclr/jit/fgehopt.cpp +++ b/src/coreclr/jit/fgehopt.cpp @@ -2519,7 +2519,8 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, CloneTryInfo& info, // CompAllocator alloc = getAllocator(CMK_TryRegionClone); ArrayStack regionsToProcess(alloc); - unsigned const tryIndex = tryEntry->getTryIndex(); + unsigned const tryIndex = tryEntry->getTryIndex(); + unsigned numberOfBlocksToClone = 0; // Track blocks to clone for caller, or if we are cloning and // caller doesn't care. @@ -2535,7 +2536,7 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, CloneTryInfo& info, BitVec& visited = info.Visited; BlockToBlockMap* const map = info.Map; - auto addBlockToClone = [=, &blocks, &visited](BasicBlock* block, const char* msg) { + auto addBlockToClone = [=, &blocks, &visited, &numberOfBlocksToClone](BasicBlock* block, const char* msg) { if (!BitVecOps::TryAddElemD(traits, visited, block->bbNum)) { return false; @@ -2543,6 +2544,8 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, CloneTryInfo& info, JITDUMP(" %s block " FMT_BB "\n", msg, block->bbNum); + numberOfBlocksToClone++; + if (blocks != nullptr) { blocks->push_back(block); @@ -2714,8 +2717,8 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, CloneTryInfo& info, // Now blocks contains an entry for each block to clone. // - JITDUMP("Will need to clone %u EH regions (outermost: EH#%02u) and %zu blocks\n", regionCount, outermostTryIndex, - blocks->size()); + JITDUMP("Will need to clone %u EH regions (outermost: EH#%02u) and %u blocks\n", regionCount, outermostTryIndex, + numberOfBlocksToClone); // Allocate the new EH clauses. First, find the enclosing EH clause, if any... // we will want to allocate the new clauses just "before" this point. @@ -2973,6 +2976,14 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, CloneTryInfo& info, EHblkDsc* const clonedEbd = ehGetDsc(cloneHndIndex); newBlock->setHndIndex(cloneHndIndex); updateBlockReferences(cloneHndIndex); + + // Handler and filter entries also have an + // additional artificial reference count. + // + if (bbIsHandlerBeg(newBlock)) + { + newBlock->bbRefs++; + } } } JITDUMP("Done fixing region indices\n"); From adaa0c3ae69303251c86667fed0fbadc5f73489c Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Mon, 2 Dec 2024 15:47:48 -0800 Subject: [PATCH 15/17] fix memory region kinds --- src/coreclr/jit/flowgraph.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 1cf442a383086e..b30556b1f7c516 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -6068,8 +6068,7 @@ bool FlowGraphNaturalLoop::CanDuplicateWithEH(INDEBUG(const char** reason)) Compiler* comp = m_dfsTree->GetCompiler(); BasicBlock* const header = GetHeader(); - CompAllocator alloc = comp->getAllocator(CMK_LoopClone); - ArrayStack tryRegionsToClone(alloc); + ArrayStack tryRegionsToClone(comp->getAllocator(CMK_TryRegionClone)); BasicBlockVisit result = VisitLoopBlocks([=, &tryRegionsToClone](BasicBlock* block) { const bool inSameRegionAsHeader = BasicBlock::sameEHRegion(block, header); @@ -6164,7 +6163,7 @@ void FlowGraphNaturalLoop::DuplicateWithEH(BasicBlock** insertAfter, BlockToBloc bool m_isTryEnd; }; - ArrayStack regionEnds(comp->getAllocator(CMK_LoopClone)); + ArrayStack regionEnds(comp->getAllocator(CMK_TryRegionClone)); // Record enclosing EH region block references, // so we can keep track of what the "before" picture looked like. From d16582d1ac944c06a55757469b9d45a85581ecf7 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Tue, 3 Dec 2024 08:09:08 -0800 Subject: [PATCH 16/17] use bbID consistently; have CloneTryInfo construct traits --- src/coreclr/jit/compiler.h | 36 ++++++++++++++++++++++------------- src/coreclr/jit/fgehopt.cpp | 30 +++++++++++++++++++---------- src/coreclr/jit/flowgraph.cpp | 11 ++++++----- 3 files changed, 49 insertions(+), 28 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 6f580595a4c03d..2a2dc567e0b098 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2559,6 +2559,29 @@ struct RelopImplicationInfo bool reverseSense = false; }; +//------------------------------------------------------------------------ +// CloneTryInfo +// +// Describes information needed to clone a try region, and information +// produced by cloning that region +// +struct CloneTryInfo +{ + CloneTryInfo(Compiler* comp); + + // bbID based traits and vector + // + BitVecTraits Traits; + BitVec Visited; + + BlockToBlockMap* Map = nullptr; + jitstd::vector* BlocksToClone = nullptr; + weight_t ProfileScale = 0.0; + unsigned EHIndexShift = 0; + bool AddEdges = false; + bool ScaleOriginalBlockProfile = false; +}; + /* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX @@ -5361,19 +5384,6 @@ class Compiler PhaseStatus fgCloneFinally(); - struct CloneTryInfo - { - CloneTryInfo(BitVecTraits& traits, BitVec& visited) : Traits(traits), Visited(visited) {} - BitVecTraits Traits; - BitVec& Visited; - BlockToBlockMap* Map = nullptr; - jitstd::vector* BlocksToClone = nullptr; - weight_t ProfileScale = 0.0; - unsigned EHIndexShift = 0; - bool AddEdges = false; - bool ScaleOriginalBlockProfile = false; - }; - bool fgCanCloneTryRegion(BasicBlock* tryEntry); BasicBlock* fgCloneTryRegion(BasicBlock* tryEntry, CloneTryInfo& info, BasicBlock** insertAfter = nullptr); diff --git a/src/coreclr/jit/fgehopt.cpp b/src/coreclr/jit/fgehopt.cpp index 21bb52a6e28694..18c2244336de05 100644 --- a/src/coreclr/jit/fgehopt.cpp +++ b/src/coreclr/jit/fgehopt.cpp @@ -2537,7 +2537,7 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, CloneTryInfo& info, BlockToBlockMap* const map = info.Map; auto addBlockToClone = [=, &blocks, &visited, &numberOfBlocksToClone](BasicBlock* block, const char* msg) { - if (!BitVecOps::TryAddElemD(traits, visited, block->bbNum)) + if (!BitVecOps::TryAddElemD(traits, visited, block->bbID)) { return false; } @@ -2570,10 +2570,10 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, CloneTryInfo& info, BasicBlock* const firstTryBlock = ebd->ebdTryBeg; BasicBlock* const lastTryBlock = ebd->ebdTryLast; - if (BitVecOps::IsMember(traits, visited, firstTryBlock->bbNum)) + if (BitVecOps::IsMember(traits, visited, firstTryBlock->bbID)) { JITDUMP("already walked try region for EH#%02u\n", regionIndex); - assert(BitVecOps::IsMember(traits, visited, lastTryBlock->bbNum)); + assert(BitVecOps::IsMember(traits, visited, lastTryBlock->bbID)); } else { @@ -2625,10 +2625,10 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, CloneTryInfo& info, BasicBlock* const firstFltBlock = ebd->ebdFilter; BasicBlock* const lastFltBlock = ebd->BBFilterLast(); - if (BitVecOps::IsMember(traits, visited, firstFltBlock->bbNum)) + if (BitVecOps::IsMember(traits, visited, firstFltBlock->bbID)) { JITDUMP("already walked filter region for EH#%02u\n", regionIndex); - assert(BitVecOps::IsMember(traits, visited, lastFltBlock->bbNum)); + assert(BitVecOps::IsMember(traits, visited, lastFltBlock->bbID)); } else { @@ -2648,10 +2648,10 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, CloneTryInfo& info, BasicBlock* const firstHndBlock = ebd->ebdHndBeg; BasicBlock* const lastHndBlock = ebd->ebdHndLast; - if (BitVecOps::IsMember(traits, visited, firstHndBlock->bbNum)) + if (BitVecOps::IsMember(traits, visited, firstHndBlock->bbID)) { JITDUMP("already walked handler region for EH#%02u\n", regionIndex); - assert(BitVecOps::IsMember(traits, visited, lastHndBlock->bbNum)); + assert(BitVecOps::IsMember(traits, visited, lastHndBlock->bbID)); } else { @@ -3129,9 +3129,19 @@ bool Compiler::fgCanCloneTryRegion(BasicBlock* tryEntry) { assert(bbIsTryBeg(tryEntry)); - BitVecTraits traits(compBasicBlockID, this); - BitVec visited = BitVecOps::MakeEmpty(&traits); - CloneTryInfo info(traits, visited); + CloneTryInfo info(this); BasicBlock* const result = fgCloneTryRegion(tryEntry, info); return result != nullptr; } + +//------------------------------------------------------------------------ +// CloneTryInfo::CloneTryInfo +// +// Arguments: +// construct an object for cloning a try region +// +CloneTryInfo::CloneTryInfo(Compiler* comp) + : Traits(comp->compBasicBlockID, comp) + , Visited(BitVecOps::MakeEmpty(&Traits)) +{ +} diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index b30556b1f7c516..91d62de8316b75 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -6136,7 +6136,7 @@ bool FlowGraphNaturalLoop::CanDuplicateWithEH(INDEBUG(const char** reason)) // weightScale - Factor to scale weight of new blocks by // // Notes: -// Extends uplicate to cover loops with try region entries. +// Extends Duplicate to cover loops with try region entries. // void FlowGraphNaturalLoop::DuplicateWithEH(BasicBlock** insertAfter, BlockToBlockMap* map, weight_t weightScale) { @@ -6216,7 +6216,7 @@ void FlowGraphNaturalLoop::DuplicateWithEH(BasicBlock** insertAfter, BlockToBloc VisitLoopBlocksLexical([=, &traits, &visited, &clonedTry, &ehIndexShift](BasicBlock* blk) { // Try cloning may have already handled this block // - if (BitVecOps::IsMember(&traits, visited, blk->bbNum)) + if (BitVecOps::IsMember(&traits, visited, blk->bbID)) { return BasicBlockVisit::Continue; } @@ -6224,11 +6224,11 @@ void FlowGraphNaturalLoop::DuplicateWithEH(BasicBlock** insertAfter, BlockToBloc // If this is a try region entry, clone the entire region now. // Defer adding edges and extending EH regions until later. // - // Updates map, visited, and insertAfter. + // Updates map, and insertAfter. // if (comp->bbIsTryBeg(blk)) { - Compiler::CloneTryInfo info(traits, visited); + CloneTryInfo info(comp); info.Map = map; info.AddEdges = false; info.ProfileScale = weightScale; @@ -6236,6 +6236,7 @@ void FlowGraphNaturalLoop::DuplicateWithEH(BasicBlock** insertAfter, BlockToBloc BasicBlock* const clonedBlock = comp->fgCloneTryRegion(blk, info, insertAfter); assert(clonedBlock != nullptr); + BitVecOps::UnionD(&traits, visited, info.Visited); ehIndexShift += info.EHIndexShift; clonedTry = true; return BasicBlockVisit::Continue; @@ -6246,7 +6247,7 @@ void FlowGraphNaturalLoop::DuplicateWithEH(BasicBlock** insertAfter, BlockToBloc // assert(!comp->bbIsTryBeg(blk)); assert(!comp->bbIsHandlerBeg(blk)); - assert(!BitVecOps::IsMember(&traits, visited, blk->bbNum)); + assert(!BitVecOps::IsMember(&traits, visited, blk->bbID)); } // `blk` was not in loop-enclosed try region or companion region. From 5b5b82c44c8953f1539fb520ad5b20b861f360df Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Tue, 3 Dec 2024 14:38:58 -0800 Subject: [PATCH 17/17] non-funclet x86 needs one extra block cloned --- src/coreclr/jit/fgehopt.cpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/fgehopt.cpp b/src/coreclr/jit/fgehopt.cpp index 18c2244336de05..5ed56c2beea0a4 100644 --- a/src/coreclr/jit/fgehopt.cpp +++ b/src/coreclr/jit/fgehopt.cpp @@ -2476,7 +2476,7 @@ PhaseStatus Compiler::fgTailMergeThrows() // else // Return the cloned try entry, or nullptr if cloning failed // cloned blocks will be created and scaled by profile weight -// and if info.m_addEdges is true have proper bbkinds and flow edges +// and if info.AddEdges is true have proper bbkinds and flow edges // info data will be updated: // Map will be modified to contain keys and for the blocks cloned // Visited will include bits for each newly cloned block @@ -2614,6 +2614,17 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, CloneTryInfo& info, else if (block->KindIs(BBJ_CALLFINALLYRET) && block->Prev()->TargetIs(ebd->ebdHndBeg)) { addBlockToClone(block, "callfinallyret"); + +#if defined(FEATURE_EH_WINDOWS_X86) + + // For non-funclet X86 we must also clone the next block after the callfinallyret. + // (it will contain an END_LFIN) + // + if (!UsesFunclets()) + { + addBlockToClone(block->GetTarget(), "lfin-continuation"); + } +#endif } } }