Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JIT: Enable inlining for late devirtualization #110827

Merged
merged 61 commits into from
Jan 29, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
61 commits
Select commit Hold shift + click to select a range
686f64e
Enable inlining for late devirt
hez2010 Jan 20, 2025
27cc1ce
Pass correct IL offset
hez2010 Jan 20, 2025
21a5adb
Only creates RET_EXPR if the node is not top-level or not returning void
hez2010 Jan 20, 2025
d1d5181
Do not try inlining if BBF_INTERNAL is set
hez2010 Jan 20, 2025
cd9f817
Ensure the type matches
hez2010 Jan 20, 2025
aad3d7e
Set inliner context correctly
hez2010 Jan 20, 2025
3f5204b
Merge branch 'main' into inline-exact-devirt
hez2010 Jan 21, 2025
3b5b446
Address review feedbacks
hez2010 Jan 21, 2025
27bc0aa
Fix non inline candidate marking
hez2010 Jan 21, 2025
dbf204a
Handle calls with retbuf
hez2010 Jan 21, 2025
f6d38c4
Handle BBF_INTERNAL
hez2010 Jan 21, 2025
10de97b
Get real type from local
hez2010 Jan 22, 2025
472e140
Always set IL offset
hez2010 Jan 22, 2025
20b1a94
Add comments
hez2010 Jan 22, 2025
5c9c927
Oops
hez2010 Jan 22, 2025
97326e2
Use gtReturnType instead
hez2010 Jan 22, 2025
096ab2a
Remove unused ilOffset
hez2010 Jan 22, 2025
095a981
Use genActualType
hez2010 Jan 22, 2025
614618a
Merge branch 'main' into inline-exact-devirt
hez2010 Jan 22, 2025
5951462
Remove unnecessary spillTemp
hez2010 Jan 22, 2025
03c0df5
Handle nested call correctly
hez2010 Jan 22, 2025
0042519
Don't promote compCurStmt
hez2010 Jan 22, 2025
b35d1ba
Merge branch 'main' into inline-exact-devirt
hez2010 Jan 23, 2025
0e13553
Remove unused ilOffset
hez2010 Jan 24, 2025
edfa2ac
Handle BBF_INTERNAL
hez2010 Jan 24, 2025
d40d7e1
Use correct return type
hez2010 Jan 24, 2025
e540cac
Use bbInCatchHandlerBBRange and bbInFilterBBRange
hez2010 Jan 24, 2025
036010c
Cleanup fncRetType
hez2010 Jan 24, 2025
121242e
Add a runtime check to prevent accidental execution order change
hez2010 Jan 24, 2025
0b7ada3
Merge remote-tracking branch 'hez2010/inline-exact-devirt-prelim' int…
hez2010 Jan 24, 2025
8447a71
Format jit
hez2010 Jan 24, 2025
0a0e11c
Revert some changes
hez2010 Jan 24, 2025
e03fda2
Merge branch 'main' into inline-exact-devirt
hez2010 Jan 24, 2025
4507692
Remove unused local
hez2010 Jan 24, 2025
8fe9716
Check whether a call can be spilled without side effect
hez2010 Jan 24, 2025
778edcd
Get rid of BAD_VAR_NUM
hez2010 Jan 24, 2025
85cd617
Add comments for CanSpillCallWithoutSideEffect
hez2010 Jan 24, 2025
38cff9f
Use ancestors to estimate whether a call can be spilled or not
hez2010 Jan 24, 2025
67a7b54
Reset m_ancestors before walking
hez2010 Jan 24, 2025
ff1576c
Nit
hez2010 Jan 24, 2025
9d3befe
Fix assertion
hez2010 Jan 24, 2025
dbf7c21
Limit to GT_STORE_LCL_VAR only
hez2010 Jan 24, 2025
bda1438
Oops
hez2010 Jan 24, 2025
6d21975
Inline the check
hez2010 Jan 24, 2025
05492fa
Remove leftovers
hez2010 Jan 24, 2025
7164fc3
Hoist the check
hez2010 Jan 25, 2025
0415063
Make sure the store parent is the statement root
hez2010 Jan 25, 2025
1eb0422
Format JIT
hez2010 Jan 25, 2025
9d72f3a
Check side effects before trying inlining
hez2010 Jan 26, 2025
1e0b2d1
Fix
hez2010 Jan 26, 2025
a90b6de
Nit
hez2010 Jan 26, 2025
0378aab
Merge branch 'main' into inline-exact-devirt
hez2010 Jan 27, 2025
c7e041a
Merge branch 'main' into inline-exact-devirt
hez2010 Jan 27, 2025
0457417
Make lvHasLdAddrOp check optional
hez2010 Jan 27, 2025
b27a25a
Rename to early
hez2010 Jan 27, 2025
6c0e11b
Split effects if necessary
hez2010 Jan 28, 2025
5de54f8
Use gtSplitTree
hez2010 Jan 28, 2025
1e967c8
Teach gtSplitTree to support early use
hez2010 Jan 28, 2025
10cd910
Cleanup
hez2010 Jan 28, 2025
067064f
ClearInlineInfo is not needed
hez2010 Jan 28, 2025
6e067ce
Merge branch 'main' into inline-exact-devirt
hez2010 Jan 28, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -3790,7 +3790,7 @@ class Compiler
bool ignoreRoot = false);

bool gtSplitTree(
BasicBlock* block, Statement* stmt, GenTree* splitPoint, Statement** firstNewStmt, GenTree*** splitPointUse);
BasicBlock* block, Statement* stmt, GenTree* splitPoint, Statement** firstNewStmt, GenTree*** splitPointUse, bool early = false);

bool gtStoreDefinesField(
LclVarDsc* fieldVarDsc, ssize_t offset, unsigned size, ssize_t* pFieldStoreOffset, unsigned* pFieldStoreSize);
Expand Down Expand Up @@ -5114,6 +5114,8 @@ class Compiler
SpillCliqueSucc
};

friend class SubstitutePlaceholdersAndDevirtualizeWalker;

// Abstract class for receiving a callback while walking a spill clique
class SpillCliqueWalker
{
Expand Down
97 changes: 84 additions & 13 deletions src/coreclr/jit/fginline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,9 @@ bool Compiler::TypeInstantiationComplexityExceeds(CORINFO_CLASS_HANDLE handle, i

class SubstitutePlaceholdersAndDevirtualizeWalker : public GenTreeVisitor<SubstitutePlaceholdersAndDevirtualizeWalker>
{
bool m_madeChanges = false;
bool m_madeChanges = false;
Statement* m_curStmt = nullptr;
Statement* m_firstNewStmt = nullptr;

public:
enum
Expand All @@ -219,11 +221,29 @@ class SubstitutePlaceholdersAndDevirtualizeWalker : public GenTreeVisitor<Substi
{
}

bool MadeChanges()
bool MadeChanges() const
{
return m_madeChanges;
}

// ------------------------------------------------------------------------
// WalkStatement: Walk the tree of a statement, and return the first newly
// added statement if any, otherwise return the original statement.
//
// Arguments:
// stmt - the statement to walk.
//
// Return Value:
// The first newly added statement if any, or the original statement.
//
Statement* WalkStatement(Statement* stmt)
{
m_curStmt = stmt;
m_firstNewStmt = nullptr;
WalkTree(m_curStmt->GetRootNodePointer(), nullptr);
return m_firstNewStmt == nullptr ? m_curStmt : m_firstNewStmt;
}

fgWalkResult PreOrderVisit(GenTree** use, GenTree* user)
{
GenTree* tree = *use;
Expand Down Expand Up @@ -586,8 +606,59 @@ class SubstitutePlaceholdersAndDevirtualizeWalker : public GenTreeVisitor<Substi
}

CORINFO_CONTEXT_HANDLE contextInput = context;
context = nullptr;
m_compiler->impDevirtualizeCall(call, nullptr, &method, &methodFlags, &contextInput, &context,
isLateDevirtualization, explicitTailCall);

if (!call->IsVirtual())
{
assert(context != nullptr);
CORINFO_CALL_INFO callInfo = {};
callInfo.hMethod = method;
callInfo.methodFlags = methodFlags;
m_compiler->impMarkInlineCandidate(call, context, false, &callInfo);

if (call->IsInlineCandidate())
{
Statement* newStmt = nullptr;
GenTree** callUse = nullptr;
if (m_compiler->gtSplitTree(m_compiler->compCurBB, m_curStmt, call, &newStmt, &callUse, true))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Strictly speaking this split is unnecessarily conservative: it will introduce temps for the arguments of the call, which can bypass optimizations the inliner can make depending on the specific shape of the argument. Fixing that requires enhancing gtSplitTree a bit (see e.g. my version in this branch), however, even with that done you will run into issues around incorrect treatment of the retbuffer in the inliner. So I wouldn't suggest trying to do anything about it at this point, but it is a potential future improvement.

{
if (m_firstNewStmt == nullptr)
{
m_firstNewStmt = newStmt;
}
}

// If the call is the root expression in a statement, and it returns void,
// we can inline it directly without creating a RET_EXPR.
if (parent != nullptr || call->gtReturnType != TYP_VOID)
hez2010 marked this conversation as resolved.
Show resolved Hide resolved
{
Statement* stmt = m_compiler->gtNewStmt(call);
m_compiler->fgInsertStmtBefore(m_compiler->compCurBB, m_curStmt, stmt);
if (m_firstNewStmt == nullptr)
{
m_firstNewStmt = stmt;
}

GenTreeRetExpr* retExpr =
m_compiler->gtNewInlineCandidateReturnExpr(call->AsCall(),
genActualType(call->TypeGet()));
call->GetSingleInlineCandidateInfo()->retExpr = retExpr;

JITDUMP("Creating new RET_EXPR for [%06u]:\n", call->gtTreeID);
DISPTREE(retExpr);

*pTree = retExpr;
}

call->GetSingleInlineCandidateInfo()->exactContextHandle = context;
INDEBUG(call->GetSingleInlineCandidateInfo()->inlinersContext = call->gtInlineContext);

JITDUMP("New inline candidate due to late devirtualization:\n");
DISPTREE(call);
}
}
m_madeChanges = true;
}
}
Expand Down Expand Up @@ -730,17 +801,10 @@ PhaseStatus Compiler::fgInline()
do
{
// Make the current basic block address available globally
compCurBB = block;

for (Statement* const stmt : block->Statements())
compCurBB = block;
Statement* stmt = block->firstStmt();
while (stmt != nullptr)
{

#if defined(DEBUG)
// In debug builds we want the inline tree to show all failed
// inlines. Some inlines may fail very early and never make it to
// candidate stage. So scan the tree looking for those early failures.
fgWalkTreePre(stmt->GetRootNodePointer(), fgFindNonInlineCandidate, stmt);
#endif
// See if we need to replace some return value place holders.
// Also, see if this replacement enables further devirtualization.
//
Expand All @@ -755,7 +819,7 @@ PhaseStatus Compiler::fgInline()
// possible further optimization, as the (now complete) GT_RET_EXPR
// replacement may have enabled optimizations by providing more
// specific types for trees or variables.
walker.WalkTree(stmt->GetRootNodePointer(), nullptr);
stmt = walker.WalkStatement(stmt);

GenTree* expr = stmt->GetRootNode();

Expand Down Expand Up @@ -805,6 +869,13 @@ PhaseStatus Compiler::fgInline()
madeChanges = true;
stmt->SetRootNode(expr->AsOp()->gtOp1);
}

#if defined(DEBUG)
// In debug builds we want the inline tree to show all failed
// inlines.
fgWalkTreePre(stmt->GetRootNodePointer(), fgFindNonInlineCandidate, stmt);
#endif
stmt = stmt->GetNextStmt();
}

block = block->Next();
Expand Down
19 changes: 14 additions & 5 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17005,6 +17005,8 @@ bool Compiler::gtTreeHasSideEffects(GenTree* tree, GenTreeFlags flags /* = GTF_S
// firstNewStmt - [out] The first new statement that was introduced.
// [firstNewStmt..stmt) are the statements added by this function.
// splitNodeUse - The use of the tree to split at.
// early - The run is in the early phase where we still don't have valid
// GTF_GLOB_REF yet.
//
// Notes:
// This method turns all non-invariant nodes that would be executed before
Expand All @@ -17025,14 +17027,19 @@ bool Compiler::gtTreeHasSideEffects(GenTree* tree, GenTreeFlags flags /* = GTF_S
// Returns:
// True if any changes were made; false if nothing needed to be done to split the tree.
//
bool Compiler::gtSplitTree(
BasicBlock* block, Statement* stmt, GenTree* splitPoint, Statement** firstNewStmt, GenTree*** splitNodeUse)
bool Compiler::gtSplitTree(BasicBlock* block,
Statement* stmt,
GenTree* splitPoint,
Statement** firstNewStmt,
GenTree*** splitNodeUse,
bool early)
{
class Splitter final : public GenTreeVisitor<Splitter>
{
BasicBlock* m_bb;
Statement* m_splitStmt;
GenTree* m_splitNode;
bool m_early;

struct UseInfo
{
Expand All @@ -17049,11 +17056,12 @@ bool Compiler::gtSplitTree(
UseExecutionOrder = true
};

Splitter(Compiler* compiler, BasicBlock* bb, Statement* stmt, GenTree* splitNode)
Splitter(Compiler* compiler, BasicBlock* bb, Statement* stmt, GenTree* splitNode, bool early)
: GenTreeVisitor(compiler)
, m_bb(bb)
, m_splitStmt(stmt)
, m_splitNode(splitNode)
, m_early(early)
, m_useStack(compiler->getAllocator(CMK_ArrayStack))
{
}
Expand Down Expand Up @@ -17195,7 +17203,8 @@ bool Compiler::gtSplitTree(
return;
}

if ((*use)->OperIs(GT_LCL_VAR) && !m_compiler->lvaGetDesc((*use)->AsLclVarCommon())->IsAddressExposed())
if ((*use)->OperIs(GT_LCL_VAR) && !m_compiler->lvaGetDesc((*use)->AsLclVarCommon())->IsAddressExposed() &&
!(m_early && m_compiler->lvaGetDesc((*use)->AsLclVarCommon())->lvHasLdAddrOp))
{
// The splitting we do here should always guarantee that we
// only introduce locals for the tree edges that overlap the
Expand Down Expand Up @@ -17278,7 +17287,7 @@ bool Compiler::gtSplitTree(
}
};

Splitter splitter(this, block, stmt, splitPoint);
Splitter splitter(this, block, stmt, splitPoint, early);
splitter.WalkTree(stmt->GetRootNodePointer(), nullptr);
*firstNewStmt = splitter.FirstStatement;
*splitNodeUse = splitter.SplitNodeUse;
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/importercalls.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,7 @@ var_types Compiler::impImportCall(OPCODE opcode,
assert((sig->callConv & CORINFO_CALLCONV_MASK) != CORINFO_CALLCONV_VARARG &&
(sig->callConv & CORINFO_CALLCONV_MASK) != CORINFO_CALLCONV_NATIVEVARARG);

call = gtNewIndCallNode(stubAddr, callRetTyp);
call = gtNewIndCallNode(stubAddr, callRetTyp, di);

call->gtFlags |= GTF_EXCEPT | (stubAddr->gtFlags & GTF_GLOB_EFFECT);
call->gtFlags |= GTF_CALL_VIRT_STUB;
Expand Down
Loading