Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -3935,6 +3935,9 @@ class Compiler

void gtPeelOffsets(GenTree** addr, target_ssize_t* offset, FieldSeq** fldSeq = nullptr) const;

GenTree* gtPeelFieldAddrs(GenTree* addr) const;
const GenTree* gtPeelFieldAddrs(const GenTree* addr) const;

// Return true if call is a recursive call; return false otherwise.
// Note when inlining, this looks for calls back to the root method.
bool gtIsRecursiveCall(GenTreeCall* call, bool useInlineRoot = true)
Expand Down Expand Up @@ -7382,6 +7385,7 @@ class Compiler
PhaseStatus fgForwardSub();
bool fgForwardSubBlock(BasicBlock* block);
bool fgForwardSubStatement(Statement* statement);
bool fgForwardSubMultiUse(Statement* nextStmt, unsigned lclNum, GenTree* fwdSubNode);
bool fgForwardSubHasStoreInterference(Statement* defStmt, Statement* nextStmt, GenTree* nextStmtUse);
void fgForwardSubUpdateLiveness(GenTree* newSubListFirst, GenTree* newSubListLast);

Expand Down
6 changes: 1 addition & 5 deletions src/coreclr/jit/flowgraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -989,11 +989,7 @@ bool Compiler::fgAddrCouldBeNull(GenTree* addr)
//
bool Compiler::fgAddrCouldBeHeap(GenTree* addr)
{
GenTree* op = addr;
while (op->OperIs(GT_FIELD_ADDR) && op->AsFieldAddr()->IsInstance())
{
op = op->AsFieldAddr()->GetFldObj();
}
GenTree* op = gtPeelFieldAddrs(addr);

target_ssize_t offset;
gtPeelOffsets(&op, &offset);
Expand Down
256 changes: 255 additions & 1 deletion src/coreclr/jit/forwardsub.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,11 @@
//
// Possible enhancements:
// * Allow fwd sub of "simple, cheap" trees when there's more than one use.
// PARTIAL: cheap reorderable address-of-local trees with up to four uses
// in the next statement (e.g. dup-spilled `&this.foo.bar` that feeds both
// `IND` and `STOREIND` for a compound assignment) are now handled below.
// See `fgIsCheapReorderableAddressTree` and the multi-use path in
// `fgForwardSubStatement`.
// * Search more widely for the use.
// * Use height/depth to avoid blowing morph's recursion, rather than tree size.
// * Sub across a block boundary if successor block is unique, join-free,
Expand All @@ -91,6 +96,226 @@
//
//------------------------------------------------------------------------

//------------------------------------------------------------------------
// fgIsCheapReorderableAddressTree: Return true if `tree` is a small,
// reorderable address-of-local expression that is cheap to recompute
// and safe to clone into multiple use sites.
//
// Arguments:
// compiler - the compiler instance (for the chain-walk helper)
// tree - candidate tree
//
// Returns:
// true if `tree` is a FIELD_ADDR chain (instance fields only) rooted
// at a LCL_VAR or LCL_ADDR, with no side effects other than GTF_EXCEPT
// (the standard FIELD_ADDR null-check).
//
Comment on lines +108 to +112
// Remarks:
// Such trees morph into a single `ADD(base, constOffset)` per copy plus
// a NULLCHECK that assertion prop subsequently dedups. Duplicating them
// is essentially free at runtime and unblocks address-mode containment
// for the contained indirections downstream.
//
static bool fgIsCheapReorderableAddressTree(Compiler* compiler, GenTree* tree)
{
if (!tree->TypeIs(TYP_BYREF, TYP_I_IMPL))
{
return false;
}

// Only allow GTF_EXCEPT side effects (from FIELD_ADDR null checks).
if ((tree->gtFlags & GTF_ALL_EFFECT & ~GTF_EXCEPT) != 0)
{
return false;
}

GenTree* base = compiler->gtPeelFieldAddrs(tree);
return base->OperIs(GT_LCL_VAR, GT_LCL_ADDR);
}

//------------------------------------------------------------------------
// fgForwardSubMultiUse: substitute `fwdSubNode` at every use of `lclNum`
// in `nextStmt` (cloning for every site but the last).
//
// Arguments:
// nextStmt - the consumer statement
// lclNum - the local whose uses are to be replaced
// fwdSubNode - the tree to substitute (must be a side-effect-free or
// only-GTF_EXCEPT cheap address expression as established
// by `fgIsCheapReorderableAddressTree`)
//
// Returns:
// true on success. False indicates the caller should fall back to a
// no-op (the IR has not been modified).
//
// Remarks:
// Re-sequences the locals list of `nextStmt`, updates side effects, and
// conservatively clears last-use bits on locals inside the inserted
// clones (they may have been marked dead in the original def position
// but cannot be considered dead once duplicated across several uses).
//
bool Compiler::fgForwardSubMultiUse(Statement* nextStmt, unsigned lclNum, GenTree* fwdSubNode)
{
// Cap the number of clones we'll make. The targeted patterns (e.g. the
// C# compiler's `obj.struct.field op= rhs` lowering) hit exactly two uses.
// Anything beyond a handful starts to look more like a code-size hazard
// than an address-mode containment win.
constexpr int MaxUses = 4;

struct CollectVisitor : public GenTreeVisitor<CollectVisitor>
{
enum
{
DoPreOrder = true,
UseExecutionOrder = true,
};

unsigned m_lclNum;
ArrayStack<GenTree**> m_useSlots;
bool m_bail = false;

CollectVisitor(Compiler* comp, unsigned lclNum)
: GenTreeVisitor<CollectVisitor>(comp)
, m_lclNum(lclNum)
, m_useSlots(comp->getAllocator(CMK_Generic))
{
}

fgWalkResult PreOrderVisit(GenTree** use, GenTree* user)
{
GenTree* node = *use;
if (node->OperIs(GT_LCL_VAR) && (node->AsLclVarCommon()->GetLclNum() == m_lclNum))
{
// Mirror the single-use ForwardSubVisitor check: substituting a
// complex tree (e.g. FIELD_ADDR) into an indirect-call control
// expression would later trip fgGetStubAddrArg, which calls
// gtClone(complexOK=true) and cannot handle FIELD_ADDR. Bail
// out of the multi-use path if we hit such a context.
if ((user != nullptr) && user->IsCall())
{
GenTreeCall* const parentCall = user->AsCall();
if ((parentCall->gtCallType == CT_INDIRECT) && (parentCall->gtControlExpr == node))
{
m_bail = true;
return fgWalkResult::WALK_ABORT;
}
}
m_useSlots.Push(use);
}
return fgWalkResult::WALK_CONTINUE;
}
};

CollectVisitor v(this, lclNum);
v.WalkTree(nextStmt->GetRootNodePointer(), nullptr);

if (v.m_bail)
{
return false;
}

int const useCount = v.m_useSlots.Height();
if ((useCount < 2) || (useCount > MaxUses))
{
return false;
}

// Pre-allocate every clone up front so we can bail without mutating the IR if
// gtCloneExpr ever refuses to duplicate the tree.
int const lastIdx = useCount - 1;
ArrayStack<GenTree*> clones(getAllocator(CMK_Generic));
for (int i = 0; i < lastIdx; i++)
{
GenTree* const clone = gtCloneExpr(fwdSubNode);
if (clone == nullptr)
{
return false;
}
clones.Push(clone);
}

// Replace all-but-last use sites with a clone; the last use site gets the original tree.
for (int i = 0; i < lastIdx; i++)
{
*v.m_useSlots.BottomRef(i) = clones.Bottom(i);
}
*v.m_useSlots.BottomRef(lastIdx) = fwdSubNode;

// After substitution we have N clones inserted at the original use sites
// of `lclNum`, each potentially referencing locals that already appeared
// elsewhere in `nextStmt`. Two correctness fixups are required:
//
// (a) GTF_VAR_DEATH_MASK was copied from the def position by gtCloneExpr;
// any one (or all) copies may have death bits that are no longer
// semantically valid now that there are multiple copies.
// (b) Earlier LCL_VAR references in nextStmt to one of the locals appearing
// inside fwdSubNode may have been a "last use" of that local; the new
// copies make them no longer last.
//
// Be conservative: clear GTF_VAR_DEATH_MASK on every LCL_VAR in nextStmt
// whose lclNum appears anywhere in fwdSubNode. This is the multi-use analogue
// of fgForwardSubUpdateLiveness.
struct CollectLclNumsVisitor : public GenTreeVisitor<CollectLclNumsVisitor>
{
enum
{
DoPreOrder = true,
};

ArrayStack<unsigned> m_lclNums;

CollectLclNumsVisitor(Compiler* comp)
: GenTreeVisitor<CollectLclNumsVisitor>(comp)
, m_lclNums(comp->getAllocator(CMK_Generic))
{
}

fgWalkResult PreOrderVisit(GenTree** use, GenTree* user)
{
GenTree* node = *use;
if (node->OperIsLocal())
{
unsigned const ln = node->AsLclVarCommon()->GetLclNum();
bool seen = false;
for (int i = 0; i < m_lclNums.Height(); i++)
{
if (m_lclNums.Bottom(i) == ln)
{
seen = true;
break;
}
}
if (!seen)
{
m_lclNums.Push(ln);
}
}
return fgWalkResult::WALK_CONTINUE;
}
};

CollectLclNumsVisitor cnv(this);
cnv.WalkTree(&fwdSubNode, nullptr);

fgSequenceLocals(nextStmt);

for (GenTreeLclVarCommon* lcl : nextStmt->LocalsTreeList())
{
unsigned const ln = lcl->GetLclNum();
for (int i = 0; i < cnv.m_lclNums.Height(); i++)
{
if (cnv.m_lclNums.Bottom(i) == ln)
{
lcl->gtFlags &= ~GTF_VAR_DEATH_MASK;
break;
}
}
}

gtUpdateStmtSideEffects(nextStmt);
return true;
}

//------------------------------------------------------------------------
// fgForwardSub: run forward substitution in this method
//
Expand Down Expand Up @@ -533,9 +758,16 @@ bool Compiler::fgForwardSubStatement(Statement* stmt)
//
Statement* const nextStmt = stmt->GetNextStmt();

// Allow multi-use forward sub of cheap, reorderable address trees (e.g. dup-spilled
// `&this.struct.field`). C# compound assignments on struct fields produce two such
// uses in the consumer statement, and leaving the temp in place blocks address-mode
// containment downstream. See `fgIsCheapReorderableAddressTree`.
bool const isCheapAddressTree = fgIsCheapReorderableAddressTree(this, fwdSubNode);

ForwardSubVisitor fsv(this, lclNum);
// Do a quick scan through the linked locals list to see if there is a last use.
bool found = false;
bool found = false;
bool multiUse = false;
for (GenTreeLclVarCommon* lcl : nextStmt->LocalsTreeList())
{
if (lcl->OperIs(GT_LCL_VAR) && (lcl->GetLclNum() == lclNum))
Expand All @@ -545,6 +777,14 @@ bool Compiler::fgForwardSubStatement(Statement* stmt)
found = true;
break;
}

// Non-last direct use of the candidate local. Tolerate it only when we
// intend to clone the substitution tree at every site.
if (isCheapAddressTree)
{
multiUse = true;
continue;
}
}

if (fsv.IsUse(lcl))
Expand Down Expand Up @@ -853,6 +1093,20 @@ bool Compiler::fgForwardSubStatement(Statement* stmt)

// Looks good, forward sub!
//
if (multiUse)
{
if (!fgForwardSubMultiUse(nextStmt, lclNum, fwdSubNode))
{
JITDUMP(" multi-use sub found too few or too many uses\n");
return false;
}
Comment on lines +1098 to +1102

JITDUMP(" -- multi-use fwd subbing [%06u]; new next stmt is\n", dspTreeID(fwdSubNode));
DISPSTMT(nextStmt);

return true;
}

GenTree** use = fsv.GetUse();
GenTreeLclVarCommon* useLcl = (*use)->AsLclVarCommon();
*use = fwdSubNode;
Expand Down
6 changes: 1 addition & 5 deletions src/coreclr/jit/importer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12945,11 +12945,7 @@ bool Compiler::impIsInvariant(const GenTree* tree)
//
bool Compiler::impIsAddressInLocal(const GenTree* tree, GenTree** lclVarTreeOut)
{
const GenTree* op = tree;
while (op->OperIs(GT_FIELD_ADDR) && op->AsFieldAddr()->IsInstance())
{
op = op->AsFieldAddr()->GetFldObj();
}
const GenTree* op = gtPeelFieldAddrs(tree);

if (op->OperIs(GT_LCL_ADDR))
{
Expand Down
38 changes: 38 additions & 0 deletions src/coreclr/jit/promotiondecomposition.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1297,6 +1297,44 @@ void Compiler::gtPeelOffsets(GenTree** addr, target_ssize_t* offset, FieldSeq**
}
}

//------------------------------------------------------------------------
// gtPeelFieldAddrs: Peel any chain of instance GT_FIELD_ADDR nodes off the
// specified address and return the underlying base node.
//
// Arguments:
// addr - The address node.
//
// Returns:
// The first node along the chain that is not an instance GT_FIELD_ADDR.
// For example, given FIELD_ADDR(FIELD_ADDR(LCL_VAR this, a), b), returns
// the LCL_VAR.
//
// Remarks:
// Static field addresses (where `IsInstance()` is false) are not peeled,
// since they carry a runtime helper call rather than a simple
// constant-offset addend.
//
GenTree* Compiler::gtPeelFieldAddrs(GenTree* addr) const
{
while (addr->OperIs(GT_FIELD_ADDR) && addr->AsFieldAddr()->IsInstance())
{
addr = addr->AsFieldAddr()->GetFldObj();
}
return addr;
}

//------------------------------------------------------------------------
// gtPeelFieldAddrs (const overload): see the non-const variant above.
//
// GenTreeFieldAddr::GetFldObj() returns a mutable GenTree* even from a const
// receiver, so we localize the const_cast here rather than asking every
// const-correct caller to perform one at the use site.
//
const GenTree* Compiler::gtPeelFieldAddrs(const GenTree* addr) const
{
return gtPeelFieldAddrs(const_cast<GenTree*>(addr));
}

// HandleStructStore:
// Handle a store that may be between struct locals with replacements.
//
Expand Down
Loading