diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index de116a31f9ebb1..131e20faaef3d1 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -3935,6 +3935,9 @@ class Compiler void gtPeelOffsets(GenTree** addr, target_ssize_t* offset, FieldSeq** fldSeq = nullptr) const; + GenTree* gtPeelFieldAddrs(GenTree* addr) const; + const GenTree* gtPeelFieldAddrs(const GenTree* addr) const; + // Return true if call is a recursive call; return false otherwise. // Note when inlining, this looks for calls back to the root method. bool gtIsRecursiveCall(GenTreeCall* call, bool useInlineRoot = true) @@ -7382,6 +7385,7 @@ class Compiler PhaseStatus fgForwardSub(); bool fgForwardSubBlock(BasicBlock* block); bool fgForwardSubStatement(Statement* statement); + bool fgForwardSubMultiUse(Statement* nextStmt, unsigned lclNum, GenTree* fwdSubNode); bool fgForwardSubHasStoreInterference(Statement* defStmt, Statement* nextStmt, GenTree* nextStmtUse); void fgForwardSubUpdateLiveness(GenTree* newSubListFirst, GenTree* newSubListLast); diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index b3c78ca1a5d515..d4cf3f03a5e91f 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -989,11 +989,7 @@ bool Compiler::fgAddrCouldBeNull(GenTree* addr) // bool Compiler::fgAddrCouldBeHeap(GenTree* addr) { - GenTree* op = addr; - while (op->OperIs(GT_FIELD_ADDR) && op->AsFieldAddr()->IsInstance()) - { - op = op->AsFieldAddr()->GetFldObj(); - } + GenTree* op = gtPeelFieldAddrs(addr); target_ssize_t offset; gtPeelOffsets(&op, &offset); diff --git a/src/coreclr/jit/forwardsub.cpp b/src/coreclr/jit/forwardsub.cpp index 2b8ac8558dd59b..69e72474c43a98 100644 --- a/src/coreclr/jit/forwardsub.cpp +++ b/src/coreclr/jit/forwardsub.cpp @@ -79,6 +79,11 @@ // // Possible enhancements: // * Allow fwd sub of "simple, cheap" trees when there's more than one use. +// PARTIAL: cheap reorderable address-of-local trees with up to four uses +// in the next statement (e.g. dup-spilled `&this.foo.bar` that feeds both +// `IND` and `STOREIND` for a compound assignment) are now handled below. +// See `fgIsCheapReorderableAddressTree` and the multi-use path in +// `fgForwardSubStatement`. // * Search more widely for the use. // * Use height/depth to avoid blowing morph's recursion, rather than tree size. // * Sub across a block boundary if successor block is unique, join-free, @@ -91,6 +96,228 @@ // //------------------------------------------------------------------------ +//------------------------------------------------------------------------ +// fgIsCheapReorderableAddressTree: Return true if `tree` is a small, +// reorderable address-of-local expression that is cheap to recompute +// and safe to clone into multiple use sites. +// +// Arguments: +// compiler - the compiler instance (for the chain-walk helper) +// tree - candidate tree +// +// Returns: +// true if `tree` is a (possibly empty) chain of instance FIELD_ADDR +// nodes wrapping a LCL_VAR or LCL_ADDR of type BYREF/I_IMPL, with no +// side effects other than GTF_EXCEPT (the standard FIELD_ADDR null +// check). The zero-hop case (a bare LCL_VAR/LCL_ADDR address-of-local) +// is included. +// +// Remarks: +// Such trees morph into a single `ADD(base, constOffset)` per copy plus +// a NULLCHECK that assertion prop subsequently dedups. Duplicating them +// is essentially free at runtime and unblocks address-mode containment +// for the contained indirections downstream. +// +static bool fgIsCheapReorderableAddressTree(Compiler* compiler, GenTree* tree) +{ + if (!tree->TypeIs(TYP_BYREF, TYP_I_IMPL)) + { + return false; + } + + // Only allow GTF_EXCEPT side effects (from FIELD_ADDR null checks). + if ((tree->gtFlags & GTF_ALL_EFFECT & ~GTF_EXCEPT) != 0) + { + return false; + } + + GenTree* base = compiler->gtPeelFieldAddrs(tree); + return base->OperIs(GT_LCL_VAR, GT_LCL_ADDR); +} + +//------------------------------------------------------------------------ +// fgForwardSubMultiUse: substitute `fwdSubNode` at every use of `lclNum` +// in `nextStmt` (cloning for every site but the last). +// +// Arguments: +// nextStmt - the consumer statement +// lclNum - the local whose uses are to be replaced +// fwdSubNode - the tree to substitute (must be a side-effect-free or +// only-GTF_EXCEPT cheap address expression as established +// by `fgIsCheapReorderableAddressTree`) +// +// Returns: +// true on success. False indicates the caller should fall back to a +// no-op (the IR has not been modified). +// +// Remarks: +// Re-sequences the locals list of `nextStmt`, updates side effects, and +// conservatively clears last-use bits on locals inside the inserted +// clones (they may have been marked dead in the original def position +// but cannot be considered dead once duplicated across several uses). +// +bool Compiler::fgForwardSubMultiUse(Statement* nextStmt, unsigned lclNum, GenTree* fwdSubNode) +{ + // Cap the number of clones we'll make. The targeted patterns (e.g. the + // C# compiler's `obj.struct.field op= rhs` lowering) hit exactly two uses. + // Anything beyond a handful starts to look more like a code-size hazard + // than an address-mode containment win. + constexpr int MaxUses = 4; + + struct CollectVisitor : public GenTreeVisitor + { + enum + { + DoPreOrder = true, + UseExecutionOrder = true, + }; + + unsigned m_lclNum; + ArrayStack m_useSlots; + bool m_bail = false; + + CollectVisitor(Compiler* comp, unsigned lclNum) + : GenTreeVisitor(comp) + , m_lclNum(lclNum) + , m_useSlots(comp->getAllocator(CMK_Generic)) + { + } + + fgWalkResult PreOrderVisit(GenTree** use, GenTree* user) + { + GenTree* node = *use; + if (node->OperIs(GT_LCL_VAR) && (node->AsLclVarCommon()->GetLclNum() == m_lclNum)) + { + // Mirror the single-use ForwardSubVisitor check: substituting a + // complex tree (e.g. FIELD_ADDR) into an indirect-call control + // expression would later trip fgGetStubAddrArg, which calls + // gtClone(complexOK=true) and cannot handle FIELD_ADDR. Bail + // out of the multi-use path if we hit such a context. + if ((user != nullptr) && user->IsCall()) + { + GenTreeCall* const parentCall = user->AsCall(); + if ((parentCall->gtCallType == CT_INDIRECT) && (parentCall->gtControlExpr == node)) + { + m_bail = true; + return fgWalkResult::WALK_ABORT; + } + } + m_useSlots.Push(use); + } + return fgWalkResult::WALK_CONTINUE; + } + }; + + CollectVisitor v(this, lclNum); + v.WalkTree(nextStmt->GetRootNodePointer(), nullptr); + + if (v.m_bail) + { + return false; + } + + int const useCount = v.m_useSlots.Height(); + if ((useCount < 2) || (useCount > MaxUses)) + { + return false; + } + + // Pre-allocate every clone up front so we can bail without mutating the IR if + // gtCloneExpr ever refuses to duplicate the tree. + int const lastIdx = useCount - 1; + ArrayStack clones(getAllocator(CMK_Generic)); + for (int i = 0; i < lastIdx; i++) + { + GenTree* const clone = gtCloneExpr(fwdSubNode); + if (clone == nullptr) + { + return false; + } + clones.Push(clone); + } + + // Replace all-but-last use sites with a clone; the last use site gets the original tree. + for (int i = 0; i < lastIdx; i++) + { + *v.m_useSlots.BottomRef(i) = clones.Bottom(i); + } + *v.m_useSlots.BottomRef(lastIdx) = fwdSubNode; + + // After substitution we have N clones inserted at the original use sites + // of `lclNum`, each potentially referencing locals that already appeared + // elsewhere in `nextStmt`. Two correctness fixups are required: + // + // (a) GTF_VAR_DEATH_MASK was copied from the def position by gtCloneExpr; + // any one (or all) copies may have death bits that are no longer + // semantically valid now that there are multiple copies. + // (b) Earlier LCL_VAR references in nextStmt to one of the locals appearing + // inside fwdSubNode may have been a "last use" of that local; the new + // copies make them no longer last. + // + // Be conservative: clear GTF_VAR_DEATH_MASK on every LCL_VAR in nextStmt + // whose lclNum appears anywhere in fwdSubNode. This is the multi-use analogue + // of fgForwardSubUpdateLiveness. + struct CollectLclNumsVisitor : public GenTreeVisitor + { + enum + { + DoPreOrder = true, + }; + + ArrayStack m_lclNums; + + CollectLclNumsVisitor(Compiler* comp) + : GenTreeVisitor(comp) + , m_lclNums(comp->getAllocator(CMK_Generic)) + { + } + + fgWalkResult PreOrderVisit(GenTree** use, GenTree* user) + { + GenTree* node = *use; + if (node->OperIsLocal()) + { + unsigned const ln = node->AsLclVarCommon()->GetLclNum(); + bool seen = false; + for (int i = 0; i < m_lclNums.Height(); i++) + { + if (m_lclNums.Bottom(i) == ln) + { + seen = true; + break; + } + } + if (!seen) + { + m_lclNums.Push(ln); + } + } + return fgWalkResult::WALK_CONTINUE; + } + }; + + CollectLclNumsVisitor cnv(this); + cnv.WalkTree(&fwdSubNode, nullptr); + + fgSequenceLocals(nextStmt); + + for (GenTreeLclVarCommon* lcl : nextStmt->LocalsTreeList()) + { + unsigned const ln = lcl->GetLclNum(); + for (int i = 0; i < cnv.m_lclNums.Height(); i++) + { + if (cnv.m_lclNums.Bottom(i) == ln) + { + lcl->gtFlags &= ~GTF_VAR_DEATH_MASK; + break; + } + } + } + + gtUpdateStmtSideEffects(nextStmt); + return true; +} + //------------------------------------------------------------------------ // fgForwardSub: run forward substitution in this method // @@ -533,9 +760,16 @@ bool Compiler::fgForwardSubStatement(Statement* stmt) // Statement* const nextStmt = stmt->GetNextStmt(); + // Allow multi-use forward sub of cheap, reorderable address trees (e.g. dup-spilled + // `&this.struct.field`). C# compound assignments on struct fields produce two such + // uses in the consumer statement, and leaving the temp in place blocks address-mode + // containment downstream. See `fgIsCheapReorderableAddressTree`. + bool const isCheapAddressTree = fgIsCheapReorderableAddressTree(this, fwdSubNode); + ForwardSubVisitor fsv(this, lclNum); // Do a quick scan through the linked locals list to see if there is a last use. - bool found = false; + bool found = false; + bool multiUse = false; for (GenTreeLclVarCommon* lcl : nextStmt->LocalsTreeList()) { if (lcl->OperIs(GT_LCL_VAR) && (lcl->GetLclNum() == lclNum)) @@ -545,6 +779,14 @@ bool Compiler::fgForwardSubStatement(Statement* stmt) found = true; break; } + + // Non-last direct use of the candidate local. Tolerate it only when we + // intend to clone the substitution tree at every site. + if (isCheapAddressTree) + { + multiUse = true; + continue; + } } if (fsv.IsUse(lcl)) @@ -853,6 +1095,20 @@ bool Compiler::fgForwardSubStatement(Statement* stmt) // Looks good, forward sub! // + if (multiUse) + { + if (!fgForwardSubMultiUse(nextStmt, lclNum, fwdSubNode)) + { + JITDUMP(" multi-use sub failed (count out of range, indirect-call context, or clone failed)\n"); + return false; + } + + JITDUMP(" -- multi-use fwd subbing [%06u]; new next stmt is\n", dspTreeID(fwdSubNode)); + DISPSTMT(nextStmt); + + return true; + } + GenTree** use = fsv.GetUse(); GenTreeLclVarCommon* useLcl = (*use)->AsLclVarCommon(); *use = fwdSubNode; diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 082d86cedd93f8..2387cfac2980b9 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -12945,11 +12945,7 @@ bool Compiler::impIsInvariant(const GenTree* tree) // bool Compiler::impIsAddressInLocal(const GenTree* tree, GenTree** lclVarTreeOut) { - const GenTree* op = tree; - while (op->OperIs(GT_FIELD_ADDR) && op->AsFieldAddr()->IsInstance()) - { - op = op->AsFieldAddr()->GetFldObj(); - } + const GenTree* op = gtPeelFieldAddrs(tree); if (op->OperIs(GT_LCL_ADDR)) { diff --git a/src/coreclr/jit/promotiondecomposition.cpp b/src/coreclr/jit/promotiondecomposition.cpp index f82c33c6e1b910..3a7e3f4a2454fe 100644 --- a/src/coreclr/jit/promotiondecomposition.cpp +++ b/src/coreclr/jit/promotiondecomposition.cpp @@ -1297,6 +1297,44 @@ void Compiler::gtPeelOffsets(GenTree** addr, target_ssize_t* offset, FieldSeq** } } +//------------------------------------------------------------------------ +// gtPeelFieldAddrs: Peel any chain of instance GT_FIELD_ADDR nodes off the +// specified address and return the underlying base node. +// +// Arguments: +// addr - The address node. +// +// Returns: +// The first node along the chain that is not an instance GT_FIELD_ADDR. +// For example, given FIELD_ADDR(FIELD_ADDR(LCL_VAR this, a), b), returns +// the LCL_VAR. +// +// Remarks: +// Static field addresses (where `IsInstance()` is false) are not peeled, +// since they carry a runtime helper call rather than a simple +// constant-offset addend. +// +GenTree* Compiler::gtPeelFieldAddrs(GenTree* addr) const +{ + while (addr->OperIs(GT_FIELD_ADDR) && addr->AsFieldAddr()->IsInstance()) + { + addr = addr->AsFieldAddr()->GetFldObj(); + } + return addr; +} + +//------------------------------------------------------------------------ +// gtPeelFieldAddrs (const overload): see the non-const variant above. +// +// GenTreeFieldAddr::GetFldObj() returns a mutable GenTree* even from a const +// receiver, so we localize the const_cast here rather than asking every +// const-correct caller to perform one at the use site. +// +const GenTree* Compiler::gtPeelFieldAddrs(const GenTree* addr) const +{ + return gtPeelFieldAddrs(const_cast(addr)); +} + // HandleStructStore: // Handle a store that may be between struct locals with replacements. //