Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 21 additions & 10 deletions cc/arch/aarch64/regalloc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -670,7 +670,7 @@ pub struct RegAlloc {
/// Arguments spilled from caller-saved registers to stack
spilled_args: Vec<SpilledArg>,
/// Active stack slot intervals (interval, offset, size) for reuse tracking
active_stack: Vec<(LiveInterval, i32, i32)>,
active_stack: Vec<crate::arch::regalloc::ActiveSlot>,
/// Free stack slots keyed by size, available for reuse
free_stack_slots: BTreeMap<i32, Vec<FreeSlot>>,
/// Sym pseudos whose address is taken (cannot participate in slot reuse)
Expand Down Expand Up @@ -931,15 +931,13 @@ impl RegAlloc {
&mut self,
size: i32,
alignment: i32,
candidate: PseudoId,
) -> Option<i32> {
candidate_interval: &LiveInterval,
) -> Option<(i32, Vec<LiveInterval>)> {
super::super::regalloc::try_reuse_stack_slot(
&mut self.free_stack_slots,
size,
alignment,
candidate,
&self.live_in,
&self.live_out,
candidate_interval,
)
}

Expand All @@ -957,9 +955,14 @@ impl RegAlloc {
self.max_local_align = alignment;
}
if reusable {
if let Some(reused) = self.try_reuse_stack_slot(size, alignment, interval.pseudo) {
if let Some((reused, past)) = self.try_reuse_stack_slot(size, alignment, interval) {
self.locations.insert(interval.pseudo, Loc::Stack(reused));
self.active_stack.push((interval.clone(), reused, size));
self.active_stack.push(crate::arch::regalloc::ActiveSlot {
current: interval.clone(),
past,
offset: reused,
size,
});
return;
}
}
Expand All @@ -970,7 +973,12 @@ impl RegAlloc {
let offset = -self.stack_offset;
self.locations.insert(interval.pseudo, Loc::Stack(offset));
if reusable {
self.active_stack.push((interval.clone(), offset, size));
self.active_stack.push(crate::arch::regalloc::ActiveSlot {
current: interval.clone(),
past: Vec::new(),
offset,
size,
});
}
}

Expand Down Expand Up @@ -1052,7 +1060,10 @@ impl RegAlloc {
.map(|a| a as i32)
.unwrap_or(natural_align.max(8));
let aligned_size = (size + alignment - 1) & !(alignment - 1);
let reusable = !self.addr_taken_syms.contains(&interval.pseudo);
// Sym slot reuse disabled — see x86_64
// mirror for the rationale.
let _ = self.addr_taken_syms.contains(&interval.pseudo);
let reusable = false;
self.alloc_stack_slot(interval, aligned_size, alignment, reusable);
if types.is_float(local.typ) {
self.fp_pseudos.insert(interval.pseudo);
Expand Down
120 changes: 81 additions & 39 deletions cc/arch/regalloc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,33 @@ pub struct LiveInterval {
pub struct FreeSlot {
pub offset: i32,
pub alignment: i32,
/// The pseudo that previously owned this slot (used for interference checks)
pub owner: PseudoId,
/// The live intervals of *every* pseudo that has ever owned
/// this slot — current and prior owners. A candidate may
/// reuse the slot only if its own interval is disjoint from
/// EVERY entry. The "current owner only" approximation
/// (checking just the most-recent occupant) is unsafe:
/// suppose A owns the slot during [11,17], then frees it; B
/// reuses during [81,130], also frees. If a candidate C with
/// interval [6,42] now checks "do I overlap B?" it sees no
/// conflict — but C overlaps A's old window [11,17] and would
/// read garbage during that span. CPython's `double_round`
/// miscompile (assertAlmostEqual segfault) hits this exact
/// pattern.
pub owner_intervals: Vec<LiveInterval>,
}

/// A stack slot currently assigned to a live pseudo, plus the full
/// history of past owners. Used by `expire_stack_intervals` and
/// `try_reuse_stack_slot` to preserve history across reuse cycles.
#[derive(Debug, Clone)]
pub struct ActiveSlot {
/// The current owner's live interval — drives `expire_stack_intervals`.
pub current: LiveInterval,
/// Past owners (FIFO) whose intervals already ended but whose
/// constraints must still be respected on the next reuse.
pub past: Vec<LiveInterval>,
pub offset: i32,
pub size: i32,
}

/// A point in the function where register constraints apply.
Expand Down Expand Up @@ -133,23 +158,37 @@ pub struct ConstraintPoint<R> {
/// stp/ldp offsets past the architectural [-512, 504] immediate range
/// and the assembler rejects the output.
pub fn expire_stack_intervals(
active_stack: &mut Vec<(LiveInterval, i32, i32)>,
active_stack: &mut Vec<ActiveSlot>,
free_slots: &mut BTreeMap<i32, Vec<FreeSlot>>,
point: usize,
) {
active_stack.retain(|(interval, offset, size)| {
if interval.end < point {
let alignment = if *size >= 16 { 16 } else { 8 };
free_slots.entry(*size).or_default().push(FreeSlot {
offset: *offset,
alignment,
owner: interval.pseudo,
});
let mut to_free: Vec<ActiveSlot> = Vec::new();
active_stack.retain(|slot| {
if slot.current.end < point {
to_free.push(slot.clone());
false
} else {
true
}
});
for slot in to_free {
let alignment = if slot.size >= 16 { 16 } else { 8 };
// The freed slot's history is `past ++ [current]`.
let mut owners = slot.past;
owners.push(slot.current);
// Merge with any existing FreeSlot at the same offset (an
// earlier owner that already freed and is waiting for reuse).
let slots = free_slots.entry(slot.size).or_default();
if let Some(existing) = slots.iter_mut().find(|s| s.offset == slot.offset) {
existing.owner_intervals.extend(owners);
} else {
slots.push(FreeSlot {
offset: slot.offset,
alignment,
owner_intervals: owners,
});
}
}
}

/// Find all positions of call instructions in a function.
Expand Down Expand Up @@ -190,25 +229,6 @@ pub struct LivenessResult<R> {
pub live_out: Vec<HashSet<PseudoId>>,
}

/// Check whether two pseudos interfere (are simultaneously live in any block).
/// Uses block-level live_in/live_out sets from the dataflow fixpoint, which are
/// correct for all CFG shapes including computed gotos.
pub fn pseudos_interfere(
live_in: &[HashSet<PseudoId>],
live_out: &[HashSet<PseudoId>],
a: PseudoId,
b: PseudoId,
) -> bool {
for (li, lo) in live_in.iter().zip(live_out.iter()) {
let a_live = li.contains(&a) || lo.contains(&a);
let b_live = li.contains(&b) || lo.contains(&b);
if a_live && b_live {
return true;
}
}
false
}

/// Compute live intervals and constraint points for a function.
///
/// This is the shared core of live interval computation used by both x86-64 and AArch64.
Expand Down Expand Up @@ -1057,26 +1077,48 @@ pub fn next_use_distance(
}

/// Try to reuse a previously freed stack slot of the given size and alignment.
/// Uses block-level interference check to verify the candidate pseudo doesn't
/// overlap with the slot's previous owner.
/// Shared between x86_64 and aarch64 register allocators.
///
/// Reuse is safe iff `candidate_interval` is disjoint from **every**
/// past owner of the slot. Two intervals `[a.start, a.end]` and
/// `[b.start, b.end]` are disjoint iff `a.end < b.start` or
/// `b.end < a.start`.
///
/// History matters: when slot S is freed by A and reused by B, the
/// slot's "current owner" is B but A's interval still constrains any
/// future candidate. The earlier implementation checked only the
/// most-recent owner — when A.lifetime [11,17] and B.lifetime
/// [81,130] were both past owners, a candidate C with [6,42] saw
/// only B (no conflict) but actually collided with A's [11,17].
/// CPython's `double_round` miscompile (the assertAlmostEqual
/// segfault) reproduced this exact pattern.
///
/// When a slot is reused, the existing `FreeSlot` entry is removed
/// from `free_stack_slots`; the *new* owner's interval is appended to
/// the entry when the slot is freed again by `expire_stack_intervals`.
/// This preserves the full ownership history across the reuse cycle.
/// To make the history preservation work, this function returns
/// (offset, history) so the caller can stash history into
/// `active_stack` and have it re-merged on expire.
pub fn try_reuse_stack_slot(
free_stack_slots: &mut BTreeMap<i32, Vec<FreeSlot>>,
size: i32,
alignment: i32,
candidate: PseudoId,
live_in: &[HashSet<PseudoId>],
live_out: &[HashSet<PseudoId>],
) -> Option<i32> {
candidate_interval: &LiveInterval,
) -> Option<(i32, Vec<LiveInterval>)> {
if let Some(slots) = free_stack_slots.get_mut(&size) {
if let Some(idx) = slots.iter().position(|s| {
s.alignment >= alignment && !pseudos_interfere(live_in, live_out, candidate, s.owner)
if s.alignment < alignment {
return false;
}
s.owner_intervals.iter().all(|owner| {
owner.end < candidate_interval.start || candidate_interval.end < owner.start
})
}) {
let slot = slots.remove(idx);
if slots.is_empty() {
free_stack_slots.remove(&size);
}
return Some(slot.offset);
return Some((slot.offset, slot.owner_intervals));
}
}
None
Expand Down
54 changes: 44 additions & 10 deletions cc/arch/x86_64/regalloc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -459,7 +459,7 @@ pub struct RegAlloc {
/// XMM arguments spilled from XMM registers to stack
spilled_xmm_args: Vec<SpilledXmmArg>,
/// Active stack slot intervals (interval, offset, size) for reuse tracking
active_stack: Vec<(LiveInterval, i32, i32)>,
active_stack: Vec<crate::arch::regalloc::ActiveSlot>,
/// Free stack slots keyed by size, available for reuse
free_stack_slots: BTreeMap<i32, Vec<FreeSlot>>,
/// Sym pseudos whose address is taken (cannot participate in slot reuse)
Expand Down Expand Up @@ -955,15 +955,13 @@ impl RegAlloc {
&mut self,
size: i32,
alignment: i32,
candidate: PseudoId,
) -> Option<i32> {
candidate_interval: &LiveInterval,
) -> Option<(i32, Vec<LiveInterval>)> {
super::super::regalloc::try_reuse_stack_slot(
&mut self.free_stack_slots,
size,
alignment,
candidate,
&self.live_in,
&self.live_out,
candidate_interval,
)
}

Expand All @@ -983,9 +981,14 @@ impl RegAlloc {
self.max_local_align = alignment;
}
if reusable {
if let Some(reused) = self.try_reuse_stack_slot(size, alignment, interval.pseudo) {
if let Some((reused, past)) = self.try_reuse_stack_slot(size, alignment, interval) {
self.locations.insert(interval.pseudo, Loc::Stack(reused));
self.active_stack.push((interval.clone(), reused, size));
self.active_stack.push(crate::arch::regalloc::ActiveSlot {
current: interval.clone(),
past,
offset: reused,
size,
});
return;
}
}
Expand All @@ -996,7 +999,12 @@ impl RegAlloc {
let offset = self.stack_offset;
self.locations.insert(interval.pseudo, Loc::Stack(offset));
if reusable {
self.active_stack.push((interval.clone(), offset, size));
self.active_stack.push(crate::arch::regalloc::ActiveSlot {
current: interval.clone(),
past: Vec::new(),
offset,
size,
});
}
}

Expand Down Expand Up @@ -1084,7 +1092,33 @@ impl RegAlloc {
natural_align.max(8)
};
let aligned_size = (size + alignment - 1) & !(alignment - 1);
let reusable = !self.addr_taken_syms.contains(&interval.pseudo);
// Sym slot reuse disabled. The IR-level
// interval of a Sym pseudo only captures
// its direct Store/Load/SymAddr uses,
// not the lifetime of register pseudos
// that derive their values from the
// slot. Linear scan allocated slot
// offsets monotonically and so happened
// not to reuse Sym slots in conflicting
// ways; chordal coloring exposes the
// gap. CPython `_warnings.o::init_filters`
// and `flowgraph.o::_PyCfgBuilder_Addop`
// both miscompiled on the Sym-slot reuse
// pattern even with the
// [[interval-overlap fix]] (the Sym's
// IR interval ends before the derived
// register pseudo's lifetime does, so
// interval-overlap reports no conflict).
//
// Future fix: extend the Sym's interval
// to cover all derived register pseudos'
// lifetimes. Until then, Sym slots are
// permanent. The `addr_taken_syms`
// computation stays in place — it remains
// the correct gating predicate when slot
// reuse is re-enabled.
let _ = self.addr_taken_syms.contains(&interval.pseudo);
let reusable = false;
self.alloc_stack_slot(interval, aligned_size, alignment, reusable);
if types.is_float(local_var.typ) {
self.fp_pseudos.insert(interval.pseudo);
Expand Down
59 changes: 59 additions & 0 deletions cc/tests/codegen/regalloc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,65 @@ int main(void) {
);
}

#[test]
fn regalloc_slot_multi_owner_history() {
// Regression test for the multi-owner slot-reuse bug. Pseudo A
// owns slot S during [t0, t1]; A frees S. Pseudo B reuses S
// during [t2, t3] where t2 > t1. Pseudo C with lifetime that
// spans [t0.something, t3.something] then tries to reuse S
// checked-against-B-only (current owner) — sees no overlap and
// is granted S, but C's lifetime actually overlaps A's old
// window so C silently overwrites the slot in A's tail window.
//
// The bug was the bottom of the CPython `double_round`
// miscompile (assertAlmostEqual segfault — interval-overlap
// check only validated against the most-recent owner, not the
// full ownership history).
//
// C-level reproducer is fiddly to engineer reliably without
// depending on allocator internals; the test below relies on
// running a function with enough register pressure that
// chordal coloring triggers spill commits in an order that
// exercises the multi-owner case. The function does floating-
// point arithmetic interleaved with calls so the FP allocator
// path is exercised. CPython's full test suite is the broader
// regression coverage.
let code = r#"
#include <stdio.h>

__attribute__((noinline))
double helper(double a, double b, double c) {
return a * b + c;
}

__attribute__((noinline))
int compute(double x, double y, int n) {
double a = x + 1.0;
double b = y + 1.0;
double c = helper(a, b, x);
double d = helper(b, a, y);
double e = c + d;
if (n > 0) {
e = e + helper(a, c, d);
}
if (e != e) return 99;
return (e > 0.0) ? 1 : 0;
}

int main(void) {
if (compute(1.0, 2.0, 1) != 1) return 1;
if (compute(-1.0, -2.0, 1) != 0) return 2;
if (compute(0.5, 0.25, 0) != 1) return 3;
return 0;
}
"#;
assert_eq!(
compile_and_run("regalloc_slot_multi_owner_history", code, &[]),
0,
"stack slot reuse must check candidate's interval against ALL past owners, not just the current one"
);
}

// ============================================================================
// Sym (named local) stack coloring tests
// ============================================================================
Expand Down
Loading