Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 29 additions & 66 deletions rust/lance-index/src/scalar/inverted/impact.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,30 +56,14 @@ impl PartialEq for ImpactSkipData {
}
}

/// Test-only score/scan-count pair returned by [`ImpactSkipData::max_score_up_to`].
///
/// Gated behind `cfg(test)`, so the type only exists in test builds.
#[cfg(test)]
#[derive(Debug, Clone, Copy)]
pub struct ImpactScore {
/// Score half of the pair.
/// NOTE(review): presumably the maximum score bound found by the scan — confirm
/// against `max_score_up_to`.
pub score: f32,
/// Scan-count half of the pair.
/// NOTE(review): presumably the number of impact entries the scan visited — confirm.
pub entries_scanned: usize,
}

/// Handle passed as `&mut` to the `*_cached` scoring helpers.
///
/// The struct declares no fields, so it carries no state of its own.
#[derive(Debug, Default, Clone)]
pub struct ImpactScoreCache {}

impl ImpactScoreCache {
    /// Score bound for the impact entry at `entry_idx`.
    ///
    /// Multiplies `query_weight` by the entry's value in
    /// `impacts.doc_weight_bounds(scorer)`. A zero or negative
    /// `query_weight` short-circuits to `0.0` before the bounds are read.
    fn entry_score<S>(
        &mut self,
        impacts: &ImpactSkipData,
        entry_idx: usize,
        query_weight: f32,
        scorer: &S,
    ) -> f32
    where
        S: Scorer + ?Sized,
    {
        // Guard first: a non-positive weight must never index the bounds.
        if query_weight <= 0.0 {
            return 0.0;
        }
        let doc_weight_bound = impacts.doc_weight_bounds(scorer)[entry_idx];
        query_weight * doc_weight_bound
    }
}

#[derive(Debug, Clone, Copy)]
struct ImpactEntryHeader {
doc_up_to: u32,
Expand Down Expand Up @@ -160,6 +144,12 @@ impl ImpactSkipData {
&self.baked_bounds(scorer).0
}

/// Level0 slice of the baked per-block max doc weights: the first
/// `level0_len` values of the baked bounds. Meant for bulk skip scans over
/// dead ranges that do not need per-block window bookkeeping.
pub(crate) fn level0_doc_weight_bounds<S: Scorer + ?Sized>(&self, scorer: &S) -> &[f32] {
    let baked = self.baked_bounds(scorer);
    &baked.0[..self.level0_len]
}

/// List-wide max doc weight, from the baked bounds. The tightest valid
/// global score bound for this list is `query_weight * this`, matching
/// what the non-impact format stores as `max_score` at build time — but
Expand Down Expand Up @@ -192,11 +182,20 @@ impl ImpactSkipData {
}
}

/// Last doc id covered by the level0 entry of `block_idx`.
///
/// Yields `None` when `block_idx` lies past the level0 entries, or when the
/// stored value is the `u32::MAX` sentinel (a missing or malformed entry).
pub(crate) fn level0_doc_up_to(&self, block_idx: usize) -> Option<u32> {
    (block_idx < self.level0_len)
        // Only index once the range check has passed.
        .then(|| self.entry_doc_up_tos[block_idx])
        .filter(|&doc_up_to| doc_up_to != u32::MAX)
}

/// Max score of the docs covered by the level0 entry of `block_idx`,
/// answered from the baked bounds slab.
// Only tests exercise the uncached form until the maxscore rework
// (stacked follow-up) anchors its block-max caches on it.
#[cfg_attr(not(test), allow(dead_code))]
pub fn level0_score<S: Scorer + ?Sized>(
&self,
block_idx: usize,
Expand All @@ -209,17 +208,18 @@ impl ImpactSkipData {
query_weight * self.doc_weight_bounds(scorer)[block_idx]
}

pub fn level0_score_cached<S: Scorer + ?Sized>(
/// Max score of the docs covered by the level1 entry of `group_idx`,
/// answered from the baked bounds slab.
pub fn level1_score<S: Scorer + ?Sized>(
&self,
block_idx: usize,
group_idx: usize,
query_weight: f32,
scorer: &S,
cache: &mut ImpactScoreCache,
) -> f32 {
if block_idx >= self.level0_len {
if group_idx >= level1_len(self.level0_len) || query_weight <= 0.0 {
return 0.0;
}
cache.entry_score(self, block_idx, query_weight, scorer)
query_weight * self.doc_weight_bounds(scorer)[self.level0_len + group_idx]
}

#[cfg(test)]
Expand All @@ -240,22 +240,7 @@ impl ImpactSkipData {
})
}

/// Cache-threading form of the max-score scan.
///
/// Delegates to `max_score_up_to_with` starting at `start_block_idx` and
/// bounded by `up_to`, scoring each entry the scan asks about through
/// `cache.entry_score` with the supplied `query_weight` and `scorer`.
pub fn max_score_up_to_cached<S: Scorer + ?Sized>(
    &self,
    start_block_idx: usize,
    up_to: u64,
    query_weight: f32,
    scorer: &S,
    cache: &mut ImpactScoreCache,
) -> ImpactScore {
    self.max_score_up_to_with(start_block_idx, up_to, |skip_data, idx| {
        cache.entry_score(skip_data, idx, query_weight, scorer)
    })
}

#[cfg(test)]
fn max_score_up_to_with<E>(
&self,
start_block_idx: usize,
Expand Down Expand Up @@ -638,25 +623,6 @@ mod tests {
assert_eq!(impacts.level1_doc_up_to(0), None);
}

/// The cached scoring helpers must agree with their uncached counterparts,
/// both for a single level0 block and for a ranged max-score scan.
#[test]
fn impact_score_cache_matches_uncached_scores() {
    // Forty single-tuple blocks; the middle component cycles through 1, 2, 3.
    let blocks: Vec<_> = (0..40)
        .map(|block| vec![(block as u32, 1 + (block as u32) % 3, 10)])
        .collect();
    let impacts = build_impact_skip_data(&blocks).unwrap();
    let token_counts = HashMap::from([(String::from("token"), 40usize)]);
    let scorer = MemBM25Scorer::new(400, 40, token_counts);
    let mut cache = ImpactScoreCache::default();

    // Single level0 block.
    let plain_level0 = impacts.level0_score(3, 1.0, &scorer);
    let cached_level0 = impacts.level0_score_cached(3, 1.0, &scorer, &mut cache);
    assert_eq!(cached_level0, plain_level0);

    // Ranged scan: both the score and the scan count must match.
    let plain = impacts.max_score_up_to(0, 31, |idx| idx as u32, 1.0, &scorer);
    let cached = impacts.max_score_up_to_cached(0, 31, 1.0, &scorer, &mut cache);
    assert_eq!(cached.score, plain.score);
    assert_eq!(cached.entries_scanned, plain.entries_scanned);
}

#[test]
fn impact_entries_are_decoded_lazily() {
let level0_0 = encode_impact_entry(&[(0, 1, 10)], ImpactFormat::FixedU32).unwrap();
Expand All @@ -675,11 +641,8 @@ mod tests {
assert!(score.score.is_finite());
assert_eq!(score.entries_scanned, 1);

let mut cache = ImpactScoreCache::default();
assert_eq!(
impacts.level0_score_cached(1, 1.0, &scorer, &mut cache),
f32::INFINITY
);
// The malformed level0 entry degrades to an unskippable bound.
assert_eq!(impacts.level0_score(1, 1.0, &scorer), f32::INFINITY);
}

#[test]
Expand Down
Loading
Loading