Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion diskann-benchmark/src/backend/exhaustive/product.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ mod imp {
5,
);

let offsets = diskann_providers::model::pq::calculate_chunk_offsets_auto(
let offsets = diskann_quantization::views::calculate_chunk_offsets_auto(
data.ncols(),
input.num_pq_chunks.get(),
);
Expand Down
19 changes: 9 additions & 10 deletions diskann-disk/src/search/pq/quantizer_preprocess.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,13 @@
* Licensed under the MIT license.
*/

//! PQ quantizer query preprocessing.
//!
//! Prior to the introduction of the [`quantizer_preprocess`] function, the disk index was
//! hard-coded to use L2 distance for comparisons. We're keeping that behavior here,
//! treating `Cosine` and `CosineNormalized` as L2 until a more thorough evaluation can
//! be made.
Comment thread
arkrishn94 marked this conversation as resolved.

use diskann::ANNResult;
use diskann_vector::distance::Metric;

Expand Down Expand Up @@ -33,11 +40,7 @@ pub fn quantizer_preprocess(
.bridge_err()?;

match metric {
// Prior to the introduction of the `quantizer_preprocess` method, the
// disk index was hard-coded to use L2 distance for comparisons.
//
// We're keeping that behavior here - treating `Cosine` and `CosineNormalized`
// as L2 until a more thorough evaluation can be made.
// Cosine and CosineNormalized fall back to L2; see module docs.
Metric::L2 | Metric::Cosine | Metric::CosineNormalized => {
table.process_into::<diskann_quantization::distances::SquaredL2>(
&pq_scratch.rotated_query[..dim],
Expand All @@ -54,11 +57,7 @@ pub fn quantizer_preprocess(
}
PQTable::Fixed(table) => {
match metric {
// Prior to the introduction of the `quantizer_preprocess` method, the
// disk index was hard-coded to use L2 distance for comparisons.
//
// We're keeping that behavior here - treating `Cosine` and `CosineNormalized`
// as L2 until a more thorough evaluation can be made.
// Cosine and CosineNormalized fall back to L2; see module docs.
Metric::L2 | Metric::Cosine | Metric::CosineNormalized => {
// The scratch only stores the aligned dimension. However, preprocessing
// wants the actual dimension used, so we have to shrink the rotated query
Expand Down
6 changes: 2 additions & 4 deletions diskann-disk/src/search/provider/disk_provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,17 +31,15 @@ use diskann::{
Accessor, BuildQueryComputer, DataProvider, DefaultContext, DelegateNeighbor, HasId,
NeighborAccessor, NoopGuard,
},
utils::{
object_pool::{ObjectPool, PoolOption, TryAsPooled},
IntoUsize, VectorRepr,
},
utils::{IntoUsize, VectorRepr},
ANNError, ANNResult,
};
use diskann_providers::storage::StorageReadProvider;
use diskann_providers::{
model::{compute_pq_distance, compute_pq_distance_for_pq_coordinates},
storage::{get_compressed_pq_file, get_disk_index_file, get_pq_pivot_file, LoadWith},
};
use diskann_utils::object_pool::{ObjectPool, PoolOption, TryAsPooled};

use crate::search::pq::{quantizer_preprocess, PQData, PQScratch};
use diskann_vector::{distance::Metric, DistanceFunction, PreprocessedDistanceFunction};
Expand Down
7 changes: 2 additions & 5 deletions diskann-disk/src/storage/quant/pq/pq_generation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,12 @@ use diskann::{utils::VectorRepr, ANNError};
use diskann_providers::storage::{StorageReadProvider, StorageWriteProvider};
use diskann_providers::{
forward_threadpool,
model::{
pq::{accum_row_inplace, generate_pq_pivots},
GeneratePivotArguments,
},
model::{pq::generate_pq_pivots, GeneratePivotArguments},
storage::PQStorage,
utils::{AsThreadPool, BridgeErr, Timer},
};
use diskann_quantization::{product::TransposedTable, CompressInto};
use diskann_utils::views::MatrixBase;
use diskann_utils::views::{accum_row_inplace, MatrixBase};
use diskann_vector::distance::Metric;
use tracing::info;

Expand Down
6 changes: 2 additions & 4 deletions diskann-garnet/src/provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,11 @@ use diskann::{
Accessor, BuildDistanceComputer, BuildQueryComputer, DataProvider, DelegateNeighbor,
Delete, ElementStatus, HasId, NeighborAccessor, NeighborAccessorMut, NoopGuard, SetElement,
},
utils::{
VectorRepr,
object_pool::{AsPooled, ObjectPool, PooledRef, Undef},
},
utils::VectorRepr,
};
use diskann_providers::model::graph::provider::async_::common::FullPrecision;
use diskann_utils::Reborrow;
use diskann_utils::object_pool::{AsPooled, ObjectPool, PooledRef, Undef};
use diskann_vector::{PreprocessedDistanceFunction, contains::ContainsSimd, distance::Metric};
use std::{
future, mem,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,9 @@ use std::sync::Arc;

use bf_tree::{BfTree, Config};
use bytemuck::bytes_of;
use diskann::{
ANNError, ANNErrorKind, ANNResult,
error::IntoANNResult,
utils::{VectorRepr, object_pool::ObjectPool},
};
use diskann::{ANNError, ANNErrorKind, ANNResult, error::IntoANNResult, utils::VectorRepr};
use diskann_quantization::CompressInto;
use diskann_utils::object_pool::ObjectPool;
use diskann_vector::distance::Metric;
use thiserror::Error;

Expand Down Expand Up @@ -356,23 +353,23 @@ mod tests {
let c = provider.query_computer(&[-0.5, -0.5]).unwrap();
let expected: f32 = 1.5 * 1.5 * 2.0;
assert_eq!(
c.evaluate_similarity(&provider.get_vector_sync(3).unwrap()),
c.evaluate_similarity(provider.get_vector_sync(3).unwrap().as_slice()),
expected
);

// Distance Computer.
let d = provider.distance_computer();
assert_eq!(
d.evaluate_similarity(
&provider.get_vector_sync(0).unwrap(),
&provider.get_vector_sync(3).unwrap()
provider.get_vector_sync(0).unwrap().as_slice(),
provider.get_vector_sync(3).unwrap().as_slice()
),
2.0
);

let slice: &[f32] = &[-0.5, -0.5];
assert_eq!(
d.evaluate_similarity(slice, &provider.get_vector_sync(3).unwrap()),
d.evaluate_similarity(slice, provider.get_vector_sync(3).unwrap().as_slice()),
expected,
);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,9 @@
use std::sync::{Arc, Mutex};

use crate::storage::{StorageReadProvider, StorageWriteProvider};
use diskann::{
ANNError, ANNResult,
error::IntoANNResult,
utils::{VectorRepr, object_pool::ObjectPool},
};
use diskann::{ANNError, ANNResult, error::IntoANNResult, utils::VectorRepr};
use diskann_quantization::CompressInto;
use diskann_utils::object_pool::ObjectPool;
use diskann_vector::distance::Metric;

use super::common::{AlignedMemoryVectorStore, TestCallCount};
Expand Down Expand Up @@ -447,21 +444,18 @@ mod tests {
// Query Computer.
let c = provider.query_computer(&[-0.5, -0.5]).unwrap();
let expected: f32 = 1.5 * 1.5 * 2.0;
assert_eq!(
c.evaluate_similarity(&provider.get_vector_sync(3)),
expected
);
assert_eq!(c.evaluate_similarity(provider.get_vector_sync(3)), expected);

// Distance Computer.
let d = provider.distance_computer();
assert_eq!(
d.evaluate_similarity(&provider.get_vector_sync(0), &provider.get_vector_sync(3)),
d.evaluate_similarity(provider.get_vector_sync(0), provider.get_vector_sync(3)),
2.0
);

let slice: &[f32] = &[-0.5, -0.5];
assert_eq!(
d.evaluate_similarity(slice, &provider.get_vector_sync(3)),
d.evaluate_similarity(slice, provider.get_vector_sync(3)),
expected,
);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,10 @@ use crate::storage::{StorageReadProvider, StorageWriteProvider};
use arc_swap::{ArcSwap, Guard};
#[cfg(test)]
use diskann::utils::VectorRepr;
use diskann::{ANNError, ANNResult, utils::object_pool::ObjectPool};
use diskann::{ANNError, ANNResult};
#[cfg(test)]
use diskann_quantization::CompressInto;
use diskann_utils::object_pool::ObjectPool;
use diskann_vector::{DistanceFunction, PreprocessedDistanceFunction, distance::Metric};

use super::{VectorGuard, common::TestCallCount};
Expand Down
8 changes: 4 additions & 4 deletions diskann-providers/src/model/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@ pub use configuration::IndexConfiguration;
pub mod pq;
pub use pq::{
FixedChunkPQTable, GeneratePivotArguments, MAX_PQ_TRAINING_SET_SIZE, NUM_KMEANS_REPS_PQ,
NUM_PQ_CENTROIDS, accum_row_inplace, calculate_chunk_offsets_auto, compute_pq_distance,
compute_pq_distance_for_pq_coordinates, direct_distance_impl, distance,
generate_pq_data_from_pivots_from_membuf, generate_pq_data_from_pivots_from_membuf_batch,
generate_pq_pivots, generate_pq_pivots_from_membuf,
NUM_PQ_CENTROIDS, compute_pq_distance, compute_pq_distance_for_pq_coordinates,
direct_distance_impl, distance, generate_pq_data_from_pivots_from_membuf,
generate_pq_data_from_pivots_from_membuf_batch, generate_pq_pivots,
generate_pq_pivots_from_membuf,
};

pub mod statistics;
Expand Down
2 changes: 1 addition & 1 deletion diskann-providers/src/model/pq/distance/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* Licensed under the MIT license.
*/

use diskann::utils::object_pool::{self, ObjectPool};
use diskann_utils::object_pool::{self, ObjectPool};

use crate::model::pq::fixed_chunk_pq_table::FixedChunkPQTable;

Expand Down
68 changes: 2 additions & 66 deletions diskann-providers/src/model/pq/distance/dynamic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@

use std::{ops::Deref, sync::Arc};

use diskann::{ANNResult, utils::object_pool::ObjectPool};
use diskann::ANNResult;
use diskann_utils::object_pool::ObjectPool;
use diskann_vector::{DistanceFunction, PreprocessedDistanceFunction, distance::Metric};

// Concrete implementations
Expand Down Expand Up @@ -100,25 +101,6 @@ where
}
}

/// Adapter that lets a [`QueryComputer`] be evaluated against a borrowed `Vec<u8>`.
///
/// The vector is viewed as a `&[u8]` slice and handed to `evaluate_similarity`;
/// the resulting value is returned unchanged.
impl<T> PreprocessedDistanceFunction<&Vec<u8>, f32> for QueryComputer<T>
where
    T: Deref<Target = FixedChunkPQTable>,
{
    fn evaluate_similarity(&self, changing: &Vec<u8>) -> f32 {
        // Borrow the vector's contents as a slice before delegating.
        let codes: &[u8] = changing.as_slice();
        self.evaluate_similarity(codes)
    }
}

/// Adapter that lets a [`QueryComputer`] be evaluated against a doubly-borrowed
/// byte slice (`&&[u8]`).
///
/// The outer reference is removed and the inner `&[u8]` is handed to
/// `evaluate_similarity`; the resulting value is returned unchanged.
impl<T> PreprocessedDistanceFunction<&&[u8], f32> for QueryComputer<T>
where
    T: Deref<Target = FixedChunkPQTable>,
{
    fn evaluate_similarity(&self, changing: &&[u8]) -> f32 {
        // Dereference once so the delegate call is made with a plain `&[u8]`.
        self.evaluate_similarity(*changing)
    }
}

/// Pre-dispatched distance functions for the `FixedChunkPQTable`.
#[derive(Debug)]
pub struct VTable {
Expand Down Expand Up @@ -232,52 +214,6 @@ where
}
}

/// Compare a full-precision vector with a quantized vector supplied as `&&[u8]`.
///
/// The quantized argument has its outer reference removed and is then passed,
/// together with `fp`, to `evaluate_similarity` as a plain `&[u8]`.
impl<T> DistanceFunction<&[f32], &&[u8], f32> for DistanceComputer<T>
where
    T: Deref<Target = FixedChunkPQTable>,
{
    #[inline(always)]
    fn evaluate_similarity(&self, fp: &[f32], q: &&[u8]) -> f32 {
        // Dereference once so the delegate call is made with a plain `&[u8]`.
        self.evaluate_similarity(fp, *q)
    }
}

/// Compare a full-precision vector with a quantized vector supplied as `&Vec<u8>`.
///
/// The quantized argument is viewed as a `&[u8]` slice and passed, together
/// with `fp`, to `evaluate_similarity`.
impl<T> DistanceFunction<&[f32], &Vec<u8>, f32> for DistanceComputer<T>
where
    T: Deref<Target = FixedChunkPQTable>,
{
    #[inline(always)]
    fn evaluate_similarity(&self, fp: &[f32], q: &Vec<u8>) -> f32 {
        // Borrow the vector's contents as a slice before delegating.
        let codes: &[u8] = q.as_slice();
        self.evaluate_similarity(fp, codes)
    }
}

/// Compare two quantized vectors, each supplied as `&&[u8]`.
///
/// Both arguments have their outer reference removed and are then passed to
/// `evaluate_similarity` as plain `&[u8]` slices.
impl<T> DistanceFunction<&&[u8], &&[u8], f32> for DistanceComputer<T>
where
    T: Deref<Target = FixedChunkPQTable>,
{
    #[inline(always)]
    fn evaluate_similarity(&self, q0: &&[u8], q1: &&[u8]) -> f32 {
        // Dereference once so the delegate call is made with plain `&[u8]` slices.
        self.evaluate_similarity(*q0, *q1)
    }
}

/// Compare two quantized vectors, each supplied as `&Vec<u8>`.
///
/// Both arguments are viewed as `&[u8]` slices and passed to
/// `evaluate_similarity`.
impl<T> DistanceFunction<&Vec<u8>, &Vec<u8>, f32> for DistanceComputer<T>
where
    T: Deref<Target = FixedChunkPQTable>,
{
    #[inline(always)]
    fn evaluate_similarity(&self, q0: &Vec<u8>, q1: &Vec<u8>) -> f32 {
        // Borrow each vector's contents as a slice before delegating.
        let lhs: &[u8] = q0.as_slice();
        let rhs: &[u8] = q1.as_slice();
        self.evaluate_similarity(lhs, rhs)
    }
}

#[cfg(test)]
mod tests {
use std::marker::PhantomData;
Expand Down
6 changes: 2 additions & 4 deletions diskann-providers/src/model/pq/distance/innerproduct.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,8 @@

use std::{ops::Deref, sync::Arc};

use diskann::{
ANNResult,
utils::object_pool::{self, ObjectPool, PoolOption},
};
use diskann::ANNResult;
use diskann_utils::object_pool::{self, ObjectPool, PoolOption};
use diskann_vector::PreprocessedDistanceFunction;

use super::common::get_lookup_table_size;
Expand Down
6 changes: 2 additions & 4 deletions diskann-providers/src/model/pq/distance/l2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,8 @@

use std::{ops::Deref, sync::Arc};

use diskann::{
ANNResult,
utils::object_pool::{self, ObjectPool, PoolOption},
};
use diskann::ANNResult;
use diskann_utils::object_pool::{self, ObjectPool, PoolOption};
use diskann_vector::PreprocessedDistanceFunction;

use super::common::get_lookup_table_size;
Expand Down
3 changes: 2 additions & 1 deletion diskann-providers/src/model/pq/distance/test_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ use diskann_vector::{
use rand::{Rng, distr::Distribution};
use rand_distr::{Normal, Uniform};

use crate::model::{FixedChunkPQTable, pq::calculate_chunk_offsets_auto};
use crate::model::FixedChunkPQTable;
use diskann_quantization::views::calculate_chunk_offsets_auto;

/// We need a way to generate random queries.
///
Expand Down
6 changes: 2 additions & 4 deletions diskann-providers/src/model/pq/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,9 @@ pub use fixed_chunk_pq_table::{

mod pq_construction;
pub use pq_construction::{
MAX_PQ_TRAINING_SET_SIZE, NUM_KMEANS_REPS_PQ, NUM_PQ_CENTROIDS, accum_row_inplace,
calculate_chunk_offsets, calculate_chunk_offsets_auto, generate_pq_data_from_pivots,
MAX_PQ_TRAINING_SET_SIZE, NUM_KMEANS_REPS_PQ, NUM_PQ_CENTROIDS, generate_pq_data_from_pivots,
generate_pq_data_from_pivots_from_membuf, generate_pq_data_from_pivots_from_membuf_batch,
generate_pq_pivots, generate_pq_pivots_from_membuf, get_chunk_from_training_data,
move_train_data_by_centroid,
generate_pq_pivots, generate_pq_pivots_from_membuf, move_train_data_by_centroid,
};
Comment thread
arkrishn94 marked this conversation as resolved.

/// all metadata of individual sub-component files is written in first 4KB for unified files
Expand Down
Loading
Loading