From 5a48be954459945e2e3ef8383066a719565f34d7 Mon Sep 17 00:00:00 2001 From: Thejas-bhat Date: Thu, 21 May 2026 13:31:39 -0700 Subject: [PATCH 1/3] pass training params to zap to control the index props --- index/scorch/train_vector.go | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/index/scorch/train_vector.go b/index/scorch/train_vector.go index 6a58fd06c..cdc409223 100644 --- a/index/scorch/train_vector.go +++ b/index/scorch/train_vector.go @@ -37,10 +37,11 @@ import ( ) type trainRequest struct { - finalSample bool - sampleSize int - ackCh chan error - sample segment.Segment + finalSample bool + sampleSize int + ackCh chan error + sample segment.Segment + trainingParams *index.TrainingParams } type vectorTrainer struct { @@ -154,6 +155,10 @@ func (t *vectorTrainer) trainLoop() { return case trainReq := <-t.trainCh: sampleSeg := trainReq.sample + if trainReq.trainingParams != nil { + t.config[index.TrainingKey] = trainReq.trainingParams + } + // no sample segment: just persist state if this is the final sample and move on. if sampleSeg == nil { if trainReq.finalSample { @@ -207,7 +212,7 @@ func (t *vectorTrainer) trainLoop() { return } - trainedIndex, err := t.parent.segPlugin.OpenUsing(path, t.parent.segmentConfig) + trainedIndex, err := t.parent.segPlugin.OpenUsing(path, t.config) if err != nil { trainReq.ackCh <- fmt.Errorf("error opening trained index: %v", err) close(trainReq.ackCh) @@ -303,6 +308,23 @@ func (t *vectorTrainer) train(batch *index.Batch) error { sampleSize: len(trainData), ackCh: make(chan error), } + // setting the training params using the internal value before the actual + // training has started + config := t.config + if atomic.LoadUint64(&t.trainedSamples) == 0 { + trainingParamsBytes := batch.InternalOps[index.TrainingKey] + var trainingParams index.TrainingParams + if trainingParamsBytes != nil { + err = util.UnmarshalJSON(trainingParamsBytes, &trainingParams) + if err != nil { + return fmt.Errorf("error parsing training params: %v", err) + } + trainReq.trainingParams = &trainingParams + config = maps.Clone(t.config) + config[index.TrainingKey] = &trainingParams + } + } + // just builds a new vector index out of the train data provided // this is not necessarily the final train data since this is submitted // as a request to the trainer component to be merged. once the training @@ -312,7 +334,7 @@ func (t *vectorTrainer) train(batch *index.Batch) error { // note: this might index text data too, how to handle this? s.segmentConfig? // todo: updates/deletes -> data drift detection if len(trainData) > 0 { - trainReq.sample, _, err = t.parent.segPlugin.NewUsing(trainData, t.parent.segmentConfig) + trainReq.sample, _, err = t.parent.segPlugin.NewUsing(trainData, config) if err != nil { return err } From cc0c62bd637806ab83fa1e485fc3a9bdb2e8e90a Mon Sep 17 00:00:00 2001 From: Thejas-bhat Date: Fri, 22 May 2026 12:34:37 -0700 Subject: [PATCH 2/3] remove unnecessary flag set --- go.mod | 6 +++--- go.sum | 12 ++++++------ index/scorch/train_vector.go | 2 -- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/go.mod b/go.mod index 58584ab80..d210800ac 100644 --- a/go.mod +++ b/go.mod @@ -5,9 +5,9 @@ go 1.25.0 require ( github.com/RoaringBitmap/roaring/v2 v2.14.5 github.com/bits-and-blooms/bitset v1.24.2 - github.com/blevesearch/bleve_index_api v1.3.11 + github.com/blevesearch/bleve_index_api v1.3.12 github.com/blevesearch/geo v0.2.5 - github.com/blevesearch/go-faiss v1.1.3-0.20260520191817-63736f1417c1 + github.com/blevesearch/go-faiss v1.1.3-0.20260525132456-c1cb753e04cd github.com/blevesearch/go-metrics v0.0.0-20201227073835-cf1acfcdf475 github.com/blevesearch/go-porterstemmer v1.0.3 github.com/blevesearch/goleveldb v1.0.1 @@ -25,7 +25,7 @@ require ( github.com/blevesearch/zapx/v14 v14.4.3 github.com/blevesearch/zapx/v15 v15.4.3 github.com/blevesearch/zapx/v16 v16.3.4 - github.com/blevesearch/zapx/v17 v17.1.4-0.20260521123239-ff145410dea9 + github.com/blevesearch/zapx/v17 v17.1.4 github.com/couchbase/moss v0.2.0 github.com/spf13/cobra v1.10.2 go.etcd.io/bbolt v1.4.0 diff --git a/go.sum b/go.sum index ec1ccbfaa..c5bb77c96 100644 --- a/go.sum +++ b/go.sum @@ -2,12 +2,12 @@ github.com/RoaringBitmap/roaring/v2 v2.14.5 h1:ckd0o545JqDPeVJDgeFoaM21eBixUnlWf github.com/RoaringBitmap/roaring/v2 v2.14.5/go.mod h1:eq4wdNXxtJIS/oikeCzdX1rBzek7ANzbth041hrU8Q4= github.com/bits-and-blooms/bitset v1.24.2 h1:M7/NzVbsytmtfHbumG+K2bremQPMJuqv1JD3vOaFxp0= github.com/bits-and-blooms/bitset v1.24.2/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8= -github.com/blevesearch/bleve_index_api v1.3.11 h1:x29vbV8OjWfLcrDVd7Lr1q+BkLNS0JWNEig0MCVnKH4= -github.com/blevesearch/bleve_index_api v1.3.11/go.mod h1:xvd48t5XMeeioWQ5/jZvgLrV98flT2rdvEJ3l/ki4Ko= +github.com/blevesearch/bleve_index_api v1.3.12 h1:MirVNltwGq8z0PhOgiQp+bKL5qq8OvCxEwOOC7NnHNE= +github.com/blevesearch/bleve_index_api v1.3.12/go.mod h1:xvd48t5XMeeioWQ5/jZvgLrV98flT2rdvEJ3l/ki4Ko= github.com/blevesearch/geo v0.2.5 h1:yJg9FX1oRwLnjXSXF+ECHfXFTF4diF02Ca/qUGVjJhE= github.com/blevesearch/geo v0.2.5/go.mod h1:Jhq7WE2K6mJTx1xS44M2pUO6Io+wjCSHh1+co3YOgH4= -github.com/blevesearch/go-faiss v1.1.3-0.20260520191817-63736f1417c1 h1:hQ8yzG249Ui6fVSvZGrEBpuyOgpt7YsGAvKznqoxlAo= -github.com/blevesearch/go-faiss v1.1.3-0.20260520191817-63736f1417c1/go.mod h1:OMGQwOaRRYxrmeNdMrXJPvVx8gBnvE5RYrr0BahNnkk= +github.com/blevesearch/go-faiss v1.1.3-0.20260525132456-c1cb753e04cd h1:ftEpy+Ma4N/O4zgIRn4pKZOhmi8UnvApcYT6hTf0IYQ= +github.com/blevesearch/go-faiss v1.1.3-0.20260525132456-c1cb753e04cd/go.mod h1:w3W9AiWsFRGVaMG+/cmJi7iHEAuGyC6blsgO1EzCK/M= github.com/blevesearch/go-metrics v0.0.0-20201227073835-cf1acfcdf475 h1:kDy+zgJFJJoJYBvdfBSiZYBbdsUL0XcjHYWezpQBGPA= github.com/blevesearch/go-metrics v0.0.0-20201227073835-cf1acfcdf475/go.mod h1:9eJDeqxJ3E7WnLebQUlPD7ZjSce7AnDb9vjGmMCbD0A= github.com/blevesearch/go-porterstemmer v1.0.3 h1:GtmsqID0aZdCSNiY8SkuPJ12pD4jI+DdXTAn4YRcHCo= @@ -45,8 +45,8 @@ github.com/blevesearch/zapx/v15 v15.4.3 h1:iJiMJOHrz216jyO6lS0m9RTCEkprUnzvqAI2l github.com/blevesearch/zapx/v15 v15.4.3/go.mod h1:1pssev/59FsuWcgSnTa0OeEpOzmhtmr/0/11H0Z8+Nw= github.com/blevesearch/zapx/v16 v16.3.4 h1:hDAqA8qusZTNbPEL7//w5P65UZ2de6yhSeUaTbp0Po0= github.com/blevesearch/zapx/v16 v16.3.4/go.mod h1:zqkPPqs9GS9FzVWzCO3Wf1X044yWAV17+4zb+FTiEHg= -github.com/blevesearch/zapx/v17 v17.1.4-0.20260521123239-ff145410dea9 h1:Yp9DiRN3dmTc+l426S6JDj9eh6b9Og55jXFo8i36PQk= -github.com/blevesearch/zapx/v17 v17.1.4-0.20260521123239-ff145410dea9/go.mod h1:yk8zBTzIwJN38DFJuLOTgjc76fDzEXh8txQOF/BVgtY= +github.com/blevesearch/zapx/v17 v17.1.4 h1:mAiuZOJOHWkrn9RFKntELrL57zARgQcamSWFx/Mbn6A= +github.com/blevesearch/zapx/v17 v17.1.4/go.mod h1:p/JKwke1X6SewJ1b5psiakNXyfXEXJHnIUMUTp89uAs= github.com/couchbase/ghistogram v0.1.0 h1:b95QcQTCzjTUocDXp/uMgSNQi8oj1tGwnJ4bODWZnps= github.com/couchbase/ghistogram v0.1.0/go.mod h1:s1Jhy76zqfEecpNWJfWUiKZookAFaiGOEoyzgHt9i7k= github.com/couchbase/moss v0.2.0 h1:VCYrMzFwEryyhRSeI+/b3tRBSeTpi/8gn5Kf6dxqn+o= diff --git a/index/scorch/train_vector.go b/index/scorch/train_vector.go index cdc409223..60db17883 100644 --- a/index/scorch/train_vector.go +++ b/index/scorch/train_vector.go @@ -185,10 +185,8 @@ func (t *vectorTrainer) trainLoop() { // merge the new segment with the existing one into a .tmp file, then // atomically rename it into place (Os.Open on the live path is unsafe // during the merge). - t.config[index.TrainingKey] = true _, _, err := t.parent.segPlugin.MergeUsing([]segment.Segment{t.trainedIndex.segment, sampleSeg}, []*roaring.Bitmap{nil, nil}, path+".tmp", t.parent.closeCh, nil, t.config) - t.config[index.TrainingKey] = false if err != nil { trainReq.ackCh <- fmt.Errorf("error merging trained index: %v", err) close(trainReq.ackCh) From 079f94bc78811014ed9a354820916e0640a206cc Mon Sep 17 00:00:00 2001 From: Thejas-bhat Date: Fri, 29 May 2026 09:09:27 -0700 Subject: [PATCH 3/3] go mod tidy --- go.mod | 4 ++-- go.sum | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/go.mod b/go.mod index d210800ac..7af6683d5 100644 --- a/go.mod +++ b/go.mod @@ -7,7 +7,7 @@ require ( github.com/bits-and-blooms/bitset v1.24.2 github.com/blevesearch/bleve_index_api v1.3.12 github.com/blevesearch/geo v0.2.5 - github.com/blevesearch/go-faiss v1.1.3-0.20260525132456-c1cb753e04cd + github.com/blevesearch/go-faiss v1.1.3 github.com/blevesearch/go-metrics v0.0.0-20201227073835-cf1acfcdf475 github.com/blevesearch/go-porterstemmer v1.0.3 github.com/blevesearch/goleveldb v1.0.1 @@ -25,7 +25,7 @@ require ( github.com/blevesearch/zapx/v14 v14.4.3 github.com/blevesearch/zapx/v15 v15.4.3 github.com/blevesearch/zapx/v16 v16.3.4 - github.com/blevesearch/zapx/v17 v17.1.4 + github.com/blevesearch/zapx/v17 v17.1.5 github.com/couchbase/moss v0.2.0 github.com/spf13/cobra v1.10.2 go.etcd.io/bbolt v1.4.0 diff --git a/go.sum b/go.sum index c5bb77c96..bf01c4438 100644 --- a/go.sum +++ b/go.sum @@ -6,8 +6,8 @@ github.com/blevesearch/bleve_index_api v1.3.12 h1:MirVNltwGq8z0PhOgiQp+bKL5qq8Ov github.com/blevesearch/bleve_index_api v1.3.12/go.mod h1:xvd48t5XMeeioWQ5/jZvgLrV98flT2rdvEJ3l/ki4Ko= github.com/blevesearch/geo v0.2.5 h1:yJg9FX1oRwLnjXSXF+ECHfXFTF4diF02Ca/qUGVjJhE= github.com/blevesearch/geo v0.2.5/go.mod h1:Jhq7WE2K6mJTx1xS44M2pUO6Io+wjCSHh1+co3YOgH4= -github.com/blevesearch/go-faiss v1.1.3-0.20260525132456-c1cb753e04cd h1:ftEpy+Ma4N/O4zgIRn4pKZOhmi8UnvApcYT6hTf0IYQ= -github.com/blevesearch/go-faiss v1.1.3-0.20260525132456-c1cb753e04cd/go.mod h1:w3W9AiWsFRGVaMG+/cmJi7iHEAuGyC6blsgO1EzCK/M= +github.com/blevesearch/go-faiss v1.1.3 h1:C1ACLuVGq9AHLBgA594vwR7F6iAl8vKx/841OLv+09U= +github.com/blevesearch/go-faiss v1.1.3/go.mod h1:w3W9AiWsFRGVaMG+/cmJi7iHEAuGyC6blsgO1EzCK/M= github.com/blevesearch/go-metrics v0.0.0-20201227073835-cf1acfcdf475 h1:kDy+zgJFJJoJYBvdfBSiZYBbdsUL0XcjHYWezpQBGPA= github.com/blevesearch/go-metrics v0.0.0-20201227073835-cf1acfcdf475/go.mod h1:9eJDeqxJ3E7WnLebQUlPD7ZjSce7AnDb9vjGmMCbD0A= github.com/blevesearch/go-porterstemmer v1.0.3 h1:GtmsqID0aZdCSNiY8SkuPJ12pD4jI+DdXTAn4YRcHCo= @@ -45,8 +45,8 @@ github.com/blevesearch/zapx/v15 v15.4.3 h1:iJiMJOHrz216jyO6lS0m9RTCEkprUnzvqAI2l github.com/blevesearch/zapx/v15 v15.4.3/go.mod h1:1pssev/59FsuWcgSnTa0OeEpOzmhtmr/0/11H0Z8+Nw= github.com/blevesearch/zapx/v16 v16.3.4 h1:hDAqA8qusZTNbPEL7//w5P65UZ2de6yhSeUaTbp0Po0= github.com/blevesearch/zapx/v16 v16.3.4/go.mod h1:zqkPPqs9GS9FzVWzCO3Wf1X044yWAV17+4zb+FTiEHg= -github.com/blevesearch/zapx/v17 v17.1.4 h1:mAiuZOJOHWkrn9RFKntELrL57zARgQcamSWFx/Mbn6A= -github.com/blevesearch/zapx/v17 v17.1.4/go.mod h1:p/JKwke1X6SewJ1b5psiakNXyfXEXJHnIUMUTp89uAs= +github.com/blevesearch/zapx/v17 v17.1.5 h1:++CuAe5fQr07CSXYxnDTFH/CClCLXTRSR71iSBRn4Z4= +github.com/blevesearch/zapx/v17 v17.1.5/go.mod h1:c+mPvbZgZnDPOUS5Z9EXhntMcJnpIVjQTM9TF5yEGJM= github.com/couchbase/ghistogram v0.1.0 h1:b95QcQTCzjTUocDXp/uMgSNQi8oj1tGwnJ4bODWZnps= github.com/couchbase/ghistogram v0.1.0/go.mod h1:s1Jhy76zqfEecpNWJfWUiKZookAFaiGOEoyzgHt9i7k= github.com/couchbase/moss v0.2.0 h1:VCYrMzFwEryyhRSeI+/b3tRBSeTpi/8gn5Kf6dxqn+o=