From 2f52c6957425f8481dbecf5178ad31de0ef73cbf Mon Sep 17 00:00:00 2001 From: Ignacio Van Droogenbroeck Date: Wed, 15 Apr 2026 09:48:28 -0600 Subject: [PATCH] perf: pool and pre-allocate interned-string dict MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #66. Fixes map rehashing and slice growth on repeated interned encode/decode when the Encoder/Decoder is reused. New SetInternedStringsDictCap(n) on both Encoder and Decoder sets an initial capacity hint for the dict, avoiding rehashes as entries are added. Reset paths now reuse dict storage (clear-in-place for maps, truncate for slices) instead of dropping it to the GC every session; PutEncoder/PutDecoder drop oversized dicts (> 4096 entries) so a one-off large session doesn't permanently bloat pool memory. Introduces a dictOwned flag to track whether the dict was internally allocated or supplied by the caller via ResetDict/WithDict. The Encoder/Decoder will never clear, truncate, or append into a caller-owned dict — a regression test covers both the encoder-clear and decoder-alias clobber scenarios. A new allocation-based reuse test asserts 0 encoder allocs / 1 decoder alloc on the Reset+encode/decode hot loop (would fail on the pre-fix code). --- CHANGELOG.md | 1 + decode.go | 64 +++++++++++++++-- encode.go | 62 +++++++++++++++-- intern.go | 15 +++- intern_test.go | 184 +++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 317 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 266e292..6caf060 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ - **decode:** `readCode` bsr fast path — when decoding from a byte slice, reads directly from the underlying array instead of dispatching through the `io.ByteReader` interface; eliminates ~900M interface calls/sec at Arc's throughput ([#57](https://github.com/Basekick-Labs/msgpack/issues/57)) (StructUnmarshal **-7.5%**, StructUnmarshalPartially **-6.1%**) - **decode:** `PeekCode` bsr fast path — peeks directly at `bsr.data[bsr.pos]` instead of `ReadByte` + `UnreadByte` (two interface calls) ([#59](https://github.com/Basekick-Labs/msgpack/issues/59)) - **encode:** pool `OmitEmpty` filtered field slices via `sync.Pool` — when fields are actually omitted, the allocated `[]*field` slice is now returned to a pool for reuse instead of being GC'd ([#58](https://github.com/Basekick-Labs/msgpack/issues/58)) +- **encode/decode:** pool and pre-allocate interned-string dict — `SetInternedStringsDictCap(n)` pre-sizes the dict to avoid map rehashing and slice growth; pooled encoders/decoders now reuse dict storage across `Reset()` (cleared in place) instead of discarding it, and `Put*()` drops oversized dicts to keep the pool lean ([#66](https://github.com/Basekick-Labs/msgpack/issues/66)) --- diff --git a/decode.go b/decode.go index 7896774..6e40351 100644 --- a/decode.go +++ b/decode.go @@ -53,6 +53,18 @@ func PutDecoder(dec *Decoder) { } else if dec.buf != nil { dec.buf = dec.buf[:0] } + // Drop the interned-string dict if we own it and it grew large, so pool + // entries don't permanently retain memory from a one-off large interning + // session. A caller-owned dict is always dropped so we never hold a + // reference to caller memory across pool round-trips. We check cap(), + // not len(), because PutDecoder can see a truncated slice (len=0) whose + // backing array is still large. + if !dec.dictOwned { + dec.dict = nil + } else if cap(dec.dict) > internDictPoolCap { + dec.dict = nil + dec.dictOwned = false + } decPool.Put(dec) } @@ -82,6 +94,8 @@ type Decoder struct { rec []byte dict []string flags uint32 + internCap int // initial capacity hint for the interned-string dict + dictOwned bool // true when dict was lazily allocated by us, safe to mutate/pool } // NewDecoder returns a new decoder that reads from r. @@ -102,24 +116,34 @@ func (d *Decoder) Reset(r io.Reader) { } // ResetDict is like Reset, but also resets the dict. +// +// A non-nil dict replaces the current dict; the decoder will not append to +// it or otherwise mutate it (caller retains ownership). A nil dict keeps +// any internally-owned dict storage for reuse, truncated to empty — +// subsequent interned decodes skip the slice allocation. func (d *Decoder) ResetDict(r io.Reader, dict []string) { d.ResetReader(r) d.flags = 0 d.structTag = "" - d.dict = dict + if dict != nil { + d.dict = dict + d.dictOwned = false + } } func (d *Decoder) WithDict(dict []string, fn func(*Decoder) error) error { - oldDict := d.dict + oldDict, oldOwned := d.dict, d.dictOwned d.dict = dict + d.dictOwned = false err := fn(d) d.dict = oldDict + d.dictOwned = oldOwned return err } func (d *Decoder) ResetReader(r io.Reader) { d.mapDecoder = nil - d.dict = nil + d.releaseOrTruncateDict() if br, ok := r.(bufReader); ok { d.r = br @@ -144,7 +168,18 @@ func (d *Decoder) ResetBytes(data []byte) { d.mapDecoder = nil d.flags = 0 d.structTag = "" - d.dict = nil + d.releaseOrTruncateDict() +} + +// releaseOrTruncateDict reuses the dict storage if we allocated it +// ourselves; otherwise it drops the reference so a caller-supplied dict is +// never aliased or appended into by a subsequent reset. +func (d *Decoder) releaseOrTruncateDict() { + if d.dictOwned { + d.dict = d.dict[:0] + } else { + d.dict = nil + } } func (d *Decoder) SetMapDecoder(fn func(*Decoder) (interface{}, error)) { @@ -187,6 +222,27 @@ func (d *Decoder) UseInternedStrings(on bool) { } } +// SetInternedStringsDictCap sets an initial capacity hint for the +// interned-string dict, avoiding slice growth as entries are appended. +// n is clamped to [0, maxDictLen]; 0 restores lazy allocation. +// +// The hint is consulted the next time the dict is allocated — typically on +// the first interned decode after construction, Reset, or ResetDict. Call +// it before decoding to guarantee it takes effect. +// +// When the decoder is managed by GetDecoder/PutDecoder, PutDecoder drops +// dicts whose capacity exceeds an internal pool-retention threshold so a +// single oversized session doesn't permanently bloat pool memory. Setting +// n above that threshold forfeits cross-Put reuse of the dict. +func (d *Decoder) SetInternedStringsDictCap(n int) { + if n < 0 { + n = 0 + } else if n > maxDictLen { + n = maxDictLen + } + d.internCap = n +} + // UsePreallocateValues enables preallocating values in chunks func (d *Decoder) UsePreallocateValues(on bool) { if on { diff --git a/encode.go b/encode.go index 2e7b664..6420663 100644 --- a/encode.go +++ b/encode.go @@ -58,6 +58,16 @@ func PutEncoder(enc *Encoder) { if cap(enc.wbuf) > 32*1024 { enc.wbuf = nil } + // Drop the interned-string dict if we own it and it grew large, so pool + // entries don't permanently retain memory from a one-off large interning + // session. A caller-owned dict is always dropped so we never hold a + // reference to caller memory across pool round-trips. + if !enc.dictOwned { + enc.dict = nil + } else if len(enc.dict) > internDictPoolCap { + enc.dict = nil + enc.dictOwned = false + } encPool.Put(enc) } @@ -94,6 +104,8 @@ type Encoder struct { buf []byte timeBuf []byte flags uint32 + internCap int // initial capacity hint for the interned-string dict + dictOwned bool // true when dict was lazily allocated by us, safe to mutate/pool } // NewEncoder returns a new encoder that writes to w. @@ -116,23 +128,33 @@ func (e *Encoder) Reset(w io.Writer) { } // ResetDict is like Reset, but also resets the dict. +// +// A non-nil dict replaces the current dict; the encoder will not mutate it +// (caller retains ownership). A nil dict keeps any internally-owned dict +// storage for reuse, cleared to empty — subsequent interned encodes skip +// the map allocation. func (e *Encoder) ResetDict(w io.Writer, dict map[string]int) { e.ResetWriter(w) e.flags = 0 e.structTag = "" - e.dict = dict + if dict != nil { + e.dict = dict + e.dictOwned = false + } } func (e *Encoder) WithDict(dict map[string]int, fn func(*Encoder) error) error { - oldDict := e.dict + oldDict, oldOwned := e.dict, e.dictOwned e.dict = dict + e.dictOwned = false err := fn(e) e.dict = oldDict + e.dictOwned = oldOwned return err } func (e *Encoder) ResetWriter(w io.Writer) { - e.dict = nil + e.releaseOrClearDict() if bw, ok := w.(writer); ok { e.w = bw } else if w == nil { @@ -150,7 +172,18 @@ func (e *Encoder) resetForMarshal() { e.w = &e.bsw e.flags = 0 e.structTag = "" - e.dict = nil + e.releaseOrClearDict() +} + +// releaseOrClearDict reuses the dict storage if we allocated it ourselves; +// otherwise it drops the reference so a caller-supplied dict is never +// mutated by a subsequent reset. +func (e *Encoder) releaseOrClearDict() { + if e.dictOwned { + clear(e.dict) + } else { + e.dict = nil + } } // SetSortMapKeys causes the Encoder to encode map keys in increasing order. @@ -220,6 +253,27 @@ func (e *Encoder) UseInternedStrings(on bool) { } } +// SetInternedStringsDictCap sets an initial capacity hint for the +// interned-string dict, avoiding map rehashing as entries are added. +// n is clamped to [0, maxDictLen]; 0 restores lazy allocation. +// +// The hint is consulted the next time the dict is allocated — typically on +// the first interned encode after construction, Reset, or ResetDict. Call +// it before encoding to guarantee it takes effect. +// +// When the encoder is managed by GetEncoder/PutEncoder, PutEncoder drops +// dicts whose length exceeds an internal pool-retention threshold so a +// single oversized session doesn't permanently bloat pool memory. Setting +// n above that threshold forfeits cross-Put reuse of the dict. +func (e *Encoder) SetInternedStringsDictCap(n int) { + if n < 0 { + n = 0 + } else if n > maxDictLen { + n = maxDictLen + } + e.internCap = n +} + func (e *Encoder) Encode(v interface{}) error { switch v := v.(type) { case nil: diff --git a/intern.go b/intern.go index 65405fe..63a8472 100644 --- a/intern.go +++ b/intern.go @@ -11,6 +11,12 @@ import ( const ( minInternedStringLen = 3 maxDictLen = math.MaxUint16 + + // internDictPoolCap is the threshold above which a pooled Encoder/Decoder + // drops its interned-string dict in Put*() instead of retaining it for + // reuse. Mirrors the wbuf/buf cap-drop pattern: keeps hot, small dicts + // warm without letting a one-off large session bloat pool memory. + internDictPoolCap = 4096 ) var internedStringExtID = int8(math.MinInt8) @@ -63,7 +69,8 @@ func (e *Encoder) encodeInternedString(s string, intern bool) error { if intern && len(s) >= minInternedStringLen && len(e.dict) < maxDictLen { if e.dict == nil { - e.dict = make(map[string]int) + e.dict = make(map[string]int, e.internCap) + e.dictOwned = true } idx := len(e.dict) e.dict[s] = idx @@ -227,6 +234,12 @@ func (d *Decoder) decodeInternedStringWithLen(n int, intern bool) (string, error } if intern && len(s) >= minInternedStringLen && len(d.dict) < maxDictLen { + if d.dict == nil { + if d.internCap > 0 { + d.dict = make([]string, 0, d.internCap) + } + d.dictOwned = true + } d.dict = append(d.dict, s) } diff --git a/intern_test.go b/intern_test.go index a2c0ff6..87bc297 100644 --- a/intern_test.go +++ b/intern_test.go @@ -144,3 +144,187 @@ func dictMap(dict []string) map[string]int { } return m } + +func TestInternedStringDictCap(t *testing.T) { + var buf bytes.Buffer + + enc := msgpack.NewEncoder(&buf) + enc.UseInternedStrings(true) + enc.SetInternedStringsDictCap(1024) + + dec := msgpack.NewDecoder(&buf) + dec.UseInternedStrings(true) + dec.SetInternedStringsDictCap(1024) + + const n = 2000 // deliberately more than the cap hint; it is a hint, not a limit + in := make([]string, n) + for i := 0; i < n; i++ { + in[i] = "key_" + string(rune('a'+i%26)) + "_" + string(rune('0'+i%10)) + "_" + + string(rune('A'+(i/10)%26)) + "_" + string(rune('0'+(i/100)%10)) + } + + for _, s := range in { + require.Nil(t, enc.EncodeString(s)) + } + for i := 0; i < n; i++ { + s, err := dec.DecodeString() + require.Nil(t, err) + require.Equal(t, in[i], s) + } +} + +func TestInternedStringDictReuseAcrossReset(t *testing.T) { + var buf1, buf2 bytes.Buffer + + enc := msgpack.NewEncoder(&buf1) + enc.UseInternedStrings(true) + + dec := msgpack.NewDecoder(&buf1) + dec.UseInternedStrings(true) + + for i := 0; i < 5; i++ { + require.Nil(t, enc.EncodeString("first-session")) + } + for i := 0; i < 5; i++ { + s, err := dec.DecodeString() + require.Nil(t, err) + require.Equal(t, "first-session", s) + } + + // Reset (not ResetDict) — dict storage should be cleared in place, + // and a new session must not see prior-session entries. + enc.Reset(&buf2) + enc.UseInternedStrings(true) + dec.Reset(&buf2) + dec.UseInternedStrings(true) + + for i := 0; i < 5; i++ { + require.Nil(t, enc.EncodeString("second-session")) + } + for i := 0; i < 5; i++ { + s, err := dec.DecodeString() + require.Nil(t, err) + require.Equal(t, "second-session", s) + } +} + +// TestResetDoesNotMutateCallerDict guards against silently clearing or +// aliasing a caller-supplied dict via Reset after ResetDict. Regression +// for the ownership bug fixed alongside #66. +func TestResetDoesNotMutateCallerDict(t *testing.T) { + externalEnc := map[string]int{"hello world": 0, "foo bar": 1} + externalDec := []string{"hello world", "foo bar"} + + var buf bytes.Buffer + + enc := msgpack.NewEncoder(&buf) + enc.ResetDict(&buf, externalEnc) + enc.Reset(&buf) // must NOT clear externalEnc + require.Equal(t, 2, len(externalEnc)) + require.Equal(t, 0, externalEnc["hello world"]) + require.Equal(t, 1, externalEnc["foo bar"]) + + dec := msgpack.NewDecoder(&buf) + dec.ResetDict(&buf, externalDec) + dec.ResetBytes(nil) // must NOT alias externalDec's backing array + + // Now encode an interned string with a fresh encoder and decode it + // through `dec`. If ResetBytes failed to drop the external alias, the + // decoded string would be appended into externalDec. + var payload bytes.Buffer + encFresh := msgpack.NewEncoder(&payload) + encFresh.UseInternedStrings(true) + require.Nil(t, encFresh.EncodeString("would-clobber")) + + dec.ResetBytes(payload.Bytes()) + dec.UseInternedStrings(true) + s, err := dec.DecodeString() + require.Nil(t, err) + require.Equal(t, "would-clobber", s) + + require.Equal(t, []string{"hello world", "foo bar"}, externalDec) +} + +// TestInternedStringDictStorageIsReused verifies that the underlying map +// bucket array and slice backing array are kept across Reset — i.e., the +// clear-in-place / truncate-in-place path actually reuses storage and +// doesn't reallocate. +func TestInternedStringDictStorageIsReused(t *testing.T) { + var buf bytes.Buffer + enc := msgpack.NewEncoder(&buf) + enc.UseInternedStrings(true) + dec := msgpack.NewDecoder(&buf) + dec.UseInternedStrings(true) + + // Warm both dicts so storage exists. + require.Nil(t, enc.EncodeString("warmup-string")) + _, err := dec.DecodeString() + require.Nil(t, err) + + // Pre-size the buffer so bytes.Buffer growth doesn't pollute the count. + buf.Grow(64) + + encAllocs := testing.AllocsPerRun(50, func() { + buf.Reset() + enc.Reset(&buf) + enc.UseInternedStrings(true) + _ = enc.EncodeString("warmup-string") + }) + // With dict storage reuse, Reset + interned encode of an already-known + // string should allocate nothing. Without reuse, the map is freshly + // allocated each call. + require.Equalf(t, float64(0), encAllocs, + "encoder intern dict storage not reused: %v allocs/op", encAllocs) + + // For the decoder, drive it from a pre-built byte slice so we can + // isolate dict allocations from reader setup. Encode the string twice: + // the first is the raw interned-tagged string (which the decoder must + // allocate a Go string for), the second is a 3-byte ext index reference + // that decodes by fetching from the dict with zero allocations. + var payload bytes.Buffer + encFresh := msgpack.NewEncoder(&payload) + encFresh.UseInternedStrings(true) + require.Nil(t, encFresh.EncodeString("warmup-string")) + require.Nil(t, encFresh.EncodeString("warmup-string")) + data := payload.Bytes() + + decAllocs := testing.AllocsPerRun(50, func() { + dec.ResetBytes(data) + dec.UseInternedStrings(true) + _, _ = dec.DecodeString() // populates dict[0]="warmup-string" + _, _ = dec.DecodeString() // fetches dict[0] by index — no alloc + }) + // The first DecodeString allocates one Go string for the raw payload + // and appends it to the dict (no slice growth thanks to reuse); the + // second DecodeString reads an ext index and returns the interned + // string without allocating. Total: 1 alloc/op for the first string. + require.Equalf(t, float64(1), decAllocs, + "decoder intern dict storage not reused: %v allocs/op", decAllocs) +} + +func TestInternedStringDictPoolRecycle(t *testing.T) { + // First pooled session: intern some strings, then return to the pool. + enc1 := msgpack.GetEncoder() + var buf1 bytes.Buffer + enc1.Reset(&buf1) + enc1.UseInternedStrings(true) + for i := 0; i < 3; i++ { + require.Nil(t, enc1.EncodeString("leaked-if-buggy")) + } + msgpack.PutEncoder(enc1) + + // Second pooled session: may reuse enc1. Its dict must not leak the + // prior session's entry — either it was cleared or replaced. + enc2 := msgpack.GetEncoder() + var buf2 bytes.Buffer + enc2.Reset(&buf2) + enc2.UseInternedStrings(true) + require.Nil(t, enc2.EncodeString("fresh-session")) + msgpack.PutEncoder(enc2) + + dec := msgpack.NewDecoder(&buf2) + dec.UseInternedStrings(true) + s, err := dec.DecodeString() + require.Nil(t, err) + require.Equal(t, "fresh-session", s) +}