diff --git a/CHANGELOG.md b/CHANGELOG.md index 266e292..6caf060 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ - **decode:** `readCode` bsr fast path — when decoding from a byte slice, reads directly from the underlying array instead of dispatching through the `io.ByteReader` interface; eliminates ~900M interface calls/sec at Arc's throughput ([#57](https://github.com/Basekick-Labs/msgpack/issues/57)) (StructUnmarshal **-7.5%**, StructUnmarshalPartially **-6.1%**) - **decode:** `PeekCode` bsr fast path — peeks directly at `bsr.data[bsr.pos]` instead of `ReadByte` + `UnreadByte` (two interface calls) ([#59](https://github.com/Basekick-Labs/msgpack/issues/59)) - **encode:** pool `OmitEmpty` filtered field slices via `sync.Pool` — when fields are actually omitted, the allocated `[]*field` slice is now returned to a pool for reuse instead of being GC'd ([#58](https://github.com/Basekick-Labs/msgpack/issues/58)) +- **encode/decode:** pool and pre-allocate interned-string dict — `SetInternedStringsDictCap(n)` pre-sizes the dict to avoid map rehashing and slice growth; pooled encoders/decoders now reuse dict storage across `Reset()` (cleared in place) instead of discarding it, and `Put*()` drops oversized dicts to keep the pool lean ([#66](https://github.com/Basekick-Labs/msgpack/issues/66)) --- diff --git a/decode.go b/decode.go index 7896774..6e40351 100644 --- a/decode.go +++ b/decode.go @@ -53,6 +53,18 @@ func PutDecoder(dec *Decoder) { } else if dec.buf != nil { dec.buf = dec.buf[:0] } + // Drop the interned-string dict if we own it and it grew large, so pool + // entries don't permanently retain memory from a one-off large interning + // session. A caller-owned dict is always dropped so we never hold a + // reference to caller memory across pool round-trips. We check cap(), + // not len(), because PutDecoder can see a truncated slice (len=0) whose + // backing array is still large. + if !dec.dictOwned { + dec.dict = nil + } else if cap(dec.dict) > internDictPoolCap { + dec.dict = nil + dec.dictOwned = false + } decPool.Put(dec) } @@ -82,6 +94,8 @@ type Decoder struct { rec []byte dict []string flags uint32 + internCap int // initial capacity hint for the interned-string dict + dictOwned bool // true when dict was lazily allocated by us, safe to mutate/pool } // NewDecoder returns a new decoder that reads from r. @@ -102,24 +116,34 @@ func (d *Decoder) Reset(r io.Reader) { } // ResetDict is like Reset, but also resets the dict. +// +// A non-nil dict replaces the current dict; the decoder will not append to +// it or otherwise mutate it (caller retains ownership). A nil dict keeps +// any internally-owned dict storage for reuse, truncated to empty — +// subsequent interned decodes skip the slice allocation. func (d *Decoder) ResetDict(r io.Reader, dict []string) { d.ResetReader(r) d.flags = 0 d.structTag = "" - d.dict = dict + if dict != nil { + d.dict = dict + d.dictOwned = false + } } func (d *Decoder) WithDict(dict []string, fn func(*Decoder) error) error { - oldDict := d.dict + oldDict, oldOwned := d.dict, d.dictOwned d.dict = dict + d.dictOwned = false err := fn(d) d.dict = oldDict + d.dictOwned = oldOwned return err } func (d *Decoder) ResetReader(r io.Reader) { d.mapDecoder = nil - d.dict = nil + d.releaseOrTruncateDict() if br, ok := r.(bufReader); ok { d.r = br @@ -144,7 +168,18 @@ func (d *Decoder) ResetBytes(data []byte) { d.mapDecoder = nil d.flags = 0 d.structTag = "" - d.dict = nil + d.releaseOrTruncateDict() +} + +// releaseOrTruncateDict reuses the dict storage if we allocated it +// ourselves; otherwise it drops the reference so a caller-supplied dict is +// never aliased or appended into by a subsequent reset. +func (d *Decoder) releaseOrTruncateDict() { + if d.dictOwned { + d.dict = d.dict[:0] + } else { + d.dict = nil + } } func (d *Decoder) SetMapDecoder(fn func(*Decoder) (interface{}, error)) { @@ -187,6 +222,27 @@ func (d *Decoder) UseInternedStrings(on bool) { } } +// SetInternedStringsDictCap sets an initial capacity hint for the +// interned-string dict, avoiding slice growth as entries are appended. +// n is clamped to [0, maxDictLen]; 0 restores lazy allocation. +// +// The hint is consulted the next time the dict is allocated — typically on +// the first interned decode after construction, Reset, or ResetDict. Call +// it before decoding to guarantee it takes effect. +// +// When the decoder is managed by GetDecoder/PutDecoder, PutDecoder drops +// dicts whose capacity exceeds an internal pool-retention threshold so a +// single oversized session doesn't permanently bloat pool memory. Setting +// n above that threshold forfeits cross-Put reuse of the dict. +func (d *Decoder) SetInternedStringsDictCap(n int) { + if n < 0 { + n = 0 + } else if n > maxDictLen { + n = maxDictLen + } + d.internCap = n +} + // UsePreallocateValues enables preallocating values in chunks func (d *Decoder) UsePreallocateValues(on bool) { if on { diff --git a/encode.go b/encode.go index 2e7b664..6420663 100644 --- a/encode.go +++ b/encode.go @@ -58,6 +58,16 @@ func PutEncoder(enc *Encoder) { if cap(enc.wbuf) > 32*1024 { enc.wbuf = nil } + // Drop the interned-string dict if we own it and it grew large, so pool + // entries don't permanently retain memory from a one-off large interning + // session. A caller-owned dict is always dropped so we never hold a + // reference to caller memory across pool round-trips. + if !enc.dictOwned { + enc.dict = nil + } else if len(enc.dict) > internDictPoolCap { + enc.dict = nil + enc.dictOwned = false + } encPool.Put(enc) } @@ -94,6 +104,8 @@ type Encoder struct { buf []byte timeBuf []byte flags uint32 + internCap int // initial capacity hint for the interned-string dict + dictOwned bool // true when dict was lazily allocated by us, safe to mutate/pool } // NewEncoder returns a new encoder that writes to w. @@ -116,23 +128,33 @@ func (e *Encoder) Reset(w io.Writer) { } // ResetDict is like Reset, but also resets the dict. +// +// A non-nil dict replaces the current dict; the encoder will not mutate it +// (caller retains ownership). A nil dict keeps any internally-owned dict +// storage for reuse, cleared to empty — subsequent interned encodes skip +// the map allocation. func (e *Encoder) ResetDict(w io.Writer, dict map[string]int) { e.ResetWriter(w) e.flags = 0 e.structTag = "" - e.dict = dict + if dict != nil { + e.dict = dict + e.dictOwned = false + } } func (e *Encoder) WithDict(dict map[string]int, fn func(*Encoder) error) error { - oldDict := e.dict + oldDict, oldOwned := e.dict, e.dictOwned e.dict = dict + e.dictOwned = false err := fn(e) e.dict = oldDict + e.dictOwned = oldOwned return err } func (e *Encoder) ResetWriter(w io.Writer) { - e.dict = nil + e.releaseOrClearDict() if bw, ok := w.(writer); ok { e.w = bw } else if w == nil { @@ -150,7 +172,18 @@ func (e *Encoder) resetForMarshal() { e.w = &e.bsw e.flags = 0 e.structTag = "" - e.dict = nil + e.releaseOrClearDict() +} + +// releaseOrClearDict reuses the dict storage if we allocated it ourselves; +// otherwise it drops the reference so a caller-supplied dict is never +// mutated by a subsequent reset. +func (e *Encoder) releaseOrClearDict() { + if e.dictOwned { + clear(e.dict) + } else { + e.dict = nil + } } // SetSortMapKeys causes the Encoder to encode map keys in increasing order. @@ -220,6 +253,27 @@ func (e *Encoder) UseInternedStrings(on bool) { } } +// SetInternedStringsDictCap sets an initial capacity hint for the +// interned-string dict, avoiding map rehashing as entries are added. +// n is clamped to [0, maxDictLen]; 0 restores lazy allocation. +// +// The hint is consulted the next time the dict is allocated — typically on +// the first interned encode after construction, Reset, or ResetDict. Call +// it before encoding to guarantee it takes effect. +// +// When the encoder is managed by GetEncoder/PutEncoder, PutEncoder drops +// dicts whose length exceeds an internal pool-retention threshold so a +// single oversized session doesn't permanently bloat pool memory. Setting +// n above that threshold forfeits cross-Put reuse of the dict. +func (e *Encoder) SetInternedStringsDictCap(n int) { + if n < 0 { + n = 0 + } else if n > maxDictLen { + n = maxDictLen + } + e.internCap = n +} + func (e *Encoder) Encode(v interface{}) error { switch v := v.(type) { case nil: diff --git a/intern.go b/intern.go index 65405fe..63a8472 100644 --- a/intern.go +++ b/intern.go @@ -11,6 +11,12 @@ import ( const ( minInternedStringLen = 3 maxDictLen = math.MaxUint16 + + // internDictPoolCap is the threshold above which a pooled Encoder/Decoder + // drops its interned-string dict in Put*() instead of retaining it for + // reuse. Mirrors the wbuf/buf cap-drop pattern: keeps hot, small dicts + // warm without letting a one-off large session bloat pool memory. + internDictPoolCap = 4096 ) var internedStringExtID = int8(math.MinInt8) @@ -63,7 +69,8 @@ func (e *Encoder) encodeInternedString(s string, intern bool) error { if intern && len(s) >= minInternedStringLen && len(e.dict) < maxDictLen { if e.dict == nil { - e.dict = make(map[string]int) + e.dict = make(map[string]int, e.internCap) + e.dictOwned = true } idx := len(e.dict) e.dict[s] = idx @@ -227,6 +234,12 @@ func (d *Decoder) decodeInternedStringWithLen(n int, intern bool) (string, error } if intern && len(s) >= minInternedStringLen && len(d.dict) < maxDictLen { + if d.dict == nil { + if d.internCap > 0 { + d.dict = make([]string, 0, d.internCap) + } + d.dictOwned = true + } d.dict = append(d.dict, s) } diff --git a/intern_test.go b/intern_test.go index a2c0ff6..87bc297 100644 --- a/intern_test.go +++ b/intern_test.go @@ -144,3 +144,187 @@ func dictMap(dict []string) map[string]int { } return m } + +func TestInternedStringDictCap(t *testing.T) { + var buf bytes.Buffer + + enc := msgpack.NewEncoder(&buf) + enc.UseInternedStrings(true) + enc.SetInternedStringsDictCap(1024) + + dec := msgpack.NewDecoder(&buf) + dec.UseInternedStrings(true) + dec.SetInternedStringsDictCap(1024) + + const n = 2000 // deliberately more than the cap hint; it is a hint, not a limit + in := make([]string, n) + for i := 0; i < n; i++ { + in[i] = "key_" + string(rune('a'+i%26)) + "_" + string(rune('0'+i%10)) + "_" + + string(rune('A'+(i/10)%26)) + "_" + string(rune('0'+(i/100)%10)) + } + + for _, s := range in { + require.Nil(t, enc.EncodeString(s)) + } + for i := 0; i < n; i++ { + s, err := dec.DecodeString() + require.Nil(t, err) + require.Equal(t, in[i], s) + } +} + +func TestInternedStringDictReuseAcrossReset(t *testing.T) { + var buf1, buf2 bytes.Buffer + + enc := msgpack.NewEncoder(&buf1) + enc.UseInternedStrings(true) + + dec := msgpack.NewDecoder(&buf1) + dec.UseInternedStrings(true) + + for i := 0; i < 5; i++ { + require.Nil(t, enc.EncodeString("first-session")) + } + for i := 0; i < 5; i++ { + s, err := dec.DecodeString() + require.Nil(t, err) + require.Equal(t, "first-session", s) + } + + // Reset (not ResetDict) — dict storage should be cleared in place, + // and a new session must not see prior-session entries. + enc.Reset(&buf2) + enc.UseInternedStrings(true) + dec.Reset(&buf2) + dec.UseInternedStrings(true) + + for i := 0; i < 5; i++ { + require.Nil(t, enc.EncodeString("second-session")) + } + for i := 0; i < 5; i++ { + s, err := dec.DecodeString() + require.Nil(t, err) + require.Equal(t, "second-session", s) + } +} + +// TestResetDoesNotMutateCallerDict guards against silently clearing or +// aliasing a caller-supplied dict via Reset after ResetDict. Regression +// for the ownership bug fixed alongside #66. +func TestResetDoesNotMutateCallerDict(t *testing.T) { + externalEnc := map[string]int{"hello world": 0, "foo bar": 1} + externalDec := []string{"hello world", "foo bar"} + + var buf bytes.Buffer + + enc := msgpack.NewEncoder(&buf) + enc.ResetDict(&buf, externalEnc) + enc.Reset(&buf) // must NOT clear externalEnc + require.Equal(t, 2, len(externalEnc)) + require.Equal(t, 0, externalEnc["hello world"]) + require.Equal(t, 1, externalEnc["foo bar"]) + + dec := msgpack.NewDecoder(&buf) + dec.ResetDict(&buf, externalDec) + dec.ResetBytes(nil) // must NOT alias externalDec's backing array + + // Now encode an interned string with a fresh encoder and decode it + // through `dec`. If ResetBytes failed to drop the external alias, the + // decoded string would be appended into externalDec. + var payload bytes.Buffer + encFresh := msgpack.NewEncoder(&payload) + encFresh.UseInternedStrings(true) + require.Nil(t, encFresh.EncodeString("would-clobber")) + + dec.ResetBytes(payload.Bytes()) + dec.UseInternedStrings(true) + s, err := dec.DecodeString() + require.Nil(t, err) + require.Equal(t, "would-clobber", s) + + require.Equal(t, []string{"hello world", "foo bar"}, externalDec) +} + +// TestInternedStringDictStorageIsReused verifies that the underlying map +// bucket array and slice backing array are kept across Reset — i.e., the +// clear-in-place / truncate-in-place path actually reuses storage and +// doesn't reallocate. +func TestInternedStringDictStorageIsReused(t *testing.T) { + var buf bytes.Buffer + enc := msgpack.NewEncoder(&buf) + enc.UseInternedStrings(true) + dec := msgpack.NewDecoder(&buf) + dec.UseInternedStrings(true) + + // Warm both dicts so storage exists. + require.Nil(t, enc.EncodeString("warmup-string")) + _, err := dec.DecodeString() + require.Nil(t, err) + + // Pre-size the buffer so bytes.Buffer growth doesn't pollute the count. + buf.Grow(64) + + encAllocs := testing.AllocsPerRun(50, func() { + buf.Reset() + enc.Reset(&buf) + enc.UseInternedStrings(true) + _ = enc.EncodeString("warmup-string") + }) + // With dict storage reuse, Reset + interned encode of an already-known + // string should allocate nothing. Without reuse, the map is freshly + // allocated each call. + require.Equalf(t, float64(0), encAllocs, + "encoder intern dict storage not reused: %v allocs/op", encAllocs) + + // For the decoder, drive it from a pre-built byte slice so we can + // isolate dict allocations from reader setup. Encode the string twice: + // the first is the raw interned-tagged string (which the decoder must + // allocate a Go string for), the second is a 3-byte ext index reference + // that decodes by fetching from the dict with zero allocations. + var payload bytes.Buffer + encFresh := msgpack.NewEncoder(&payload) + encFresh.UseInternedStrings(true) + require.Nil(t, encFresh.EncodeString("warmup-string")) + require.Nil(t, encFresh.EncodeString("warmup-string")) + data := payload.Bytes() + + decAllocs := testing.AllocsPerRun(50, func() { + dec.ResetBytes(data) + dec.UseInternedStrings(true) + _, _ = dec.DecodeString() // populates dict[0]="warmup-string" + _, _ = dec.DecodeString() // fetches dict[0] by index — no alloc + }) + // The first DecodeString allocates one Go string for the raw payload + // and appends it to the dict (no slice growth thanks to reuse); the + // second DecodeString reads an ext index and returns the interned + // string without allocating. Total: 1 alloc/op for the first string. + require.Equalf(t, float64(1), decAllocs, + "decoder intern dict storage not reused: %v allocs/op", decAllocs) +} + +func TestInternedStringDictPoolRecycle(t *testing.T) { + // First pooled session: intern some strings, then return to the pool. + enc1 := msgpack.GetEncoder() + var buf1 bytes.Buffer + enc1.Reset(&buf1) + enc1.UseInternedStrings(true) + for i := 0; i < 3; i++ { + require.Nil(t, enc1.EncodeString("leaked-if-buggy")) + } + msgpack.PutEncoder(enc1) + + // Second pooled session: may reuse enc1. Its dict must not leak the + // prior session's entry — either it was cleared or replaced. + enc2 := msgpack.GetEncoder() + var buf2 bytes.Buffer + enc2.Reset(&buf2) + enc2.UseInternedStrings(true) + require.Nil(t, enc2.EncodeString("fresh-session")) + msgpack.PutEncoder(enc2) + + dec := msgpack.NewDecoder(&buf2) + dec.UseInternedStrings(true) + s, err := dec.DecodeString() + require.Nil(t, err) + require.Equal(t, "fresh-session", s) +}