From c69cb3fb818c7e1af85e664a097fe4c7246aec99 Mon Sep 17 00:00:00 2001 From: Ignacio Van Droogenbroeck Date: Fri, 12 Jun 2026 15:05:46 -0600 Subject: [PATCH 1/3] fix: enforce alloc limit in bytes decode paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The fork's readN/readNGrow split (upstream's readN was the chunked, limit-respecting variant) left bytes(), bytesPtr() calling the package-level readN directly — the unbounded variant that allocates the full declared length upfront. On a stream decode, a malicious bin32/ str32 header declaring ~4GB forced a ~4GB allocation before any payload was read, even with alloc limits enabled (the default). The string path (d.readN method) was unaffected — it already branches on the flag. Route bytes decoding through a new readNInto method that mirrors d.readN's branch: chunked readNGrow by default, unbounded readN only when DisableAllocLimit(true) is set. Trusted workloads with large binary payloads can keep the old single-allocation behavior via DisableAllocLimit; the follow-up commit reduces the cost of the chunked path. --- decode.go | 11 +++++++++++ decode_string.go | 4 ++-- msgpack_test.go | 43 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 56 insertions(+), 2 deletions(-) diff --git a/decode.go b/decode.go index 6e40351..550dbeb 100644 --- a/decode.go +++ b/decode.go @@ -761,6 +761,17 @@ func (d *Decoder) readN(n int) ([]byte, error) { return d.buf, nil } +// readNInto reads n bytes into b, growing it as needed, and honors the +// decoder's alloc limit: unless DisableAllocLimit is set, allocation for +// a declared length is chunked by bytesAllocLimit so a malicious header +// can't force a huge upfront allocation. +func (d *Decoder) readNInto(b []byte, n int) ([]byte, error) { + if d.flags&disableAllocLimitFlag != 0 { + return readN(d.r, b, n) + } + return readNGrow(d.r, b, n) +} + func readN(r io.Reader, b []byte, n int) ([]byte, error) { if b == nil { if n == 0 { diff --git a/decode_string.go b/decode_string.go index f7ac107..b46b7a0 100644 --- a/decode_string.go +++ b/decode_string.go @@ -92,7 +92,7 @@ func (d *Decoder) bytes(c byte, b []byte) ([]byte, error) { if n == -1 { return nil, nil } - return readN(d.r, b, n) + return d.readNInto(b, n) } func (d *Decoder) decodeStringTemp() (string, error) { @@ -139,7 +139,7 @@ func (d *Decoder) bytesPtr(c byte, ptr *[]byte) error { return nil } - *ptr, err = readN(d.r, *ptr, n) + *ptr, err = d.readNInto(*ptr, n) return err } diff --git a/msgpack_test.go b/msgpack_test.go index 6de7319..c1ad0f4 100644 --- a/msgpack_test.go +++ b/msgpack_test.go @@ -3,7 +3,9 @@ package msgpack_test import ( "bufio" "bytes" + "errors" "fmt" + "io" "math" "reflect" "testing" @@ -75,6 +77,47 @@ func (t *MsgpackTest) TestLargeString() { t.Equal(dst, src) } +func (t *MsgpackTest) TestDecodeBytesHugeDeclaredLen() { + // A bin32 header declaring ~4GB with a 1-byte payload must fail with a + // read error after at most one bytesAllocLimit-sized chunk — not attempt + // to allocate the full declared length upfront. + data := []byte{0xc6, 0xff, 0xff, 0xff, 0xff, 'x'} + + var out []byte + dec := msgpack.NewDecoder(bytes.NewReader(data)) + t.True(errors.Is(dec.Decode(&out), io.ErrUnexpectedEOF)) + + // Same via DecodeBytes (d.bytes path). + dec = msgpack.NewDecoder(bytes.NewReader(data)) + _, err := dec.DecodeBytes() + t.True(errors.Is(err, io.ErrUnexpectedEOF)) + + // And via a []byte struct field (decodeBytesValue path). + var s struct{ Data []byte } + payload := append([]byte{0x81, 0xa4, 'D', 'a', 't', 'a'}, data...) + dec = msgpack.NewDecoder(bytes.NewReader(payload)) + t.True(errors.Is(dec.Decode(&s), io.ErrUnexpectedEOF)) +} + +func (t *MsgpackTest) TestDecodeBytesLargeStream() { + // Larger than bytesAllocLimit (1MB) so the chunked grow path is hit. + src := bytes.Repeat([]byte{'x'}, 2500*1024) + data, err := msgpack.Marshal(src) + t.Nil(err) + + var dst []byte + dec := msgpack.NewDecoder(bytes.NewReader(data)) + t.Nil(dec.Decode(&dst)) + t.Equal(src, dst) + + // DisableAllocLimit path still works. + dst = nil + dec = msgpack.NewDecoder(bytes.NewReader(data)) + dec.DisableAllocLimit(true) + t.Nil(dec.Decode(&dst)) + t.Equal(src, dst) +} + func (t *MsgpackTest) TestSliceOfStructs() { in := []*nameStruct{{"hello"}} var out []*nameStruct From 43571eee3418605b9d4d3c2794038c5bf0058c60 Mon Sep 17 00:00:00 2001 From: Ignacio Van Droogenbroeck Date: Fri, 12 Jun 2026 15:13:11 -0600 Subject: [PATCH 2/3] perf: exact-capacity chunked growth in readNGrow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the append-based chunk growth in readNGrow with explicit exact-capacity sizing: grow to min(n, max(2*pos, pos+bytesAllocLimit)) per round. This preserves the prior security bound — allocation never runs more than max(received, bytesAllocLimit) ahead of data actually supplied — while eliminating append's capacity overshoot beyond n and reducing alloc+copy rounds. readN (DisableAllocLimit path) similarly allocates exactly n instead of append-growing, skipping a useless copy of old contents that ReadFull overwrites anyway. 4MB stream decode vs v6 (count=6, Apple M3 Max): DecodeStringStream4MB 549.7µs → 381.9µs (-30.5%) 15.4MiB → 11.0MiB (-28.5%) 7 → 6 allocs The bytes path is slower than v6 (+127%) only because v6 skipped the alloc limit entirely there (see previous commit); against the corrected chunked baseline this change is -20% time, -38% B/op. DisableAllocLimit restores single-allocation reads for trusted input. Closes #63 --- bench_test.go | 44 ++++++++++++++++++++++++++++++++++++++++++++ decode.go | 43 +++++++++++++++++++++++++++---------------- 2 files changed, 71 insertions(+), 16 deletions(-) diff --git a/bench_test.go b/bench_test.go index 77acc88..3b2d132 100644 --- a/bench_test.go +++ b/bench_test.go @@ -131,6 +131,50 @@ func BenchmarkByteSlice(b *testing.B) { benchmarkEncodeDecode(b, src, &dst) } +// Stream decodes larger than bytesAllocLimit (1MB) exercise the chunked +// growth path in readNGrow (issue #63). + +func BenchmarkDecodeBytesStream4MB(b *testing.B) { + data, err := msgpack.Marshal(make([]byte, 4<<20)) + if err != nil { + b.Fatal(err) + } + rd := bytes.NewReader(data) + var out []byte + + b.ReportAllocs() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + rd.Reset(data) + dec := msgpack.NewDecoder(rd) + out = nil + if err := dec.Decode(&out); err != nil { + b.Fatal(err) + } + } +} + +func BenchmarkDecodeStringStream4MB(b *testing.B) { + data, err := msgpack.Marshal(string(make([]byte, 4<<20))) + if err != nil { + b.Fatal(err) + } + rd := bytes.NewReader(data) + var out string + + b.ReportAllocs() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + rd.Reset(data) + dec := msgpack.NewDecoder(rd) + if err := dec.Decode(&out); err != nil { + b.Fatal(err) + } + } +} + func BenchmarkByteArray(b *testing.B) { var src [1024]byte var dst [1024]byte diff --git a/decode.go b/decode.go index 550dbeb..557af0a 100644 --- a/decode.go +++ b/decode.go @@ -777,13 +777,14 @@ func readN(r io.Reader, b []byte, n int) ([]byte, error) { if n == 0 { return make([]byte, 0), nil } - b = make([]byte, 0, n) - } - - if n > cap(b) { - b = append(b, make([]byte, n-len(b))...) + b = make([]byte, n) } else if n <= cap(b) { b = b[:n] + } else { + // ReadFull overwrites all of b, so allocate exactly n instead of + // growing with append (which would copy the old contents for + // nothing and overshoot the capacity). + b = make([]byte, n) } _, err := io.ReadFull(r, b) @@ -810,22 +811,32 @@ func readNGrow(r io.Reader, b []byte, n int) ([]byte, error) { _, err := io.ReadFull(r, b) return b, err } - b = b[:cap(b)] - - var pos int - for { - alloc := min(n-len(b), bytesAllocLimit) - b = append(b, make([]byte, alloc)...) - _, err := io.ReadFull(r, b[pos:]) - if err != nil { + // n exceeds the current capacity. Fill the available capacity first, + // then grow to at most one bytesAllocLimit chunk or 2x the bytes + // already received ahead of the data — whichever is larger — capped + // at the exact target n. A malicious declared length can therefore + // never force allocation more than max(received, bytesAllocLimit) + // ahead of the data actually supplied (the same bound the previous + // chunked append provided), while exact sizing avoids append's + // capacity overshoot beyond n and reduces alloc+copy rounds. + b = b[:cap(b)] + pos := len(b) + if pos > 0 { + if _, err := io.ReadFull(r, b); err != nil { return b, err } + } + for pos < n { + newCap := min(max(2*pos, pos+bytesAllocLimit), n) + nb := make([]byte, newCap) + copy(nb, b[:pos]) + b = nb - if len(b) == n { - break + if _, err := io.ReadFull(r, b[pos:]); err != nil { + return b, err } - pos = len(b) + pos = newCap } return b, nil From 182322de803148f21ef9be68fefff11c482b1c68 Mon Sep 17 00:00:00 2001 From: Ignacio Van Droogenbroeck Date: Fri, 12 Jun 2026 15:23:14 -0600 Subject: [PATCH 3/3] refactor: address internal review findings for bytes alloc-limit fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - readNInto: validate the declared length against remaining input on the byte-slice (Unmarshal) path before allocating — restores single exact-size allocation there (1 alloc for a 4MB payload, matching the pre-fix behavior) while also closing the same declared-length bomb on the byte-slice path. Error semantics match io.ReadFull (EOF vs ErrUnexpectedEOF), keeping decoderTests expectations intact. - Remove the pre-Go-1.21 local min helper; use builtin min/max. - Add Unmarshal-path tests: huge declared length fails fast; 2.5MB round-trip; result is caller-owned (no aliasing of the input). - Add CHANGELOG entries (Bug Fixes + Performance) for #63. --- CHANGELOG.md | 12 ++++++++++++ decode.go | 23 ++++++++++++++++------- msgpack_test.go | 21 +++++++++++++++++++++ 3 files changed, 49 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c661dd7..9f993e4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,15 @@ +## Unreleased + +### Bug Fixes + +- **decode:** enforce alloc limit in bytes decode paths — `bytes()`/`bytesPtr()` called the unbounded `readN` directly, so on a stream decode a malicious `bin32`/`str32` header declaring ~4GB forced a ~4GB upfront allocation even with alloc limits enabled (the default). Bytes decoding now routes through `readNInto`, which chunks allocation by `bytesAllocLimit` unless `DisableAllocLimit(true)` is set; on the byte-slice (`Unmarshal`) path the declared length is validated against the remaining input before a single exact-size allocation ([#63](https://github.com/Basekick-Labs/msgpack/issues/63)) + +### Performance + +- **decode:** exact-capacity chunked growth in `readNGrow` — replaces append-based chunk growth with explicit `min(n, max(2*pos, pos+bytesAllocLimit))` sizing: same allocation-ahead-of-data security bound, no capacity overshoot beyond the target, fewer alloc+copy rounds ([#63](https://github.com/Basekick-Labs/msgpack/issues/63)) (4MB string stream decode **-30.5% ns/op**, **-28.5% B/op**) + +--- + ## v6.1.0 (2026-04-27) ### Performance diff --git a/decode.go b/decode.go index 557af0a..ef91ad2 100644 --- a/decode.go +++ b/decode.go @@ -766,6 +766,22 @@ func (d *Decoder) readN(n int) ([]byte, error) { // a declared length is chunked by bytesAllocLimit so a malicious header // can't force a huge upfront allocation. func (d *Decoder) readNInto(b []byte, n int) ([]byte, error) { + // Byte-slice reader: the declared length can be validated against the + // data actually present before allocating, so a single exact-size + // allocation is safe regardless of the alloc limit. (Unlike d.readN's + // zero-copy fast path, the result here is caller-owned and must be a + // copy, not a sub-slice of the input.) + if d.bsr.data != nil { + if remaining := len(d.bsr.data) - d.bsr.pos; n > remaining { + // Match io.ReadFull semantics: EOF when no data remains, + // ErrUnexpectedEOF on a partial read. + if remaining == 0 && n > 0 { + return b, io.EOF + } + return b, io.ErrUnexpectedEOF + } + return readN(d.r, b, n) + } if d.flags&disableAllocLimitFlag != 0 { return readN(d.r, b, n) } @@ -841,10 +857,3 @@ func readNGrow(r io.Reader, b []byte, n int) ([]byte, error) { return b, nil } - -func min(a, b int) int { //nolint:unparam - if a <= b { - return a - } - return b -} diff --git a/msgpack_test.go b/msgpack_test.go index c1ad0f4..84d14bc 100644 --- a/msgpack_test.go +++ b/msgpack_test.go @@ -99,6 +99,27 @@ func (t *MsgpackTest) TestDecodeBytesHugeDeclaredLen() { t.True(errors.Is(dec.Decode(&s), io.ErrUnexpectedEOF)) } +func (t *MsgpackTest) TestUnmarshalBytesHugeDeclaredLen() { + // On the byte-slice path the declared length is validated against the + // remaining input before allocating: must fail fast, no huge alloc. + data := []byte{0xc6, 0xff, 0xff, 0xff, 0xff, 'x'} + var out []byte + t.True(errors.Is(msgpack.Unmarshal(data, &out), io.ErrUnexpectedEOF)) +} + +func (t *MsgpackTest) TestUnmarshalBytesLarge() { + src := bytes.Repeat([]byte{'x'}, 2500*1024) + data, err := msgpack.Marshal(src) + t.Nil(err) + + var dst []byte + t.Nil(msgpack.Unmarshal(data, &dst)) + t.Equal(src, dst) + // The result must be caller-owned, not an alias of the input buffer. + data[len(data)-1] = 'y' + t.Equal(byte('x'), dst[len(dst)-1]) +} + func (t *MsgpackTest) TestDecodeBytesLargeStream() { // Larger than bytesAllocLimit (1MB) so the chunked grow path is hit. src := bytes.Repeat([]byte{'x'}, 2500*1024)