From c69cb3fb818c7e1af85e664a097fe4c7246aec99 Mon Sep 17 00:00:00 2001
From: Ignacio Van Droogenbroeck <ignacio@vandroogenbroeck.net>
Date: Fri, 12 Jun 2026 15:05:46 -0600
Subject: [PATCH 1/3] fix: enforce alloc limit in bytes decode paths
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The fork's readN/readNGrow split (upstream's readN was the chunked,
limit-respecting variant) left bytes(), bytesPtr() calling the
package-level readN directly — the unbounded variant that allocates the
full declared length upfront. On a stream decode, a malicious bin32/
str32 header declaring ~4GB forced a ~4GB allocation before any payload
was read, even with alloc limits enabled (the default). The string path
(d.readN method) was unaffected — it already branches on the flag.

Route bytes decoding through a new readNInto method that mirrors
d.readN's branch: chunked readNGrow by default, unbounded readN only
when DisableAllocLimit(true) is set.

Trusted workloads with large binary payloads can keep the old
single-allocation behavior via DisableAllocLimit; the follow-up commit
reduces the cost of the chunked path.
---
 decode.go        | 11 +++++++++++
 decode_string.go |  4 ++--
 msgpack_test.go  | 43 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 56 insertions(+), 2 deletions(-)

diff --git a/decode.go b/decode.go
index 6e40351..550dbeb 100644
--- a/decode.go
+++ b/decode.go
@@ -761,6 +761,17 @@ func (d *Decoder) readN(n int) ([]byte, error) {
 	return d.buf, nil
 }
 
+// readNInto reads n bytes into b, growing it as needed, and honors the
+// decoder's alloc limit: unless DisableAllocLimit is set, allocation for
+// a declared length is chunked by bytesAllocLimit so a malicious header
+// can't force a huge upfront allocation.
+func (d *Decoder) readNInto(b []byte, n int) ([]byte, error) {
+	if d.flags&disableAllocLimitFlag != 0 {
+		return readN(d.r, b, n)
+	}
+	return readNGrow(d.r, b, n)
+}
+
 func readN(r io.Reader, b []byte, n int) ([]byte, error) {
 	if b == nil {
 		if n == 0 {
diff --git a/decode_string.go b/decode_string.go
index f7ac107..b46b7a0 100644
--- a/decode_string.go
+++ b/decode_string.go
@@ -92,7 +92,7 @@ func (d *Decoder) bytes(c byte, b []byte) ([]byte, error) {
 	if n == -1 {
 		return nil, nil
 	}
-	return readN(d.r, b, n)
+	return d.readNInto(b, n)
 }
 
 func (d *Decoder) decodeStringTemp() (string, error) {
@@ -139,7 +139,7 @@ func (d *Decoder) bytesPtr(c byte, ptr *[]byte) error {
 		return nil
 	}
 
-	*ptr, err = readN(d.r, *ptr, n)
+	*ptr, err = d.readNInto(*ptr, n)
 	return err
 }
 
diff --git a/msgpack_test.go b/msgpack_test.go
index 6de7319..c1ad0f4 100644
--- a/msgpack_test.go
+++ b/msgpack_test.go
@@ -3,7 +3,9 @@ package msgpack_test
 import (
 	"bufio"
 	"bytes"
+	"errors"
 	"fmt"
+	"io"
 	"math"
 	"reflect"
 	"testing"
@@ -75,6 +77,47 @@ func (t *MsgpackTest) TestLargeString() {
 	t.Equal(dst, src)
 }
 
+func (t *MsgpackTest) TestDecodeBytesHugeDeclaredLen() {
+	// A bin32 header declaring ~4GB with a 1-byte payload must fail with a
+	// read error after at most one bytesAllocLimit-sized chunk — not attempt
+	// to allocate the full declared length upfront.
+	data := []byte{0xc6, 0xff, 0xff, 0xff, 0xff, 'x'}
+
+	var out []byte
+	dec := msgpack.NewDecoder(bytes.NewReader(data))
+	t.True(errors.Is(dec.Decode(&out), io.ErrUnexpectedEOF))
+
+	// Same via DecodeBytes (d.bytes path).
+	dec = msgpack.NewDecoder(bytes.NewReader(data))
+	_, err := dec.DecodeBytes()
+	t.True(errors.Is(err, io.ErrUnexpectedEOF))
+
+	// And via a []byte struct field (decodeBytesValue path).
+	var s struct{ Data []byte }
+	payload := append([]byte{0x81, 0xa4, 'D', 'a', 't', 'a'}, data...)
+	dec = msgpack.NewDecoder(bytes.NewReader(payload))
+	t.True(errors.Is(dec.Decode(&s), io.ErrUnexpectedEOF))
+}
+
+func (t *MsgpackTest) TestDecodeBytesLargeStream() {
+	// Larger than bytesAllocLimit (1MB) so the chunked grow path is hit.
+	src := bytes.Repeat([]byte{'x'}, 2500*1024)
+	data, err := msgpack.Marshal(src)
+	t.Nil(err)
+
+	var dst []byte
+	dec := msgpack.NewDecoder(bytes.NewReader(data))
+	t.Nil(dec.Decode(&dst))
+	t.Equal(src, dst)
+
+	// DisableAllocLimit path still works.
+	dst = nil
+	dec = msgpack.NewDecoder(bytes.NewReader(data))
+	dec.DisableAllocLimit(true)
+	t.Nil(dec.Decode(&dst))
+	t.Equal(src, dst)
+}
+
 func (t *MsgpackTest) TestSliceOfStructs() {
 	in := []*nameStruct{{"hello"}}
 	var out []*nameStruct

From 43571eee3418605b9d4d3c2794038c5bf0058c60 Mon Sep 17 00:00:00 2001
From: Ignacio Van Droogenbroeck <ignacio@vandroogenbroeck.net>
Date: Fri, 12 Jun 2026 15:13:11 -0600
Subject: [PATCH 2/3] perf: exact-capacity chunked growth in readNGrow
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace the append-based chunk growth in readNGrow with explicit
exact-capacity sizing: grow to min(n, max(2*pos, pos+bytesAllocLimit))
per round. This preserves the prior security bound — allocation never
runs more than max(received, bytesAllocLimit) ahead of data actually
supplied — while eliminating append's capacity overshoot beyond n and
reducing alloc+copy rounds. readN (DisableAllocLimit path) similarly
allocates exactly n instead of append-growing, skipping a useless copy
of old contents that ReadFull overwrites anyway.

4MB stream decode vs v6 (count=6, Apple M3 Max):

  DecodeStringStream4MB   549.7µs → 381.9µs  (-30.5%)  15.4MiB → 11.0MiB (-28.5%)  7 → 6 allocs

The bytes path is slower than v6 (+127%) only because v6 skipped the
alloc limit entirely there (see previous commit); against the corrected
chunked baseline this change is -20% time, -38% B/op. DisableAllocLimit
restores single-allocation reads for trusted input.

Closes #63
---
 bench_test.go | 44 ++++++++++++++++++++++++++++++++++++++++++++
 decode.go     | 43 +++++++++++++++++++++++++++----------------
 2 files changed, 71 insertions(+), 16 deletions(-)

diff --git a/bench_test.go b/bench_test.go
index 77acc88..3b2d132 100644
--- a/bench_test.go
+++ b/bench_test.go
@@ -131,6 +131,50 @@ func BenchmarkByteSlice(b *testing.B) {
 	benchmarkEncodeDecode(b, src, &dst)
 }
 
+// Stream decodes larger than bytesAllocLimit (1MB) exercise the chunked
+// growth path in readNGrow (issue #63).
+
+func BenchmarkDecodeBytesStream4MB(b *testing.B) {
+	data, err := msgpack.Marshal(make([]byte, 4<<20))
+	if err != nil {
+		b.Fatal(err)
+	}
+	rd := bytes.NewReader(data)
+	var out []byte
+
+	b.ReportAllocs()
+	b.ResetTimer()
+
+	for i := 0; i < b.N; i++ {
+		rd.Reset(data)
+		dec := msgpack.NewDecoder(rd)
+		out = nil
+		if err := dec.Decode(&out); err != nil {
+			b.Fatal(err)
+		}
+	}
+}
+
+func BenchmarkDecodeStringStream4MB(b *testing.B) {
+	data, err := msgpack.Marshal(string(make([]byte, 4<<20)))
+	if err != nil {
+		b.Fatal(err)
+	}
+	rd := bytes.NewReader(data)
+	var out string
+
+	b.ReportAllocs()
+	b.ResetTimer()
+
+	for i := 0; i < b.N; i++ {
+		rd.Reset(data)
+		dec := msgpack.NewDecoder(rd)
+		if err := dec.Decode(&out); err != nil {
+			b.Fatal(err)
+		}
+	}
+}
+
 func BenchmarkByteArray(b *testing.B) {
 	var src [1024]byte
 	var dst [1024]byte
diff --git a/decode.go b/decode.go
index 550dbeb..557af0a 100644
--- a/decode.go
+++ b/decode.go
@@ -777,13 +777,14 @@ func readN(r io.Reader, b []byte, n int) ([]byte, error) {
 		if n == 0 {
 			return make([]byte, 0), nil
 		}
-		b = make([]byte, 0, n)
-	}
-
-	if n > cap(b) {
-		b = append(b, make([]byte, n-len(b))...)
+		b = make([]byte, n)
 	} else if n <= cap(b) {
 		b = b[:n]
+	} else {
+		// ReadFull overwrites all of b, so allocate exactly n instead of
+		// growing with append (which would copy the old contents for
+		// nothing and overshoot the capacity).
+		b = make([]byte, n)
 	}
 
 	_, err := io.ReadFull(r, b)
@@ -810,22 +811,32 @@ func readNGrow(r io.Reader, b []byte, n int) ([]byte, error) {
 		_, err := io.ReadFull(r, b)
 		return b, err
 	}
-	b = b[:cap(b)]
-
-	var pos int
-	for {
-		alloc := min(n-len(b), bytesAllocLimit)
-		b = append(b, make([]byte, alloc)...)
 
-		_, err := io.ReadFull(r, b[pos:])
-		if err != nil {
+	// n exceeds the current capacity. Fill the available capacity first,
+	// then grow to at most one bytesAllocLimit chunk or 2x the bytes
+	// already received ahead of the data — whichever is larger — capped
+	// at the exact target n. A malicious declared length can therefore
+	// never force allocation more than max(received, bytesAllocLimit)
+	// ahead of the data actually supplied (the same bound the previous
+	// chunked append provided), while exact sizing avoids append's
+	// capacity overshoot beyond n and reduces alloc+copy rounds.
+	b = b[:cap(b)]
+	pos := len(b)
+	if pos > 0 {
+		if _, err := io.ReadFull(r, b); err != nil {
 			return b, err
 		}
+	}
+	for pos < n {
+		newCap := min(max(2*pos, pos+bytesAllocLimit), n)
+		nb := make([]byte, newCap)
+		copy(nb, b[:pos])
+		b = nb
 
-		if len(b) == n {
-			break
+		if _, err := io.ReadFull(r, b[pos:]); err != nil {
+			return b, err
 		}
-		pos = len(b)
+		pos = newCap
 	}
 
 	return b, nil

From 182322de803148f21ef9be68fefff11c482b1c68 Mon Sep 17 00:00:00 2001
From: Ignacio Van Droogenbroeck <ignacio@vandroogenbroeck.net>
Date: Fri, 12 Jun 2026 15:23:14 -0600
Subject: [PATCH 3/3] refactor: address internal review findings for bytes
 alloc-limit fix
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- readNInto: validate the declared length against remaining input on the
  byte-slice (Unmarshal) path before allocating — restores single
  exact-size allocation there (1 alloc for a 4MB payload, matching the
  pre-fix behavior) while also closing the same declared-length bomb on
  the byte-slice path. Error semantics match io.ReadFull (EOF vs
  ErrUnexpectedEOF), keeping decoderTests expectations intact.
- Remove the pre-Go-1.21 local min helper; use builtin min/max.
- Add Unmarshal-path tests: huge declared length fails fast; 2.5MB
  round-trip; result is caller-owned (no aliasing of the input).
- Add CHANGELOG entries (Bug Fixes + Performance) for #63.
---
 CHANGELOG.md    | 12 ++++++++++++
 decode.go       | 23 ++++++++++++++++-------
 msgpack_test.go | 21 +++++++++++++++++++++
 3 files changed, 49 insertions(+), 7 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c661dd7..9f993e4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,15 @@
+## Unreleased
+
+### Bug Fixes
+
+- **decode:** enforce alloc limit in bytes decode paths — `bytes()`/`bytesPtr()` called the unbounded `readN` directly, so on a stream decode a malicious `bin32`/`str32` header declaring ~4GB forced a ~4GB upfront allocation even with alloc limits enabled (the default). Bytes decoding now routes through `readNInto`, which chunks allocation by `bytesAllocLimit` unless `DisableAllocLimit(true)` is set; on the byte-slice (`Unmarshal`) path the declared length is validated against the remaining input before a single exact-size allocation ([#63](https://github.com/Basekick-Labs/msgpack/issues/63))
+
+### Performance
+
+- **decode:** exact-capacity chunked growth in `readNGrow` — replaces append-based chunk growth with explicit `min(n, max(2*pos, pos+bytesAllocLimit))` sizing: same allocation-ahead-of-data security bound, no capacity overshoot beyond the target, fewer alloc+copy rounds ([#63](https://github.com/Basekick-Labs/msgpack/issues/63)) (4MB string stream decode **-30.5% ns/op**, **-28.5% B/op**)
+
+---
+
 ## v6.1.0 (2026-04-27)
 
 ### Performance
diff --git a/decode.go b/decode.go
index 557af0a..ef91ad2 100644
--- a/decode.go
+++ b/decode.go
@@ -766,6 +766,22 @@ func (d *Decoder) readN(n int) ([]byte, error) {
 // a declared length is chunked by bytesAllocLimit so a malicious header
 // can't force a huge upfront allocation.
 func (d *Decoder) readNInto(b []byte, n int) ([]byte, error) {
+	// Byte-slice reader: the declared length can be validated against the
+	// data actually present before allocating, so a single exact-size
+	// allocation is safe regardless of the alloc limit. (Unlike d.readN's
+	// zero-copy fast path, the result here is caller-owned and must be a
+	// copy, not a sub-slice of the input.)
+	if d.bsr.data != nil {
+		if remaining := len(d.bsr.data) - d.bsr.pos; n > remaining {
+			// Match io.ReadFull semantics: EOF when no data remains,
+			// ErrUnexpectedEOF on a partial read.
+			if remaining == 0 && n > 0 {
+				return b, io.EOF
+			}
+			return b, io.ErrUnexpectedEOF
+		}
+		return readN(d.r, b, n)
+	}
 	if d.flags&disableAllocLimitFlag != 0 {
 		return readN(d.r, b, n)
 	}
@@ -841,10 +857,3 @@ func readNGrow(r io.Reader, b []byte, n int) ([]byte, error) {
 
 	return b, nil
 }
-
-func min(a, b int) int { //nolint:unparam
-	if a <= b {
-		return a
-	}
-	return b
-}
diff --git a/msgpack_test.go b/msgpack_test.go
index c1ad0f4..84d14bc 100644
--- a/msgpack_test.go
+++ b/msgpack_test.go
@@ -99,6 +99,27 @@ func (t *MsgpackTest) TestDecodeBytesHugeDeclaredLen() {
 	t.True(errors.Is(dec.Decode(&s), io.ErrUnexpectedEOF))
 }
 
+func (t *MsgpackTest) TestUnmarshalBytesHugeDeclaredLen() {
+	// On the byte-slice path the declared length is validated against the
+	// remaining input before allocating: must fail fast, no huge alloc.
+	data := []byte{0xc6, 0xff, 0xff, 0xff, 0xff, 'x'}
+	var out []byte
+	t.True(errors.Is(msgpack.Unmarshal(data, &out), io.ErrUnexpectedEOF))
+}
+
+func (t *MsgpackTest) TestUnmarshalBytesLarge() {
+	src := bytes.Repeat([]byte{'x'}, 2500*1024)
+	data, err := msgpack.Marshal(src)
+	t.Nil(err)
+
+	var dst []byte
+	t.Nil(msgpack.Unmarshal(data, &dst))
+	t.Equal(src, dst)
+	// The result must be caller-owned, not an alias of the input buffer.
+	data[len(data)-1] = 'y'
+	t.Equal(byte('x'), dst[len(dst)-1])
+}
+
 func (t *MsgpackTest) TestDecodeBytesLargeStream() {
 	// Larger than bytesAllocLimit (1MB) so the chunked grow path is hit.
 	src := bytes.Repeat([]byte{'x'}, 2500*1024)