From 4ec8d0467b65d322845c45f34e673ecdaa0b6aaa Mon Sep 17 00:00:00 2001
From: Lianet Magrans <lmagrans@confluent.io>
Date: Fri, 5 Jun 2026 12:09:20 -0400
Subject: [PATCH 01/11] support dyn for uncompressed

---
 checkstyle/suppressions.xml                   |   2 +-
 .../producer/internals/BufferPool.java        |  62 ++--
 .../producer/internals/ChunkedBufferPool.java | 200 +++++++++++
 .../ChunkedByteBufferOutputStream.java        | 246 +++++++++++++
 .../internals/ChunkedRecordAccumulator.java   | 336 ++++++++++++++++++
 .../producer/internals/ProducerBatch.java     |  33 ++
 .../producer/internals/RecordAccumulator.java |  61 ++--
 .../record/internal/MemoryRecordsBuilder.java |   9 +
 gradle/spotbugs-exclude.xml                   |  12 +
 9 files changed, 914 insertions(+), 47 deletions(-)
 create mode 100644 clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedBufferPool.java
 create mode 100644 clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedByteBufferOutputStream.java
 create mode 100644 clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedRecordAccumulator.java
diff --git a/checkstyle/suppressions.xml b/checkstyle/suppressions.xml
index 1881f95ead5ea..a537e61d0217e 100644
--- a/checkstyle/suppressions.xml
+++ b/checkstyle/suppressions.xml
@@ -71,7 +71,7 @@
               files="(Utils|Topic|Lz4BlockOutputStream|JoinGroupRequest).java"/>
 
     <suppress checks="CyclomaticComplexity"
-              files="(AbstractFetch|ClientTelemetryReporter|ConsumerCoordinator|CommitRequestManager|FetchCollector|OffsetFetcherUtils|KafkaProducer|Sender|ConfigDef|KerberosLogin|AbstractRequest|AbstractResponse|Selector|SslFactory|SslTransportLayer|SaslClientAuthenticator|SaslClientCallbackHandler|SaslServerAuthenticator|AbstractCoordinator|TransactionManager|AbstractStickyAssignor|DefaultSslEngineFactory|Authorizer|RecordAccumulator|MemoryRecords|FetchSessionHandler|MockAdminClient).java"/>
+              files="(AbstractFetch|ChunkedBufferPool|ClientTelemetryReporter|ConsumerCoordinator|CommitRequestManager|FetchCollector|OffsetFetcherUtils|KafkaProducer|Sender|ConfigDef|KerberosLogin|AbstractRequest|AbstractResponse|Selector|SslFactory|SslTransportLayer|SaslClientAuthenticator|SaslClientCallbackHandler|SaslServerAuthenticator|AbstractCoordinator|TransactionManager|AbstractStickyAssignor|DefaultSslEngineFactory|Authorizer|RecordAccumulator|MemoryRecords|FetchSessionHandler|MockAdminClient).java"/>
 
     <suppress checks="JavaNCSS"
               files="(AbstractRequest|AbstractResponse|KerberosLogin|WorkerSinkTaskTest|TransactionManagerTest|SenderTest|KafkaAdminClient|ConsumerCoordinatorTest|KafkaAdminClientTest|KafkaProducerTest).java"/>
diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/internals/BufferPool.java b/clients/src/main/java/org/apache/kafka/clients/producer/internals/BufferPool.java
index 517a2bd9ca7a6..f376c0fe2f4a3 100644
--- a/clients/src/main/java/org/apache/kafka/clients/producer/internals/BufferPool.java
+++ b/clients/src/main/java/org/apache/kafka/clients/producer/internals/BufferPool.java
@@ -48,15 +48,19 @@ public class BufferPool {
 
     private final long totalMemory;
     private final int poolableSize;
-    private final ReentrantLock lock;
-    private final Deque<ByteBuffer> free;
-    private final Deque<Condition> waiters;
-    /** Total available memory is the sum of nonPooledAvailableMemory and the number of byte buffers in free * poolableSize.  */
-    private long nonPooledAvailableMemory;
+    /** Lock held for any read or write of {@link #free}, {@link #waiters}, {@link #nonPooledAvailableMemory}, or {@link #closed}. */
+    protected final ReentrantLock lock;
+    /** Pooled buffers of capacity {@link #poolableSize}, available for reuse. Guarded by {@link #lock}. */
+    protected final Deque<ByteBuffer> free;
+    /** FIFO queue of pending allocation requests; the longest-waiting thread is woken first. Guarded by {@link #lock}. */
+    protected final Deque<Condition> waiters;
+    /** Total available memory is the sum of nonPooledAvailableMemory and the number of byte buffers in free * poolableSize. Guarded by {@link #lock}. */
+    protected long nonPooledAvailableMemory;
     private final Metrics metrics;
-    private final Time time;
+    protected final Time time;
     private final Sensor waitTime;
-    private boolean closed;
+    /** True once {@link #close()} has been invoked. */
+    protected boolean closed;
 
     /**
      * Create a new buffer pool
@@ -192,8 +196,7 @@ public ByteBuffer allocate(int size, long maxTimeToBlockMs) throws InterruptedEx
             // signal any additional waiters if there is more memory left
             // over for them
             try {
-                if (!(this.nonPooledAvailableMemory == 0 && this.free.isEmpty()) && !this.waiters.isEmpty())
-                    this.waiters.peekFirst().signal();
+                signalNextWaiterIfMemoryAvailable();
             } finally {
                 // Another finally... otherwise find bugs complains
                 lock.unlock();
@@ -211,6 +214,15 @@ protected void recordWaitTime(long timeNs) {
         this.waitTime.record(timeNs, time.milliseconds());
     }
 
+    /**
+     * Wake the longest-waiting thread if any memory (pooled or non-pooled) is available.
+     * Must be called with {@link #lock} held. No-op if no waiters or no memory is free.
+     */
+    protected void signalNextWaiterIfMemoryAvailable() {
+        if (!(this.nonPooledAvailableMemory == 0 && this.free.isEmpty()) && !this.waiters.isEmpty())
+            this.waiters.peekFirst().signal();
+    }
+
     /**
      * Allocate a buffer.  If buffer allocation fails (e.g. because of OOM) then return the size count back to
      * available memory and signal the next waiter if it exists.
@@ -222,16 +234,24 @@ private ByteBuffer safeAllocateByteBuffer(int size) {
             error = false;
             return buffer;
         } finally {
-            if (error) {
-                this.lock.lock();
-                try {
-                    this.nonPooledAvailableMemory += size;
-                    if (!this.waiters.isEmpty())
-                        this.waiters.peekFirst().signal();
-                } finally {
-                    this.lock.unlock();
-                }
-            }
+            if (error)
+                releaseReservedBytes(size);
+        }
+    }
+
+    /**
+     * Return previously-reserved non-pooled bytes to the pool and signal the next
+     * waiter. Acquires {@link #lock} internally. Used by callers
+     * that reserve memory and then need to roll back the reservation (e.g., upon errors).
+     */
+    protected void releaseReservedBytes(long bytes) {
+        this.lock.lock();
+        try {
+            this.nonPooledAvailableMemory += bytes;
+            if (!this.waiters.isEmpty())
+                this.waiters.peekFirst().signal();
+        } finally {
+            this.lock.unlock();
         }
     }
 
@@ -242,9 +262,9 @@ protected ByteBuffer allocateByteBuffer(int size) {
 
     /**
      * Attempt to ensure we have at least the requested number of bytes of memory for allocation by deallocating pooled
-     * buffers (if needed)
+     * buffers (if needed). Must be called with {@link #lock} held.
      */
-    private void freeUp(int size) {
+    protected void freeUp(int size) {
         while (!this.free.isEmpty() && this.nonPooledAvailableMemory < size)
             this.nonPooledAvailableMemory += this.free.pollLast().capacity();
     }
diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedBufferPool.java b/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedBufferPool.java
new file mode 100644
index 0000000000000..96a4e6e0b36e4
--- /dev/null
+++ b/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedBufferPool.java
@@ -0,0 +1,200 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.clients.producer.internals;
+
+import org.apache.kafka.clients.producer.BufferExhaustedException;
+import org.apache.kafka.common.KafkaException;
+import org.apache.kafka.common.metrics.Metrics;
+import org.apache.kafka.common.utils.Time;
+
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.locks.Condition;
+
+/**
+ * A {@link BufferPool} dedicated to chunk-sized buffer reuse. The chunk size is passed to the
+ * parent as its {@link #poolableSize()}.
+ * <p>
+ * Adds {@link #allocateChunks(int, long)} for callers that need to acquire multiple
+ * chunks at once. The implementation is atomic across the K-chunk request:
+ * a single lock acquisition for the whole reservation, a single {@link Condition}
+ * added to {@link #waiters} so the request is FIFO-fair against single-chunk acquirers, and
+ * any failure (timeout, close, OOM) refunds the entire reservation atomically before the
+ * exception propagates. No partial chunk holds are visible outside the lock during the wait.
+ */
+public class ChunkedBufferPool extends BufferPool {
+
+    public ChunkedBufferPool(long memory, int chunkSize, Metrics metrics, Time time, String metricGrpName) {
+        super(memory, chunkSize, metrics, time, metricGrpName);
+    }
+
+    /**
+     * Allocate {@code ceil(totalSize / chunkSize)} chunk-sized buffers atomically.
+     * This follows the same principle as the allocate in the parent BufferPool class: if there is
+     * memory available, allocate the requested number of chunks. If not, wait for memory.
+     * If it needs to wait, it blocks up to {@code maxTimeToBlockMs} for the whole request,
+     * queued on {@link #waiters} alongside single-chunk acquirers (FIFO).
+     * <p>
+     * On any failure path every byte reserved during the call is returned to the pool
+     * and the next waiter is signaled. No partial chunk holds are visible to other threads during the wait.
+     * The reservation is tracked as bytes against {@link #nonPooledAvailableMemory} plus chunks polled from {@link #free}.
+     *
+     * @param totalSize        minimum total bytes of capacity required across the returned chunks
+     * @param maxTimeToBlockMs maximum time in milliseconds to block waiting for memory
+     * @return list of {@code ceil(totalSize / chunkSize)} {@code ByteBuffer}s, each of capacity
+     *         {@code chunkSize}
+     * @throws InterruptedException     if interrupted while waiting
+     * @throws IllegalArgumentException if {@code totalSize <= 0} or {@code totalSize > totalMemory()}
+     * @throws BufferExhaustedException if the request can't be satisfied within {@code maxTimeToBlockMs}
+     * @throws KafkaException           if the pool is closed during the wait
+     */
+    public List<ByteBuffer> allocateChunks(int totalSize, long maxTimeToBlockMs) throws InterruptedException {
+        if (totalSize <= 0)
+            throw new IllegalArgumentException("totalSize must be positive: " + totalSize);
+        if (totalSize > totalMemory())
+            throw new IllegalArgumentException("Attempt to allocate " + totalSize
+                + " bytes across chunks, but there is a hard limit of "
+                + totalMemory() + " on memory allocations.");
+
+        int chunkSize = poolableSize();
+        int numChunks = (int) (((long) totalSize + chunkSize - 1L) / chunkSize);
+        long memoryRequired = (long) numChunks * chunkSize;
+
+        // Chunks pulled from the free list during the reservation. Remaining bytes
+        // (memoryRequired - pooled.size() * chunkSize) are reserved against
+        // nonPooledAvailableMemory and materialized as raw allocations after the lock is
+        // released.
+        List<ByteBuffer> pooled = new ArrayList<>(numChunks);
+
+        lock.lock();
+        if (this.closed) {
+            lock.unlock();
+            throw new KafkaException("Producer closed while allocating memory");
+        }
+        try {
+            long freeListBytes = (long) free.size() * chunkSize;
+            if (this.nonPooledAvailableMemory + freeListBytes >= memoryRequired) {
+                // Enough memory available to allocate the chunks needed
+                while (pooled.size() < numChunks && !free.isEmpty())
+                    pooled.add(free.pollFirst());
+                long remainingBytes = memoryRequired - (long) pooled.size() * chunkSize;
+                if (remainingBytes > 0) {
+                    // remainingBytes is bounded by memoryRequired ≤ totalMemory; the entry check
+                    // rejects requests larger than that, so the int cast is safe in practice.
+                    freeUp((int) remainingBytes);
+                    this.nonPooledAvailableMemory -= remainingBytes;
+                }
+            } else {
+                // Not enough memory available to allocate the chunks needed, so we need to wait for memory.
+                // Same as in BufferPool.allocate, but wait to acquire the memory needed for all the chunks.
+                // A single Condition is added to the waiter's list to ensure FIFO fairness at the request level.
+                //
+                // `accumulated` tracks bytes drawn from nonPooledAvailableMemory only (pool chunks
+                // already taken live in `pooled`). Matches BufferPool.allocate's semantics: on
+                // failure, `accumulated` is exactly the amount to refund; on success it is reset to 0.
+                long accumulated = 0;
+                Condition moreMemory = lock.newCondition();
+                try {
+                    long remainingTimeToBlockNs = TimeUnit.MILLISECONDS.toNanos(maxTimeToBlockMs);
+                    waiters.addLast(moreMemory);
+                    while ((long) pooled.size() * chunkSize + accumulated < memoryRequired) {
+                        long startWaitNs = time.nanoseconds();
+                        long timeNs;
+                        boolean waitingTimeElapsed;
+                        try {
+                            waitingTimeElapsed = !moreMemory.await(remainingTimeToBlockNs, TimeUnit.NANOSECONDS);
+                        } finally {
+                            long endWaitNs = time.nanoseconds();
+                            timeNs = Math.max(0L, endWaitNs - startWaitNs);
+                            recordWaitTime(timeNs);
+                        }
+
+                        if (this.closed)
+                            throw new KafkaException("Producer closed while allocating memory");
+
+                        if (waitingTimeElapsed) {
+                            throw new BufferExhaustedException("Failed to allocate " + memoryRequired
+                                + " bytes (" + numChunks + " chunks of " + chunkSize
+                                + ") within the configured max blocking time " + maxTimeToBlockMs
+                                + " ms. Total memory: " + totalMemory() + " bytes. Available memory: "
+                                + availableMemory() + " bytes.");
+                        }
+
+                        remainingTimeToBlockNs -= timeNs;
+
+                        // Take pool chunks first, then reserve non-pool bytes for the remainder.
+                        // Pool chunks don't bump `accumulated` because their refund path is
+                        // `free.addFirst` in the outer catch, not nonPool.
+                        while (pooled.size() < numChunks
+                                && (long) (pooled.size() + 1) * chunkSize + accumulated <= memoryRequired
+                                && !free.isEmpty()) {
+                            pooled.add(free.pollFirst());
+                        }
+                        long stillNeeded = memoryRequired - (long) pooled.size() * chunkSize - accumulated;
+                        if (stillNeeded > 0) {
+                            freeUp((int) stillNeeded);
+                            long got = Math.min(stillNeeded, this.nonPooledAvailableMemory);
+                            this.nonPooledAvailableMemory -= got;
+                            accumulated += got;
+                        }
+                    }
+                    // Loop terminated normally — clear the rollback marker (mirrors BufferPool.allocate).
+                    accumulated = 0;
+                } finally {
+                    // On failure (timeout / close / interrupt), refund the non-pool bytes taken.
+                    // Pool chunks already in `pooled` are returned to `free` separately by the
+                    // outer catch.
+                    this.nonPooledAvailableMemory += accumulated;
+                    waiters.remove(moreMemory);
+                }
+            }
+        } catch (RuntimeException | InterruptedException e) {
+            // Any chunks taken from the pool must be returned.
+            for (ByteBuffer chunk : pooled)
+                free.addFirst(chunk);
+            throw e;
+        } finally {
+            try {
+                signalNextWaiterIfMemoryAvailable();
+            } finally {
+                lock.unlock();
+            }
+        }
+
+        // Allocate raw chunks for the reserved non-pooled portion outside the lock. On OOM,
+        // refund all reserved bytes (memoryRequired) and let the next waiter try.
+        int chunksStillNeeded = numChunks - pooled.size();
+        List<ByteBuffer> result = new ArrayList<>(numChunks);
+        result.addAll(pooled);
+        boolean error = true;
+        try {
+            for (int i = 0; i < chunksStillNeeded; i++)
+                result.add(allocateByteBuffer(chunkSize));
+            error = false;
+            return result;
+        } finally {
+            if (error) {
+                // The pooled buffers we already drained are also lost on this path; mirror
+                // BufferPool.safeAllocateByteBuffer's behaviour (the bytes return, the ByteBuffer
+                // instances become garbage).
+                releaseReservedBytes(memoryRequired);
+            }
+        }
+    }
+}
diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedByteBufferOutputStream.java b/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedByteBufferOutputStream.java
new file mode 100644
index 0000000000000..ccf1d3d7374c9
--- /dev/null
+++ b/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedByteBufferOutputStream.java
@@ -0,0 +1,246 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.clients.producer.internals;
+
+import org.apache.kafka.common.utils.ByteBufferOutputStream;
+
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * A {@link ByteBufferOutputStream} backed by a linked list of fixed-size chunks instead of a
+ * single re-allocated buffer.
+ * <p>
+ * <b>This stream does not grow on its own.</b> Chunks are supplied by the caller — initial
+ * chunks via the constructor, additional chunks via {@link #addBuffers(List)}. When a write's
+ * size exceeds the stream's {@link #remaining()} (sum of free bytes across all attached
+ * chunks), {@link IllegalStateException} is thrown. The caller is responsible for attaching
+ * additional chunks before any such write.
+ * <p>
+ * Automatic mid-write growth (allocating a chunk from inside {@code write()}) will be added
+ * together with compression support in a follow-up: compressor buffering can cause a record's
+ * write to exceed its reservation, which the caller can't predict in advance. Until then, the
+ * write path stays allocation-free and deterministic.
+ * <p>
+ * When {@link #buffer()} is called (typically at batch close), all chunks are flattened into a
+ * single contiguous ByteBuffer — exactly one copy operation.
+ */
+public class ChunkedByteBufferOutputStream extends ByteBufferOutputStream {
+
+    private final List<ByteBuffer> chunks;
+    private final int chunkSize;
+    private final BufferPool pool;
+    private ByteBuffer currentChunk;
+    private int currentChunkIndex;
+    private ByteBuffer flattenedBuffer;
+    private boolean dirty;
+
+    /**
+     * Constructs a chunked output stream backed by the given pre-allocated chunks. Ownership of
+     * {@code initialChunks} transfers to this stream — they will be returned to the pool via
+     * {@link #deallocate()}.
+     *
+     * @param initialChunks pre-allocated chunks; must be non-empty; each chunk's capacity must
+     *                      equal {@code chunkSize}
+     * @param chunkSize     the size of each chunk in bytes
+     * @param pool          the buffer pool used for deallocation
+     */
+    @SuppressWarnings("this-escape")
+    public ChunkedByteBufferOutputStream(List<ByteBuffer> initialChunks, int chunkSize, BufferPool pool) {
+        super(initialChunks.get(0));
+        this.chunkSize = chunkSize;
+        this.pool = pool;
+        this.chunks = new ArrayList<>(initialChunks);
+        this.currentChunk = this.chunks.get(0);
+        this.currentChunkIndex = 0;
+        this.dirty = true;
+    }
+
+    @Override
+    public void write(int b) {
+        ensureChunkCapacity(1);
+        currentChunk.put((byte) b);
+        dirty = true;
+    }
+
+    @Override
+    public void write(byte[] bytes, int off, int len) {
+        while (len > 0) {
+            ensureChunkCapacity(1);
+            int toWrite = Math.min(len, currentChunk.remaining());
+            currentChunk.put(bytes, off, toWrite);
+            off += toWrite;
+            len -= toWrite;
+        }
+        dirty = true;
+    }
+
+    @Override
+    public void write(ByteBuffer sourceBuffer) {
+        while (sourceBuffer.hasRemaining()) {
+            ensureChunkCapacity(1);
+            int toWrite = Math.min(sourceBuffer.remaining(), currentChunk.remaining());
+            int oldLimit = sourceBuffer.limit();
+            sourceBuffer.limit(sourceBuffer.position() + toWrite);
+            currentChunk.put(sourceBuffer);
+            sourceBuffer.limit(oldLimit);
+        }
+        dirty = true;
+    }
+
+    private void ensureChunkCapacity(int needed) {
+        if (currentChunk.remaining() < needed) {
+            advanceToNextChunk();
+        }
+    }
+
+    /**
+     * Advances {@code currentChunk} to the next pre-supplied chunk. The caller is responsible
+     * for obtaining additional chunks (e.g. {@code ChunkedBufferPool.allocateChunks}) and
+     * attaching them via {@link #addBuffers(List)} before any write that would exceed
+     * {@link #remaining()}. The KIP's mid-record growth path (non-blocking pool then direct
+     * heap) lands together with compression support.
+     */
+    private void advanceToNextChunk() {
+        if (currentChunkIndex + 1 >= chunks.size()) {
+            throw new IllegalStateException(
+                "No more chunks available; the write exceeded the stream's remaining capacity. "
+                    + "Caller must obtain additional chunks (e.g. from ChunkedBufferPool) and attach them "
+                    + "via addBuffers before any write whose size exceeds remaining()");
+        }
+        currentChunkIndex++;
+        currentChunk = chunks.get(currentChunkIndex);
+    }
+
+    /**
+     * Appends pre-allocated chunks to this stream. Ownership of {@code newChunks} transfers to
+     * the stream; they will be returned to the pool via {@link #deallocate()}.
+     */
+    public void addBuffers(List<ByteBuffer> newChunks) {
+        chunks.addAll(newChunks);
+    }
+
+    @Override
+    public ByteBuffer buffer() {
+        if (flattenedBuffer != null && !dirty) {
+            return flattenedBuffer;
+        }
+        int totalSize = 0;
+        for (ByteBuffer chunk : chunks) {
+            totalSize += chunk.position();
+        }
+        flattenedBuffer = ByteBuffer.allocate(totalSize);
+        for (ByteBuffer chunk : chunks) {
+            int chunkPos = chunk.position();
+            chunk.flip();
+            flattenedBuffer.put(chunk);
+            chunk.limit(chunk.capacity());
+            chunk.position(chunkPos);
+        }
+        dirty = false;
+        return flattenedBuffer;
+    }
+
+    /**
+     * Total bytes written across all chunks.
+     */
+    @Override
+    public int position() {
+        int total = 0;
+        for (ByteBuffer chunk : chunks) {
+            total += chunk.position();
+        }
+        return total;
+    }
+
+    /**
+     * Sets the write position, walking across pre-supplied chunks if the requested position
+     * exceeds the first chunk's capacity. Only valid before any write.
+     */
+    @Override
+    public void position(int position) {
+        if (currentChunkIndex != 0 || currentChunk.position() != 0) {
+            throw new IllegalStateException("position() can only be called before any writes");
+        }
+        int remaining = position;
+        int idx = 0;
+        while (remaining > 0 && idx < chunks.size()) {
+            ByteBuffer chunk = chunks.get(idx);
+            int take = Math.min(remaining, chunk.capacity());
+            chunk.position(take);
+            remaining -= take;
+            if (remaining > 0)
+                idx++;
+        }
+        if (remaining > 0) {
+            throw new IllegalArgumentException("position " + position
+                + " exceeds total pre-allocated capacity");
+        }
+        currentChunkIndex = idx;
+        currentChunk = chunks.get(idx);
+        dirty = true;
+    }
+
+    /**
+     * Total bytes available across the current chunk and every queued (not-yet-active) chunk.
+     */
+    @Override
+    public int remaining() {
+        int total = currentChunk.remaining();
+        for (int i = currentChunkIndex + 1; i < chunks.size(); i++)
+            total += chunks.get(i).remaining();
+        return total;
+    }
+
+    @Override
+    public int limit() {
+        return Integer.MAX_VALUE;
+    }
+
+    @Override
+    public int initialCapacity() {
+        return chunks.isEmpty() ? chunkSize : chunks.get(0).capacity();
+    }
+
+    @Override
+    public void ensureRemaining(int remainingBytesRequired) {
+        // The stream advances one chunk at a time — the most any single ensureChunkCapacity
+        // call can guarantee is `chunkSize` of contiguous-by-chunk space. Callers needing more
+        // than that are expected to attach chunks via addBuffers before writing; write(byte[])
+        // itself loops across chunks so contiguous capacity isn't required.
+        ensureChunkCapacity(Math.min(remainingBytesRequired, chunkSize));
+    }
+
+    /**
+     * Returns all pool-allocated chunks to the buffer pool.
+     */
+    public void deallocate(BufferPool pool) {
+        if (pool != null) {
+            for (ByteBuffer chunk : chunks) {
+                pool.deallocate(chunk);
+            }
+        }
+        chunks.clear();
+        currentChunk = null;
+        flattenedBuffer = null;
+    }
+
+    public void deallocate() {
+        deallocate(pool);
+    }
+}
diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedRecordAccumulator.java b/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedRecordAccumulator.java
new file mode 100644
index 0000000000000..cac18c608d195
--- /dev/null
+++ b/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedRecordAccumulator.java
@@ -0,0 +1,336 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.clients.producer.internals;
+
+import org.apache.kafka.clients.producer.BufferExhaustedException;
+import org.apache.kafka.clients.producer.RecordMetadata;
+import org.apache.kafka.common.Cluster;
+import org.apache.kafka.common.compress.Compression;
+import org.apache.kafka.common.header.Header;
+import org.apache.kafka.common.metrics.Metrics;
+import org.apache.kafka.common.record.TimestampType;
+import org.apache.kafka.common.record.internal.AbstractRecords;
+import org.apache.kafka.common.record.internal.CompressionType;
+import org.apache.kafka.common.record.internal.MemoryRecordsBuilder;
+import org.apache.kafka.common.record.internal.Record;
+import org.apache.kafka.common.record.internal.RecordBatch;
+import org.apache.kafka.common.utils.Time;
+import org.apache.kafka.common.utils.internals.LogContext;
+
+import java.nio.ByteBuffer;
+import java.util.ArrayDeque;
+import java.util.Deque;
+import java.util.List;
+
+/**
+ * A {@link RecordAccumulator} variant that uses chunked buffer allocation: each new batch
+ * pre-allocates {@code ceil(recordSize / chunkSize)} chunks from a {@link ChunkedBufferPool},
+ * and grows mid-batch by attaching additional chunks via
+ * {@link ProducerBatch#addBuffers(List)} when later records would overflow.
+ * <p>
+ * This is the "dynamic" buffer.memory allocation strategy: memory consumption scales with
+ * actual data buffered, not with {@code active_partition_count × batch.size}.
+ * <p>
+ * <b>PR1 scope:</b> uncompressed only. Batch creation throws
+ * {@link UnsupportedOperationException} if compression is requested. The mid-batch flow
+ * matches the KIP: try a non-blocking pool acquire for the extension chunks; on
+ * {@link BufferExhaustedException}, close the existing batch (so the sender can drain it)
+ * and let the new record flow through the first-record path, which blocks on the pool for
+ * a fresh batch's chunks up to {@code max.block.ms}.
+ * <p>
+ * TODO: add the new buffer.memory.allocation.strategy config and instantiate this class only when set to "dynamic"
+ * TODO: add support for compressed data. Throws UnsupportedOperationException for now,
+ *   and IllegalStateException if overflow detected. Follow-ups will support compressed data and growth.
+ */
+public class ChunkedRecordAccumulator extends RecordAccumulator {
+
+    private final ChunkedBufferPool chunkedFree;
+
+    public ChunkedRecordAccumulator(LogContext logContext,
+                                    int batchSize,
+                                    Compression compression,
+                                    int lingerMs,
+                                    long retryBackoffMs,
+                                    long retryBackoffMaxMs,
+                                    int deliveryTimeoutMs,
+                                    PartitionerConfig partitionerConfig,
+                                    Metrics metrics,
+                                    String metricGrpName,
+                                    Time time,
+                                    TransactionManager transactionManager,
+                                    ChunkedBufferPool bufferPool) {
+        super(logContext, batchSize, compression, lingerMs, retryBackoffMs, retryBackoffMaxMs,
+                deliveryTimeoutMs, partitionerConfig, metrics, metricGrpName, time, transactionManager, bufferPool);
+        this.chunkedFree = bufferPool;
+    }
+
+    public ChunkedRecordAccumulator(LogContext logContext,
+                                    int batchSize,
+                                    Compression compression,
+                                    int lingerMs,
+                                    long retryBackoffMs,
+                                    long retryBackoffMaxMs,
+                                    int deliveryTimeoutMs,
+                                    Metrics metrics,
+                                    String metricGrpName,
+                                    Time time,
+                                    TransactionManager transactionManager,
+                                    ChunkedBufferPool bufferPool) {
+        this(logContext, batchSize, compression, lingerMs, retryBackoffMs, retryBackoffMaxMs,
+                deliveryTimeoutMs, new PartitionerConfig(), metrics, metricGrpName, time, transactionManager,
+                bufferPool);
+    }
+
+    @Override
+    public RecordAppendResult append(String topic,
+                                     int partition,
+                                     long timestamp,
+                                     byte[] key,
+                                     byte[] value,
+                                     Header[] headers,
+                                     AppendCallbacks callbacks,
+                                     long maxTimeToBlock,
+                                     long nowMs,
+                                     Cluster cluster) throws InterruptedException {
+        TopicInfo topicInfo = topicInfoMap.computeIfAbsent(topic,
+                k -> new TopicInfo(createBuiltInPartitioner(logContext, k, batchSize, partitionerRackAware, rack)));
+
+        appendsInProgress.incrementAndGet();
+        ChunkedByteBufferOutputStream bufferStream = null;
+        List<ByteBuffer> extensionChunks = null;
+        int extensionBytes = 0;
+        if (headers == null) headers = Record.EMPTY_HEADERS;
+        try {
+            while (true) {
+                final BuiltInPartitioner.StickyPartitionInfo partitionInfo;
+                final int effectivePartition;
+                if (partition == RecordMetadata.UNKNOWN_PARTITION) {
+                    partitionInfo = topicInfo.builtInPartitioner.peekCurrentPartitionInfo(cluster);
+                    effectivePartition = partitionInfo.partition();
+                } else {
+                    partitionInfo = null;
+                    effectivePartition = partition;
+                }
+                setPartition(callbacks, effectivePartition);
+
+                Deque<ProducerBatch> dq = topicInfo.batches.computeIfAbsent(effectivePartition, k -> new ArrayDeque<>());
+                synchronized (dq) {
+                    if (partitionChanged(topic, topicInfo, partitionInfo, dq, nowMs, cluster))
+                        continue;
+
+                    // Probe the tail directly. If the existing batch has logical room but the
+                    // underlying stream lacks physical capacity, capture the gap and fall through
+                    // to allocate the extension outside the deque lock. Otherwise, attempt the
+                    // append against the existing batch.
+                    ProducerBatch tail = dq.peekLast();
+                    int gap = tail == null ? 0 : tail.extensionBytesNeeded(timestamp, key, value, headers);
+                    if (gap > 0) {
+                        extensionBytes = gap;
+                    } else {
+                        extensionBytes = 0;  // clear any stale value carried from a prior iteration
+                        RecordAppendResult appendResult = tryAppend(timestamp, key, value, headers, callbacks, dq, nowMs);
+                        if (appendResult != null) {
+                            boolean enableSwitch = allBatchesFull(dq);
+                            topicInfo.builtInPartitioner.updatePartitionInfo(partitionInfo, appendResult.appendedBytes, cluster, enableSwitch);
+                            return appendResult;
+                        }
+                    }
+                }
+
+                if (extensionBytes > 0 && extensionChunks == null) {
+                    // Mid-batch extension: non-blocking attempt only. The producer thread already
+                    // holds chunks for the open batch; blocking here risks deadlock with the Sender
+                    // thread (which frees pool memory by completing batches). If the pool is
+                    // exhausted, we close the existing batch (so it becomes drainable) and fall
+                    // through to the first-record blocking path on the next loop iteration.
+                    try {
+                        extensionChunks = chunkedFree.allocateChunks(extensionBytes, 0L);
+                    } catch (BufferExhaustedException e) {
+                        log.trace("Pool exhausted while extending batch for topic {} partition {}; closing existing batch",
+                                topic, effectivePartition);
+                        synchronized (dq) {
+                            ProducerBatch last = dq.peekLast();
+                            if (last != null && last.isWritable()) {
+                                last.closeForRecordAppends();
+                            }
+                        }
+                        extensionBytes = 0;
+                        continue;
+                    }
+                    nowMs = time.milliseconds();
+                } else if (extensionBytes == 0 && bufferStream == null) {
+                    // First-record path: block on the pool for enough chunks to fit this record.
+                    // Temporary while compressed data not supported.
+                    // TODO: drop this throw and route through the
+                    //   compressed path (which adds the mid-record growth fallback for compressor overshoot).
+                    if (compression.type() != CompressionType.NONE) {
+                        throw new UnsupportedOperationException(
+                                "Compression is not implemented yet for the dynamic buffer.memory allocation strategy");
+                    }
+                    int size = AbstractRecords.estimateSizeInBytesUpperBound(
+                            RecordBatch.CURRENT_MAGIC_VALUE, compression.type(), key, value, headers);
+                    log.trace("Allocating {} byte chunked buffer ({} byte chunks) for topic {} partition {} with remaining timeout {}ms",
+                            size, chunkedFree.poolableSize(), topic, effectivePartition, maxTimeToBlock);
+                    List<ByteBuffer> initialChunks = chunkedFree.allocateChunks(size, maxTimeToBlock);
+                    nowMs = time.milliseconds();
+                    bufferStream = new ChunkedByteBufferOutputStream(initialChunks, chunkedFree.poolableSize(), chunkedFree);
+                }
+
+                synchronized (dq) {
+                    if (partitionChanged(topic, topicInfo, partitionInfo, dq, nowMs, cluster))
+                        continue;
+
+                    if (extensionChunks != null) {
+                        ProducerBatch last = dq.peekLast();
+                        // The off-lock allocateChunks window allows the original tail batch to be
+                        // drained and a split batch (plain ByteBufferOutputStream) to be addFirst'd
+                        // into an emptied deque — see splitAndReenqueue. The instanceof guard below
+                        // is load-bearing: addBuffers is only defined on ChunkedByteBufferOutputStream;
+                        // the else branch returns the chunks to the pool to avoid leaking them.
+                        if (last != null && last.isWritable()
+                                && last.bufferStream() instanceof ChunkedByteBufferOutputStream) {
+                            ((ChunkedByteBufferOutputStream) last.bufferStream()).addBuffers(extensionChunks);
+                            extensionChunks = null;
+                            extensionBytes = 0;
+                            // Re-probe after attach: between our first-lock probe and now we were off-lock,
+                            // and another appender (or a tail-replacement) may have consumed capacity that
+                            // our extension allocation was sized against. If a gap remains, loop to acquire
+                            // more chunks against the now-current state. Without this re-probe, a
+                            // tryAppend that passes hasRoomFor (writeLimit) but exceeds the stream's
+                            // physical remaining() trips IllegalStateException in
+                            // ChunkedByteBufferOutputStream.advanceToNextChunk. Termination: writeLimit is
+                            // bounded and fixed; once hasRoomFor turns false, extensionBytesNeeded returns 0
+                            // and we fall through to new-batch creation. Regression test:
+                            // testConcurrentExtensionRaceDoesNotOverflowChunkedStream.
+                            int recheck = last.extensionBytesNeeded(timestamp, key, value, headers);
+                            if (recheck > 0) {
+                                extensionBytes = recheck;
+                                continue;
+                            }
+                            RecordAppendResult retryResult = tryAppend(timestamp, key, value, headers, callbacks, dq, nowMs);
+                            if (retryResult != null) {
+                                boolean enableSwitch = allBatchesFull(dq);
+                                topicInfo.builtInPartitioner.updatePartitionInfo(partitionInfo, retryResult.appendedBytes, cluster, enableSwitch);
+                                return retryResult;
+                            }
+                            // Batch became full via writeLimit while we were off-lock — fall through
+                            // to new-batch creation on the next iteration.
+                            continue;
+                        }
+                        // Tail is gone, closed, or non-chunked (e.g., a split batch). Return chunks to pool.
+                        for (ByteBuffer chunk : extensionChunks)
+                            chunkedFree.deallocate(chunk);
+                        extensionChunks = null;
+                        extensionBytes = 0;
+                        continue;
+                    }
+
+                    // bufferStream is non-null here (first-record path). Before creating a new
+                    // batch with it, re-probe the tail: a concurrent appender may have created a
+                    // chunked batch we should extend instead. If so, release the oversized
+                    // bufferStream and let the next iteration take the extension path.
+                    ProducerBatch tail = dq.peekLast();
+                    if (tail != null && tail.isWritable()
+                            && tail.bufferStream() instanceof ChunkedByteBufferOutputStream
+                            && tail.extensionBytesNeeded(timestamp, key, value, headers) > 0) {
+                        bufferStream.deallocate();
+                        bufferStream = null;
+                        continue;
+                    }
+
+                    int firstRecordSize = AbstractRecords.estimateSizeInBytesUpperBound(
+                            RecordBatch.CURRENT_MAGIC_VALUE, compression.type(), key, value, headers);
+                    final ChunkedByteBufferOutputStream batchStream = bufferStream;
+                    RecordAppendResult appendResult = appendNewBatch(topic, effectivePartition, dq, timestamp, key, value, headers, callbacks,
+                            () -> chunkedRecordsBuilder(batchStream, firstRecordSize), nowMs);
+                    if (appendResult.newBatchCreated)
+                        bufferStream = null;
+                    boolean enableSwitch = allBatchesFull(dq);
+                    topicInfo.builtInPartitioner.updatePartitionInfo(partitionInfo, appendResult.appendedBytes, cluster, enableSwitch);
+                    return appendResult;
+                }
+            }
+        } finally {
+            if (bufferStream != null)
+                bufferStream.deallocate();
+            if (extensionChunks != null) {
+                for (ByteBuffer chunk : extensionChunks)
+                    chunkedFree.deallocate(chunk);
+            }
+            appendsInProgress.decrementAndGet();
+        }
+    }
+
+    /**
+     * Build a {@link MemoryRecordsBuilder} backed by the chunked stream.
+     * {@code writeLimit} is set to {@code max(batchSize, firstRecordSize)} — the same logical
+     * cap the legacy path uses. Physical chunk capacity grows on demand via
+     * {@link ChunkedByteBufferOutputStream#addBuffers(List)}; {@code writeLimit} bounds when a
+     * batch is considered logically full and a new one must be started.
+     */
+    private MemoryRecordsBuilder chunkedRecordsBuilder(ChunkedByteBufferOutputStream bufferStream,
+                                                       int firstRecordSize) {
+        int writeLimit = Math.max(batchSize, firstRecordSize);
+        return new MemoryRecordsBuilder(bufferStream, RecordBatch.CURRENT_MAGIC_VALUE, compression,
+                TimestampType.CREATE_TIME, 0L, RecordBatch.NO_TIMESTAMP, RecordBatch.NO_PRODUCER_ID,
+                RecordBatch.NO_PRODUCER_EPOCH, RecordBatch.NO_SEQUENCE, false, false,
+                RecordBatch.NO_PARTITION_LEADER_EPOCH, writeLimit);
+    }
+
+    /**
+     * Chunked-aware deallocation for {@link ProducerBatch}.
+     * <p>
+     * Intercepts the {@link #isInflight()} branch of the parent's
+     * {@link RecordAccumulator#deallocate(ProducerBatch)} when the batch is backed by a
+     * {@link ChunkedByteBufferOutputStream}. The parent's defensive path credits only
+     * {@code initialCapacity} (one {@code chunkSize}) back to the pool and throws — that's the
+     * KAFKA-19012-aware safety net for the legacy single-buffer path, where the in-flight
+     * buffer <i>is</i> the pooled buffer. A chunked batch holds K chunks but the in-flight
+     * network bytes live in a separate flattened heap buffer (see
+     * {@link ChunkedByteBufferOutputStream#buffer()}), so it is safe to return all K chunks
+     * to the pool here. The {@code IllegalStateException} is still propagated to preserve the
+     * contract that deallocating an inflight batch is unexpected upstream.
+     */
+    @Override
+    public void deallocate(ProducerBatch batch) {
+        if (!batch.isSplitBatch()
+                && !batch.isBufferDeallocated()
+                && batch.isInflight()
+                && batch.bufferStream() instanceof ChunkedByteBufferOutputStream) {
+            batch.markBufferDeallocated();
+            deallocateBatchBuffer(batch);
+            throw new IllegalStateException("Attempting to deallocate a batch that is inflight. Batch is " + batch);
+        }
+        super.deallocate(batch);
+    }
+
+    /**
+     * Route chunked batch deallocation through the stream rather than the legacy
+     * single-buffer pool path. The flattened buffer returned by {@code batch.buffer()} is a
+     * fresh heap allocation and must NOT be passed to the pool — instead, the stream owns
+     * the chunk-sized buffers and returns them to the pool.
+     */
+    @Override
+    protected void deallocateBatchBuffer(ProducerBatch batch) {
+        if (batch.bufferStream() instanceof ChunkedByteBufferOutputStream) {
+            ((ChunkedByteBufferOutputStream) batch.bufferStream()).deallocate(chunkedFree);
+        } else {
+            // Split batches and any non-chunked batches go through the default deallocation.
+            super.deallocateBatchBuffer(batch);
+        }
+    }
+}
diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/internals/ProducerBatch.java b/clients/src/main/java/org/apache/kafka/clients/producer/internals/ProducerBatch.java
index 14d0d6ef6334d..a7e47583d310b 100644
--- a/clients/src/main/java/org/apache/kafka/clients/producer/internals/ProducerBatch.java
+++ b/clients/src/main/java/org/apache/kafka/clients/producer/internals/ProducerBatch.java
@@ -31,6 +31,7 @@
 import org.apache.kafka.common.record.internal.Record;
 import org.apache.kafka.common.record.internal.RecordBatch;
 import org.apache.kafka.common.requests.ProduceResponse;
+import org.apache.kafka.common.utils.ByteBufferOutputStream;
 import org.apache.kafka.common.utils.ProducerIdAndEpoch;
 import org.apache.kafka.common.utils.Time;
 
@@ -139,6 +140,38 @@ boolean hasLeaderChangedForTheOngoingRetry() {
     }
 
 
+    /**
+     * Bytes of physical buffer this batch needs before {@code tryAppend} could accept the given
+     * record. Returns 0 when the record fits (either logically full, or stream has room).
+     * Positive when {@code hasRoomFor} allows the record but the underlying stream lacks
+     * physical capacity — used by the dynamic strategy to allocate exactly the missing bytes
+     * before retrying.
+     * <p>
+     * For batches with the default {@link ByteBufferOutputStream} (legacy path), the stream's
+     * {@code remaining()} is large enough that this almost always returns 0.
+     */
+    int extensionBytesNeeded(long timestamp, byte[] key, byte[] value, Header[] headers) {
+        if (recordCount == 0)
+            return 0;
+        if (!recordsBuilder.hasRoomFor(timestamp, key, value, headers))
+            return 0;
+        // Upper-bound estimate is fine: over-counting by header bytes only over-allocates by a
+        // small fraction of one chunk. Under-estimation would be unsafe.
+        int recordSize = AbstractRecords.estimateSizeInBytesUpperBound(
+                magic(), recordsBuilder.compression().type(), key, value, headers);
+        int gap = recordSize - recordsBuilder.bufferStream().remaining();
+        return Math.max(0, gap);
+    }
+
+    /**
+     * The batch's underlying output stream. Exposed for the dynamic strategy to attach
+     * extension chunks and route deallocation through the chunked stream rather than the
+     * standard {@code BufferPool.deallocate(ByteBuffer, int)} path.
+     */
+    ByteBufferOutputStream bufferStream() {
+        return recordsBuilder.bufferStream();
+    }
+
     /**
      * Append the record to the current record set and return the relative offset within that record set
      *
diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/internals/RecordAccumulator.java b/clients/src/main/java/org/apache/kafka/clients/producer/internals/RecordAccumulator.java
index b1eced17e4f25..29ddd07a489bc 100644
--- a/clients/src/main/java/org/apache/kafka/clients/producer/internals/RecordAccumulator.java
+++ b/clients/src/main/java/org/apache/kafka/clients/producer/internals/RecordAccumulator.java
@@ -58,6 +58,7 @@
 import java.util.Set;
 import java.util.concurrent.ConcurrentMap;
 import java.util.concurrent.atomic.AtomicInteger;
+import java.util.function.Supplier;
 
 /**
  * This class acts as a queue that accumulates records into {@link MemoryRecords}
@@ -68,23 +69,23 @@
  */
 public class RecordAccumulator {
 
-    private final LogContext logContext;
-    private final Logger log;
-    private volatile boolean closed;
+    protected final LogContext logContext;
+    protected final Logger log;
+    protected volatile boolean closed;
     private final AtomicInteger flushesInProgress;
-    private final AtomicInteger appendsInProgress;
-    private final int batchSize;
-    private final Compression compression;
+    protected final AtomicInteger appendsInProgress;
+    protected final int batchSize;
+    protected final Compression compression;
     private final int lingerMs;
     private final ExponentialBackoff retryBackoff;
     private final int deliveryTimeoutMs;
     private final long partitionAvailabilityTimeoutMs;  // latency threshold for marking partition temporary unavailable
-    private final boolean partitionerRackAware;
-    private final String rack;
+    protected final boolean partitionerRackAware;
+    protected final String rack;
     private final boolean enableAdaptivePartitioning;
     private final BufferPool free;
-    private final Time time;
-    private final ConcurrentMap<String /*topic*/, TopicInfo> topicInfoMap = new CopyOnWriteMap<>();
+    protected final Time time;
+    protected final ConcurrentMap<String /*topic*/, TopicInfo> topicInfoMap = new CopyOnWriteMap<>();
     private final ConcurrentMap<Integer /*nodeId*/, NodeLatencyStats> nodeStats = new CopyOnWriteMap<>();
     private final IncompleteBatches incomplete;
     // The following variables are only accessed by the sender thread, so we don't need to protect them.
@@ -217,7 +218,7 @@ private void registerMetrics(Metrics metrics, String metricGrpName) {
             (config, now) -> free.availableMemory());
     }
 
-    private void setPartition(AppendCallbacks callbacks, int partition) {
+    protected void setPartition(AppendCallbacks callbacks, int partition) {
         if (callbacks != null)
             callbacks.setPartition(partition);
     }
@@ -234,7 +235,7 @@ private void setPartition(AppendCallbacks callbacks, int partition) {
      * @return 'true' if partition changed and we need to get new partition info and retry,
      *         'false' otherwise
      */
-    private boolean partitionChanged(String topic,
+    protected boolean partitionChanged(String topic,
                                      TopicInfo topicInfo,
                                      BuiltInPartitioner.StickyPartitionInfo partitionInfo,
                                      Deque<ProducerBatch> deque, long nowMs,
@@ -347,7 +348,10 @@ public RecordAppendResult append(String topic,
                     if (partitionChanged(topic, topicInfo, partitionInfo, dq, nowMs, cluster))
                         continue;
 
-                    RecordAppendResult appendResult = appendNewBatch(topic, effectivePartition, dq, timestamp, key, value, headers, callbacks, buffer, nowMs);
+                    final ByteBuffer batchBuffer = buffer;
+                    RecordAppendResult appendResult = appendNewBatch(topic, effectivePartition, dq, timestamp, key, value, headers, callbacks,
+                            () -> MemoryRecords.builder(batchBuffer, RecordBatch.CURRENT_MAGIC_VALUE, compression, TimestampType.CREATE_TIME, 0L),
+                            nowMs);
                     // Set buffer to null, so that deallocate doesn't return it back to free pool, since it's used in the batch.
                     if (appendResult.newBatchCreated)
                         buffer = null;
@@ -374,10 +378,13 @@ public RecordAppendResult append(String topic,
      * @param value The value for the record
      * @param headers the Headers for the record
      * @param callbacks The callbacks to execute
-     * @param buffer The buffer for the new batch
+     * @param recordsBuilderSupplier Supplies the {@link MemoryRecordsBuilder} for the new
+     *        batch. Invoked lazily, only when this method is committed to creating a new
+     *        batch (i.e. after the in-lock concurrent-append race check has lost). The
+     *        chunked subclass passes a supplier that produces a stream-backed builder.
      * @param nowMs The current time, in milliseconds
      */
-    private RecordAppendResult appendNewBatch(String topic,
+    protected RecordAppendResult appendNewBatch(String topic,
                                               int partition,
                                               Deque<ProducerBatch> dq,
                                               long timestamp,
@@ -385,7 +392,7 @@ private RecordAppendResult appendNewBatch(String topic,
                                               byte[] value,
                                               Header[] headers,
                                               AppendCallbacks callbacks,
-                                              ByteBuffer buffer,
+                                              Supplier<MemoryRecordsBuilder> recordsBuilderSupplier,
                                               long nowMs) {
         assert partition != RecordMetadata.UNKNOWN_PARTITION;
 
@@ -395,7 +402,7 @@ private RecordAppendResult appendNewBatch(String topic,
             return appendResult;
         }
 
-        MemoryRecordsBuilder recordsBuilder = recordsBuilder(buffer);
+        MemoryRecordsBuilder recordsBuilder = recordsBuilderSupplier.get();
         ProducerBatch batch = new ProducerBatch(new TopicPartition(topic, partition), recordsBuilder, nowMs);
         FutureRecordMetadata future = Objects.requireNonNull(batch.tryAppend(timestamp, key, value, headers,
                 callbacks, nowMs));
@@ -406,14 +413,10 @@ private RecordAppendResult appendNewBatch(String topic,
         return new RecordAppendResult(future, dq.size() > 1 || batch.isFull(), true, batch.estimatedSizeInBytes());
     }
 
-    private MemoryRecordsBuilder recordsBuilder(ByteBuffer buffer) {
-        return MemoryRecords.builder(buffer, RecordBatch.CURRENT_MAGIC_VALUE, compression, TimestampType.CREATE_TIME, 0L);
-    }
-
     /**
      * Check if all batches in the queue are full.
      */
-    private boolean allBatchesFull(Deque<ProducerBatch> deque) {
+    protected boolean allBatchesFull(Deque<ProducerBatch> deque) {
         // Only the last batch may be incomplete, so we just check that.
         ProducerBatch last = deque.peekLast();
         return last == null || last.isFull();
@@ -427,7 +430,7 @@ private boolean allBatchesFull(Deque<ProducerBatch> deque) {
      *  and memory records built) in one of the following cases (whichever comes first): right before send,
      *  if it is expired, or when the producer is closed.
      */
-    private RecordAppendResult tryAppend(long timestamp, byte[] key, byte[] value, Header[] headers,
+    protected RecordAppendResult tryAppend(long timestamp, byte[] key, byte[] value, Header[] headers,
                                          Callback callback, Deque<ProducerBatch> deque, long nowMs) {
         if (closed)
             throw new KafkaException("Producer closed while send in progress");
@@ -1058,11 +1061,19 @@ public void deallocate(ProducerBatch batch) {
                     free.deallocate(ByteBuffer.allocate(batch.initialCapacity()));
                     throw new IllegalStateException("Attempting to deallocate a batch that is inflight. Batch is " + batch);
                 }
-                free.deallocate(batch.buffer(), batch.initialCapacity());
+                deallocateBatchBuffer(batch);
             }
         }
     }
 
+    /**
+     * Return the batch's underlying buffer to the pool.
+     * This default implementation returns the buffer at its initial capacity (single buffer).
+     */
+    protected void deallocateBatchBuffer(ProducerBatch batch) {
+        free.deallocate(batch.buffer(), batch.initialCapacity());
+    }
+
     /**
      * Remove from the incomplete list but do not free memory yet
      */
@@ -1298,7 +1309,7 @@ public ReadyCheckResult(Set<Node> readyNodes, long nextReadyCheckDelayMs, Set<St
     /**
      * Per topic info.
      */
-    private static class TopicInfo {
+    protected static class TopicInfo {
         public final ConcurrentMap<Integer /*partition*/, Deque<ProducerBatch>> batches = new CopyOnWriteMap<>();
         public final BuiltInPartitioner builtInPartitioner;
 
diff --git a/clients/src/main/java/org/apache/kafka/common/record/internal/MemoryRecordsBuilder.java b/clients/src/main/java/org/apache/kafka/common/record/internal/MemoryRecordsBuilder.java
index 3fd89deaba135..47a955cfcf18e 100644
--- a/clients/src/main/java/org/apache/kafka/common/record/internal/MemoryRecordsBuilder.java
+++ b/clients/src/main/java/org/apache/kafka/common/record/internal/MemoryRecordsBuilder.java
@@ -211,6 +211,15 @@ public int initialCapacity() {
         return bufferStream.initialCapacity();
     }
 
+    /**
+     * The underlying output stream. Exposed so the dynamic-strategy callers can attach
+     * pre-allocated chunks to a {@code ChunkedByteBufferOutputStream} and route deallocation
+     * polymorphically.
+     */
+    public ByteBufferOutputStream bufferStream() {
+        return bufferStream;
+    }
+
     public double compressionRatio() {
         return actualCompressionRatio;
     }
diff --git a/gradle/spotbugs-exclude.xml b/gradle/spotbugs-exclude.xml
index 6d18691973a45..1a56dc05037c9 100644
--- a/gradle/spotbugs-exclude.xml
+++ b/gradle/spotbugs-exclude.xml
@@ -127,6 +127,17 @@ For a detailed description of spotbugs bug categories, see https://spotbugs.read
         <Bug pattern="RV_EXCEPTION_NOT_THROWN"/>
     </Match>
 
+    <Match>
+        <!-- False positive: ChunkedRecordAccumulator.append creates a ChunkedByteBufferOutputStream
+             and either transfers ownership to a new ProducerBatch (through the recordsBuilder
+             supplier passed to appendNewBatch) or releases it via bufferStream.deallocate() in the
+             finally block. Spotbugs only recognizes close() as the cleanup method, and doesn't
+             follow ownership through the supplier lambda — verified 2026-06-05 by removing this
+             suppression and observing the warning at the constructor site. -->
+        <Class name="org.apache.kafka.clients.producer.internals.ChunkedRecordAccumulator"/>
+        <Bug pattern="OS_OPEN_STREAM"/>
+    </Match>
+
     <!-- false positive in Java 11, related to https://github.com/spotbugs/spotbugs/issues/756 but more complex -->
     <Match>
         <Class name="org.apache.kafka.common.compress.Lz4BlockOutputStream"/>
@@ -546,6 +557,7 @@ For a detailed description of spotbugs bug categories, see https://spotbugs.read
          -->
         <Or>
             <Class name="org.apache.kafka.clients.consumer.internals.CompletedFetch"/>
+            <Class name="org.apache.kafka.clients.producer.internals.BufferPool"/>
             <Class name="org.apache.kafka.clients.producer.internals.ProducerBatch"/>
             <Class name="org.apache.kafka.clients.producer.internals.RecordAccumulator"/>
             <Class name="org.apache.kafka.common.security.kerberos.KerberosLogin"/>

From 203a410dac3ec480aae2a483b26852d378df8513 Mon Sep 17 00:00:00 2001
From: Lianet Magrans <lmagrans@confluent.io>
Date: Fri, 5 Jun 2026 12:09:30 -0400
Subject: [PATCH 02/11] tests

---
 .../internals/ChunkedBufferPoolTest.java      | 422 ++++++++++++++++++
 .../ChunkedByteBufferOutputStreamTest.java    | 171 +++++++
 .../ChunkedRecordAccumulatorTest.java         | 350 +++++++++++++++
 3 files changed, 943 insertions(+)
 create mode 100644 clients/src/test/java/org/apache/kafka/clients/producer/internals/ChunkedBufferPoolTest.java
 create mode 100644 clients/src/test/java/org/apache/kafka/clients/producer/internals/ChunkedByteBufferOutputStreamTest.java
 create mode 100644 clients/src/test/java/org/apache/kafka/clients/producer/internals/ChunkedRecordAccumulatorTest.java

diff --git a/clients/src/test/java/org/apache/kafka/clients/producer/internals/ChunkedBufferPoolTest.java b/clients/src/test/java/org/apache/kafka/clients/producer/internals/ChunkedBufferPoolTest.java
new file mode 100644
index 0000000000000..e0de8ae19e4dc
--- /dev/null
+++ b/clients/src/test/java/org/apache/kafka/clients/producer/internals/ChunkedBufferPoolTest.java
@@ -0,0 +1,422 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.clients.producer.internals;
+
+import org.apache.kafka.clients.producer.BufferExhaustedException;
+import org.apache.kafka.common.metrics.Metrics;
+import org.apache.kafka.common.utils.MockTime;
+
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.Test;
+
+import org.apache.kafka.common.KafkaException;
+
+import java.nio.ByteBuffer;
+import java.util.List;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicReference;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+public class ChunkedBufferPoolTest {
+
+    private final MockTime time = new MockTime();
+    private final Metrics metrics = new Metrics(time);
+    private final String metricGroup = "producer-metrics";
+
+    @AfterEach
+    public void teardown() {
+        metrics.close();
+    }
+
+    private ChunkedBufferPool pool(long totalMemory, int chunkSize) {
+        return new ChunkedBufferPool(totalMemory, chunkSize, metrics, time, metricGroup);
+    }
+
+    /** Single-chunk request returns a list of one buffer at the chunk size. */
+    @Test
+    public void testAllocateOneChunk() throws Exception {
+        int chunkSize = 64;
+        ChunkedBufferPool p = pool(1024, chunkSize);
+        List<ByteBuffer> chunks = p.allocateChunks(chunkSize, 100);
+        assertEquals(1, chunks.size());
+        assertEquals(chunkSize, chunks.get(0).capacity());
+    }
+
+    /** Total size that's not a multiple of chunk size rounds up; returned chunks cover it. */
+    @Test
+    public void testAllocateRoundsUpToChunkBoundary() throws Exception {
+        int chunkSize = 64;
+        ChunkedBufferPool p = pool(1024, chunkSize);
+        // 65 bytes requested → 2 chunks (128 bytes total).
+        List<ByteBuffer> chunks = p.allocateChunks(65, 100);
+        assertEquals(2, chunks.size());
+        for (ByteBuffer chunk : chunks)
+            assertEquals(chunkSize, chunk.capacity());
+    }
+
+    /** Multi-chunk request returns ceil(totalSize / chunkSize) chunks. */
+    @Test
+    public void testAllocateMultipleChunks() throws Exception {
+        int chunkSize = 64;
+        ChunkedBufferPool p = pool(1024, chunkSize);
+        List<ByteBuffer> chunks = p.allocateChunks(4 * chunkSize, 100);
+        assertEquals(4, chunks.size());
+    }
+
+    /** Pool memory accounting: after allocation, the unallocated portion shrinks by the request. */
+    @Test
+    public void testAvailableMemoryAfterAllocation() throws Exception {
+        int chunkSize = 64;
+        long total = 256;
+        ChunkedBufferPool p = pool(total, chunkSize);
+        p.allocateChunks(3 * chunkSize, 100);
+        assertEquals(total - 3L * chunkSize, p.availableMemory());
+    }
+
+    /** Returning chunks via deallocate restores pool memory. */
+    @Test
+    public void testDeallocationRestoresMemory() throws Exception {
+        int chunkSize = 64;
+        long total = 256;
+        ChunkedBufferPool p = pool(total, chunkSize);
+        List<ByteBuffer> chunks = p.allocateChunks(2 * chunkSize, 100);
+        for (ByteBuffer chunk : chunks)
+            p.deallocate(chunk);
+        assertEquals(total, p.availableMemory());
+    }
+
+    /** Requesting more than total memory throws IllegalArgumentException. */
+    @Test
+    public void testRejectsRequestExceedingTotalMemory() {
+        ChunkedBufferPool p = pool(128, 64);
+        assertThrows(IllegalArgumentException.class, () -> p.allocateChunks(129, 100));
+    }
+
+    /** Non-positive totalSize is rejected. */
+    @Test
+    public void testRejectsNonPositiveRequest() {
+        ChunkedBufferPool p = pool(128, 64);
+        assertThrows(IllegalArgumentException.class, () -> p.allocateChunks(0, 100));
+        assertThrows(IllegalArgumentException.class, () -> p.allocateChunks(-1, 100));
+    }
+
+    /**
+     * When a multi-chunk request can't be fully satisfied within the deadline, chunks already
+     * acquired during the call must be returned to the pool — the caller must not leak memory.
+     */
+    @Test
+    public void testRollbackOnPartialFailure() throws Exception {
+        int chunkSize = 64;
+        long total = 2 * chunkSize;  // only 2 chunks worth of memory
+        ChunkedBufferPool p = pool(total, chunkSize);
+        // Reserve one chunk so the pool has only 1 left.
+        ByteBuffer held = p.allocate(chunkSize, 100);
+
+        // Request 2 chunks with a zero deadline — first chunk fits, second can't, must throw.
+        assertThrows(BufferExhaustedException.class, () -> p.allocateChunks(2 * chunkSize, 0));
+
+        // Available memory must reflect only the chunk we deliberately hold; the request's
+        // first-chunk acquisition was rolled back.
+        assertEquals(total - chunkSize, p.availableMemory());
+
+        p.deallocate(held);
+        assertEquals(total, p.availableMemory());
+    }
+
+    /**
+     * No partial chunk holds during the wait. While a multi-chunk request blocks, the pool's
+     * {@code availableMemory()} must reflect bytes the waiter has not yet "earned" — i.e., the
+     * reservation is via the accumulator and not via chunks pulled out of the pool's accounting.
+     * Distinguishes the atomic implementation from the older loop-with-rollback (which held
+     * partials off the accounting between iterations).
+     */
+    @Test
+    public void testNoPartialHoldsDuringWait() throws Exception {
+        int chunkSize = 64;
+        long total = 4L * chunkSize;
+        ChunkedBufferPool p = pool(total, chunkSize);
+        // Drain the pool down to 1 chunk's worth so a 2-chunk request can't be satisfied immediately.
+        ByteBuffer h1 = p.allocate(chunkSize, 100);
+        ByteBuffer h2 = p.allocate(chunkSize, 100);
+        ByteBuffer h3 = p.allocate(chunkSize, 100);
+        long available = p.availableMemory();
+        assertEquals(chunkSize, available, "pool should have exactly 1 chunk free");
+
+        // Background thread requests 2 chunks; will block on the 2nd.
+        AtomicReference<Throwable> err = new AtomicReference<>();
+        Thread t = new Thread(() -> {
+            try {
+                p.allocateChunks(2 * chunkSize, 5_000);
+            } catch (Throwable th) {
+                err.set(th);
+            }
+        }, "chunked-waiter");
+        t.start();
+        // Wait until the thread has joined the waiters queue.
+        long deadlineMs = System.currentTimeMillis() + 2_000;
+        while (p.queued() == 0 && System.currentTimeMillis() < deadlineMs)
+            Thread.sleep(5);
+        assertEquals(1, p.queued(), "waiter should be parked on the pool's queue");
+
+        // While the waiter is parked, the pool's available memory must still report the
+        // 1 chunk's worth — the waiter has NOT consumed any of it (no partial hold).
+        assertEquals(chunkSize, p.availableMemory(),
+                "pool memory must reflect no partial holds during the atomic wait");
+
+        // Free enough chunks for the waiter to complete.
+        p.deallocate(h1);
+        p.deallocate(h2);
+        t.join(5_000);
+        assertNull(err.get(), "waiter unexpectedly threw: " + err.get());
+        p.deallocate(h3);
+    }
+
+    /**
+     * FIFO fairness across the K-chunk request. A multi-chunk request that joins the wait queue
+     * before a single-chunk request must complete first when memory becomes available — the
+     * K-chunk request occupies a single waiter slot, not K of them.
+     */
+    @Test
+    public void testFifoFairnessAcrossChunkedAndSingleChunkRequests() throws Exception {
+        int chunkSize = 64;
+        long total = 4L * chunkSize;
+        ChunkedBufferPool p = pool(total, chunkSize);
+        // Drain the pool entirely so any new request must wait.
+        ByteBuffer h1 = p.allocate(chunkSize, 100);
+        ByteBuffer h2 = p.allocate(chunkSize, 100);
+        ByteBuffer h3 = p.allocate(chunkSize, 100);
+        ByteBuffer h4 = p.allocate(chunkSize, 100);
+        assertEquals(0, p.availableMemory());
+
+        // T_multi enters first, requesting 2 chunks; T_single enters second, requesting 1 chunk.
+        AtomicReference<Long> multiCompletionMs = new AtomicReference<>();
+        AtomicReference<Long> singleCompletionMs = new AtomicReference<>();
+        CountDownLatch multiStarted = new CountDownLatch(1);
+        Thread tMulti = new Thread(() -> {
+            try {
+                multiStarted.countDown();
+                List<ByteBuffer> got = p.allocateChunks(2 * chunkSize, 10_000);
+                multiCompletionMs.set(System.nanoTime());
+                for (ByteBuffer b : got) p.deallocate(b);
+            } catch (Throwable th) {
+                multiCompletionMs.set(-1L);
+            }
+        }, "multi");
+        Thread tSingle = new Thread(() -> {
+            try {
+                ByteBuffer got = p.allocate(chunkSize, 10_000);
+                singleCompletionMs.set(System.nanoTime());
+                p.deallocate(got);
+            } catch (Throwable th) {
+                singleCompletionMs.set(-1L);
+            }
+        }, "single");
+
+        tMulti.start();
+        assertTrue(multiStarted.await(2, TimeUnit.SECONDS));
+        // Wait for tMulti to be parked before starting tSingle, so the FIFO order is deterministic.
+        long deadlineMs = System.currentTimeMillis() + 2_000;
+        while (p.queued() == 0 && System.currentTimeMillis() < deadlineMs)
+            Thread.sleep(5);
+        assertEquals(1, p.queued(), "multi-chunk waiter should be the only one queued");
+        tSingle.start();
+        // Wait for tSingle to also be parked.
+        deadlineMs = System.currentTimeMillis() + 2_000;
+        while (p.queued() < 2 && System.currentTimeMillis() < deadlineMs)
+            Thread.sleep(5);
+        assertEquals(2, p.queued(), "single-chunk waiter joined after the multi-chunk one");
+
+        // Now free chunks one at a time, allowing the multi-chunk request to accumulate.
+        p.deallocate(h1);
+        Thread.sleep(50);
+        p.deallocate(h2);
+        // Both freed — the FIFO leader (multi) should claim both before the single-chunk waiter
+        // gets any. The multi completes first.
+        tMulti.join(5_000);
+        // Now free another chunk for the single-chunk waiter.
+        p.deallocate(h3);
+        tSingle.join(5_000);
+
+        assertNotNull(multiCompletionMs.get(), "multi did not complete");
+        assertNotNull(singleCompletionMs.get(), "single did not complete");
+        assertTrue(multiCompletionMs.get() != -1L && singleCompletionMs.get() != -1L,
+                "both threads must complete without exception");
+        assertTrue(multiCompletionMs.get() <= singleCompletionMs.get(),
+                "FIFO violated: single (joined later) completed at "
+                        + singleCompletionMs.get() + " before multi at " + multiCompletionMs.get());
+        p.deallocate(h4);
+    }
+
+    /**
+     * Slow-path rollback must not double-refund pooled chunks. When the waiter polls some
+     * chunks from the free list, then times out on a subsequent iteration, the inner finally
+     * refunds {@code accumulated} bytes to {@code nonPooledAvailableMemory} and the outer
+     * catch re-inserts the polled chunks into {@code free}. If {@code accumulated} includes
+     * bytes that came from polled chunks, the same memory is refunded twice — the pool
+     * over-reports {@code availableMemory()} and subsequent allocations can exceed the
+     * configured {@code buffer.memory} limit.
+     */
+    @Test
+    public void testSlowPathDoesNotDoubleRefundPolledChunksOnTimeout() throws Exception {
+        int chunkSize = 64;
+        long total = 3L * chunkSize;
+        ChunkedBufferPool p = pool(total, chunkSize);
+
+        // Drain so the next allocateChunks must wait.
+        ByteBuffer h1 = p.allocate(chunkSize, 100);
+        ByteBuffer h2 = p.allocate(chunkSize, 100);
+        ByteBuffer h3 = p.allocate(chunkSize, 100);
+        assertEquals(0, p.availableMemory());
+
+        AtomicReference<Throwable> err = new AtomicReference<>();
+        Thread t = new Thread(() -> {
+            try {
+                // Asks for 3 chunks with a finite deadline. After we deallocate 2 chunks below
+                // the waiter will poll them, find itself still 1 short, and time out on the
+                // next await iteration — with 2 chunks held in `pooled` and 2*chunkSize in
+                // `accumulated`.
+                p.allocateChunks(3 * chunkSize, 500);
+            } catch (Throwable th) {
+                err.set(th);
+            }
+        }, "partial-holder");
+        t.start();
+
+        long deadline = System.currentTimeMillis() + 2_000;
+        while (p.queued() == 0 && System.currentTimeMillis() < deadline)
+            Thread.sleep(5);
+        assertEquals(1, p.queued());
+
+        // Hand the waiter 2 chunks — it polls them into `pooled` then awaits again for the 3rd.
+        p.deallocate(h1);
+        p.deallocate(h2);
+
+        // Give the waiter a moment to wake, poll, and re-enter await before the timeout fires.
+        Thread.sleep(100);
+
+        t.join(5_000);
+        assertTrue(err.get() instanceof BufferExhaustedException,
+                "expected BufferExhaustedException, got " + err.get());
+
+        // After the throw, exactly 2 chunkSizes of physical memory should be back in the pool
+        // (h1 + h2). h3 is still held outside the pool. The bug double-refunds: it credits
+        // 2*chunkSize to `nonPooledAvailableMemory` (inner finally) AND re-inserts h1, h2 into
+        // `free` (outer catch), so availableMemory() reports 4*chunkSize.
+        assertEquals(2L * chunkSize, p.availableMemory(),
+                "pool over-reports availableMemory due to double-refund of pooled chunks on rollback");
+
+        p.deallocate(h3);
+    }
+
+    /**
+     * Slow-path success must not corrupt the pool's accounting. When the waiter polls chunks
+     * from the free list to satisfy its request, the finally block runs with the inner loop's
+     * "success" reset still in effect — it must not refund or decrement {@code nonPool}.
+     * Pool chunks transferred to the caller via {@code pooled} are accounted for by removing
+     * them from {@code free}, not by deducting their bytes from {@code nonPool}.
+     */
+    @Test
+    public void testSlowPathSuccessDoesNotCorruptAccounting() throws Exception {
+        int chunkSize = 64;
+        long total = 3L * chunkSize;
+        ChunkedBufferPool p = pool(total, chunkSize);
+
+        // Drain so a 2-chunk request must wait.
+        ByteBuffer h1 = p.allocate(chunkSize, 100);
+        ByteBuffer h2 = p.allocate(chunkSize, 100);
+        ByteBuffer h3 = p.allocate(chunkSize, 100);
+        assertEquals(0, p.availableMemory());
+
+        AtomicReference<Throwable> err = new AtomicReference<>();
+        AtomicReference<List<ByteBuffer>> got = new AtomicReference<>();
+        Thread t = new Thread(() -> {
+            try {
+                got.set(p.allocateChunks(2 * chunkSize, 10_000));
+            } catch (Throwable th) {
+                err.set(th);
+            }
+        }, "slow-success");
+        t.start();
+
+        long deadline = System.currentTimeMillis() + 2_000;
+        while (p.queued() == 0 && System.currentTimeMillis() < deadline)
+            Thread.sleep(5);
+        assertEquals(1, p.queued());
+
+        // Deallocate 2 chunks → waiter wakes, polls both, accumulated reaches memoryRequired,
+        // exits normally. The success path's finally must NOT refund or decrement nonPool.
+        p.deallocate(h1);
+        p.deallocate(h2);
+        t.join(5_000);
+        assertNull(err.get(), "waiter unexpectedly threw: " + err.get());
+
+        // After success: waiter owns the 2 chunks (returned via `got`), the test still holds h3.
+        // The pool has lent out everything it had — availableMemory must be exactly 0.
+        assertEquals(0, p.availableMemory(),
+                "slow-path success corrupted accounting (likely a phantom nonPool decrement in the finally)");
+
+        // Returning all the held buffers must fully restore the pool.
+        for (ByteBuffer chunk : got.get())
+            p.deallocate(chunk);
+        p.deallocate(h3);
+        assertEquals(total, p.availableMemory(),
+                "pool not fully restored after returning all chunks");
+    }
+
+    /**
+     * Closing the pool while a multi-chunk request is parked must surface a
+     * {@link KafkaException} and refund all reserved bytes.
+     */
+    @Test
+    public void testCloseDuringAtomicWait() throws Exception {
+        int chunkSize = 64;
+        long total = 2L * chunkSize;
+        ChunkedBufferPool p = pool(total, chunkSize);
+        // Drain so the next request must wait.
+        ByteBuffer h1 = p.allocate(chunkSize, 100);
+        ByteBuffer h2 = p.allocate(chunkSize, 100);
+        assertEquals(0, p.availableMemory());
+
+        AtomicReference<Throwable> err = new AtomicReference<>();
+        Thread t = new Thread(() -> {
+            try {
+                p.allocateChunks(2 * chunkSize, 10_000);
+            } catch (Throwable th) {
+                err.set(th);
+            }
+        }, "close-during-wait");
+        t.start();
+        long deadlineMs = System.currentTimeMillis() + 2_000;
+        while (p.queued() == 0 && System.currentTimeMillis() < deadlineMs)
+            Thread.sleep(5);
+        assertEquals(1, p.queued());
+
+        // Close the pool: signals all waiters; the waiter must throw KafkaException.
+        p.close();
+        t.join(5_000);
+        assertTrue(err.get() instanceof KafkaException,
+                "expected KafkaException, got " + err.get());
+        p.deallocate(h1);
+        p.deallocate(h2);
+    }
+}
diff --git a/clients/src/test/java/org/apache/kafka/clients/producer/internals/ChunkedByteBufferOutputStreamTest.java b/clients/src/test/java/org/apache/kafka/clients/producer/internals/ChunkedByteBufferOutputStreamTest.java
new file mode 100644
index 0000000000000..569c3bbcfcbdc
--- /dev/null
+++ b/clients/src/test/java/org/apache/kafka/clients/producer/internals/ChunkedByteBufferOutputStreamTest.java
@@ -0,0 +1,171 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.clients.producer.internals;
+
+import org.apache.kafka.common.metrics.Metrics;
+import org.apache.kafka.common.utils.MockTime;
+
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.Test;
+
+import java.nio.ByteBuffer;
+import java.util.Collections;
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+public class ChunkedByteBufferOutputStreamTest {
+
+    private final MockTime time = new MockTime();
+    private final Metrics metrics = new Metrics(time);
+    private final String metricGroup = "test";
+
+    @AfterEach
+    public void teardown() {
+        metrics.close();
+    }
+
+    private ChunkedBufferPool pool(long total, int chunkSize) {
+        return new ChunkedBufferPool(total, chunkSize, metrics, time, metricGroup);
+    }
+
+    private List<ByteBuffer> chunks(ChunkedBufferPool pool, int chunkSize, int count) throws InterruptedException {
+        return pool.allocateChunks(chunkSize * count, 100);
+    }
+
+    /** Writes that fit in the initial chunk: stream produces the bytes back via buffer(). */
+    @Test
+    public void testSingleChunkWriteRoundtrip() throws Exception {
+        int chunkSize = 16;
+        ChunkedBufferPool p = pool(64, chunkSize);
+        ChunkedByteBufferOutputStream stream = new ChunkedByteBufferOutputStream(chunks(p, chunkSize, 1), chunkSize, p);
+
+        byte[] payload = new byte[]{1, 2, 3, 4, 5};
+        stream.write(payload, 0, payload.length);
+
+        ByteBuffer flat = stream.buffer();
+        flat.flip();
+        byte[] out = new byte[flat.remaining()];
+        flat.get(out);
+        assertArrayEquals(payload, out);
+
+        stream.deallocate();
+    }
+
+    /** Writes that span multiple pre-supplied chunks land in subsequent chunks in order. */
+    @Test
+    public void testWriteSpansChunks() throws Exception {
+        int chunkSize = 8;
+        ChunkedBufferPool p = pool(64, chunkSize);
+        ChunkedByteBufferOutputStream stream = new ChunkedByteBufferOutputStream(chunks(p, chunkSize, 3), chunkSize, p);
+
+        byte[] payload = new byte[20];
+        for (int i = 0; i < payload.length; i++) payload[i] = (byte) i;
+        stream.write(payload, 0, payload.length);
+
+        ByteBuffer flat = stream.buffer();
+        flat.flip();
+        byte[] out = new byte[flat.remaining()];
+        flat.get(out);
+        assertArrayEquals(payload, out);
+
+        stream.deallocate();
+    }
+
+    /** remaining() sums current + queued chunk capacities and shrinks as writes consume bytes. */
+    @Test
+    public void testRemainingSumsChunks() throws Exception {
+        int chunkSize = 8;
+        ChunkedBufferPool p = pool(64, chunkSize);
+        ChunkedByteBufferOutputStream stream = new ChunkedByteBufferOutputStream(chunks(p, chunkSize, 2), chunkSize, p);
+
+        assertEquals(2 * chunkSize, stream.remaining());
+        stream.write(new byte[3], 0, 3);
+        assertEquals(2 * chunkSize - 3, stream.remaining());
+        stream.write(new byte[chunkSize], 0, chunkSize); // crosses into chunk 2
+        assertEquals(chunkSize - 3, stream.remaining());
+
+        stream.deallocate();
+    }
+
+    /** addBuffers extends capacity so a write that would have overflowed now fits. */
+    @Test
+    public void testAddBuffersExtendsStream() throws Exception {
+        int chunkSize = 8;
+        ChunkedBufferPool p = pool(64, chunkSize);
+        ChunkedByteBufferOutputStream stream = new ChunkedByteBufferOutputStream(chunks(p, chunkSize, 1), chunkSize, p);
+
+        // Fill the initial chunk.
+        stream.write(new byte[chunkSize], 0, chunkSize);
+        assertEquals(0, stream.remaining());
+
+        // Extend and write more — must land in the new chunk.
+        stream.addBuffers(Collections.singletonList(p.allocate(chunkSize, 100)));
+        assertEquals(chunkSize, stream.remaining());
+
+        byte[] more = new byte[]{9, 9, 9};
+        stream.write(more, 0, more.length);
+        assertEquals(chunkSize - 3, stream.remaining());
+
+        stream.deallocate();
+    }
+
+    /** Writing past all pre-supplied (and added) chunks throws — uncompressed-only contract. */
+    @Test
+    public void testOverflowThrows() throws Exception {
+        int chunkSize = 4;
+        ChunkedBufferPool p = pool(64, chunkSize);
+        ChunkedByteBufferOutputStream stream = new ChunkedByteBufferOutputStream(chunks(p, chunkSize, 1), chunkSize, p);
+
+        stream.write(new byte[chunkSize], 0, chunkSize);
+        assertThrows(IllegalStateException.class, () -> stream.write((byte) 1));
+
+        stream.deallocate();
+    }
+
+    /** position(int) at construction walks across pre-supplied chunks. */
+    @Test
+    public void testPositionWalksAcrossChunks() throws Exception {
+        int chunkSize = 4;
+        ChunkedBufferPool p = pool(32, chunkSize);
+        ChunkedByteBufferOutputStream stream = new ChunkedByteBufferOutputStream(chunks(p, chunkSize, 3), chunkSize, p);
+
+        stream.position(6); // straddles chunk 0 (4 bytes) and chunk 1 (2 bytes)
+        assertEquals(6, stream.position());
+        assertEquals(6, stream.remaining());
+
+        stream.deallocate();
+    }
+
+    /** deallocate returns all chunks to the pool. */
+    @Test
+    public void testDeallocateReturnsAllChunks() throws Exception {
+        int chunkSize = 8;
+        long total = 64;
+        ChunkedBufferPool p = pool(total, chunkSize);
+        List<ByteBuffer> initial = chunks(p, chunkSize, 3);
+        ChunkedByteBufferOutputStream stream = new ChunkedByteBufferOutputStream(initial, chunkSize, p);
+        // Also attach one extra chunk so deallocate must handle the added-buffer case too.
+        stream.addBuffers(Collections.singletonList(p.allocate(chunkSize, 100)));
+        assertEquals(total - 4L * chunkSize, p.availableMemory());
+
+        stream.deallocate();
+        assertEquals(total, p.availableMemory());
+    }
+}
diff --git a/clients/src/test/java/org/apache/kafka/clients/producer/internals/ChunkedRecordAccumulatorTest.java b/clients/src/test/java/org/apache/kafka/clients/producer/internals/ChunkedRecordAccumulatorTest.java
new file mode 100644
index 0000000000000..b191aadb4645b
--- /dev/null
+++ b/clients/src/test/java/org/apache/kafka/clients/producer/internals/ChunkedRecordAccumulatorTest.java
@@ -0,0 +1,350 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.clients.producer.internals;
+
+import org.apache.kafka.clients.MetadataSnapshot;
+import org.apache.kafka.common.Cluster;
+import org.apache.kafka.common.Node;
+import org.apache.kafka.common.TopicPartition;
+import org.apache.kafka.common.compress.Compression;
+import org.apache.kafka.common.metrics.Metrics;
+import org.apache.kafka.common.protocol.Errors;
+import org.apache.kafka.common.record.internal.MemoryRecords;
+import org.apache.kafka.common.record.internal.Record;
+import org.apache.kafka.common.record.internal.RecordBatch;
+import org.apache.kafka.common.requests.MetadataResponse.PartitionMetadata;
+import org.apache.kafka.common.utils.MockTime;
+import org.apache.kafka.common.utils.internals.LogContext;
+
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.Test;
+
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Deque;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicReference;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+public class ChunkedRecordAccumulatorTest {
+
+    private final String topic = "test";
+    private final int partition1 = 0;
+    private final Node node1 = new Node(0, "localhost", 1111);
+    private final TopicPartition tp1 = new TopicPartition(topic, partition1);
+
+    private final PartitionMetadata partMetadata1 =
+            new PartitionMetadata(Errors.NONE, tp1, Optional.of(node1.id()), Optional.empty(), null, null, null);
+    private final List<PartitionMetadata> partMetadatas = new ArrayList<>(Arrays.asList(partMetadata1));
+    private final Map<Integer, Node> nodes =
+            Stream.of(node1).collect(Collectors.toMap(Node::id, java.util.function.Function.identity()));
+    private final MetadataSnapshot metadataCache = new MetadataSnapshot(null, nodes, partMetadatas,
+            Collections.emptySet(), Collections.emptySet(), Collections.emptySet(), null, Collections.emptyMap());
+    private final Cluster cluster = metadataCache.cluster();
+
+    private final MockTime time = new MockTime();
+    private final Metrics metrics = new Metrics(time);
+    private final LogContext logContext = new LogContext();
+    private final long maxBlockTimeMs = 1000;
+    private final byte[] key = "k".getBytes();
+
+    @AfterEach
+    public void teardown() {
+        metrics.close();
+    }
+
+    private ChunkedRecordAccumulator newAccumulator(int batchSize, int chunkSize, long totalMemory, Compression compression) {
+        ChunkedBufferPool pool = new ChunkedBufferPool(totalMemory, chunkSize, metrics, time, "producer-metrics");
+        return new ChunkedRecordAccumulator(logContext, batchSize, compression,
+                /* lingerMs */ 0, /* retryBackoffMs */ 0L, /* retryBackoffMaxMs */ 0L,
+                /* deliveryTimeoutMs */ 3200, metrics, "producer-metrics", time,
+                /* transactionManager */ null, pool);
+    }
+
+    /** First record creates a batch backed by chunked allocation. */
+    @Test
+    public void testFirstRecordCreatesChunkedBatch() throws Exception {
+        int chunkSize = 256;
+        ChunkedRecordAccumulator accum = newAccumulator(1024, chunkSize, 16L * chunkSize, Compression.NONE);
+
+        byte[] value = new byte[64];
+        accum.append(topic, partition1, 0L, key, value, Record.EMPTY_HEADERS, null,
+                maxBlockTimeMs, time.milliseconds(), cluster);
+
+        Deque<ProducerBatch> dq = batchesFor(accum, tp1);
+        assertEquals(1, dq.size());
+        ProducerBatch batch = dq.peekFirst();
+        assertNotNull(batch);
+        assertEquals(1, batch.recordCount);
+        accum.close();
+    }
+
+    /**
+     * A small follow-up record fits in the existing batch's pre-allocated chunks — no extension
+     * needed.
+     */
+    @Test
+    public void testFollowupRecordFitsWithoutExtension() throws Exception {
+        int chunkSize = 256;
+        ChunkedRecordAccumulator accum = newAccumulator(1024, chunkSize, 16L * chunkSize, Compression.NONE);
+
+        accum.append(topic, partition1, 0L, key, new byte[16], Record.EMPTY_HEADERS, null,
+                maxBlockTimeMs, time.milliseconds(), cluster);
+        accum.append(topic, partition1, 0L, key, new byte[16], Record.EMPTY_HEADERS, null,
+                maxBlockTimeMs, time.milliseconds(), cluster);
+
+        Deque<ProducerBatch> dq = batchesFor(accum, tp1);
+        assertEquals(1, dq.size(), "Both records should land in the same batch");
+        assertEquals(2, dq.peekFirst().recordCount);
+        accum.close();
+    }
+
+    /**
+     * A follow-up record that would overflow the existing chunks triggers mid-batch extension:
+     * additional chunks are attached and the record lands in the same batch.
+     */
+    @Test
+    public void testMidBatchExtensionGrowsExistingBatch() throws Exception {
+        int chunkSize = 128;
+        ChunkedRecordAccumulator accum = newAccumulator(8192, chunkSize, 16L * chunkSize, Compression.NONE);
+
+        // First record fits in 1 chunk (~64+overhead).
+        accum.append(topic, partition1, 0L, key, new byte[32], Record.EMPTY_HEADERS, null,
+                maxBlockTimeMs, time.milliseconds(), cluster);
+
+        Deque<ProducerBatch> dq = batchesFor(accum, tp1);
+        assertEquals(1, dq.size());
+        int initialRecordCount = dq.peekFirst().recordCount;
+        assertEquals(1, initialRecordCount);
+
+        // Second record big enough to require an extra chunk.
+        accum.append(topic, partition1, 0L, key, new byte[200], Record.EMPTY_HEADERS, null,
+                maxBlockTimeMs, time.milliseconds(), cluster);
+
+        // Same batch, now with the second record.
+        assertEquals(1, dq.size(), "Should still be the same batch — extension should not roll");
+        assertEquals(2, dq.peekFirst().recordCount,
+                "Second record should land in the extended batch");
+        accum.close();
+    }
+
+    /** Compression is rejected at batch creation in PR1 with a clear message. */
+    @Test
+    public void testCompressionThrowsAtBatchCreation() {
+        int chunkSize = 256;
+        ChunkedRecordAccumulator accum = newAccumulator(1024, chunkSize, 16L * chunkSize, Compression.gzip().build());
+
+        UnsupportedOperationException ex = assertThrows(UnsupportedOperationException.class, () ->
+                accum.append(topic, partition1, 0L, key, new byte[32], Record.EMPTY_HEADERS, null,
+                        maxBlockTimeMs, time.milliseconds(), cluster));
+        assertTrue(ex.getMessage().contains("not implemented yet"),
+                "Expected 'not implemented yet' in message, got: " + ex.getMessage());
+        accum.close();
+    }
+
+    /**
+     * Race between two concurrent appenders on the same chunked tail. Both threads probe
+     * {@code extensionBytesNeeded} under the deque lock against the same physical remaining
+     * capacity, drop the lock, and allocate gap-sized chunks off-lock. When the first appender
+     * re-takes the lock and consumes most of the original remaining, the second appender's
+     * allocation — sized against the pre-race remaining — is short by the bytes the first
+     * appender consumed. Without a re-probe in the second-lock block, the second appender
+     * proceeds to write past the stream's physical capacity and {@code advanceToNextChunk}
+     * throws {@link IllegalStateException}.
+     * <p>
+     * Coordination: a {@link ChunkedBufferPool} subclass blocks the FIRST {@code allocateChunks}
+     * call after the test arms a latch (i.e. thread A's). Thread B's allocate is the second call
+     * and proceeds normally — B re-takes the lock, attaches its chunks, appends its record, and
+     * returns. Only then is thread A released, so A attaches its chunks and tries to append
+     * against a tail whose remaining has already shrunk.
+     */
+    @Test
+    public void testConcurrentExtensionRaceDoesNotOverflowChunkedStream() throws Exception {
+        final int chunkSize = 256;
+        final int recordValueSize = 350;  // sized so gap ≈ chunkSize → minimal rounding cushion
+
+        final AtomicBoolean armed = new AtomicBoolean(false);
+        final CountDownLatch aHoldsChunks = new CountDownLatch(1);
+        final CountDownLatch bDoneAttaching = new CountDownLatch(1);
+
+        ChunkedBufferPool pool = new ChunkedBufferPool(64L * chunkSize, chunkSize, metrics, time, "producer-metrics") {
+            @Override
+            public List<ByteBuffer> allocateChunks(int totalSize, long maxTimeToBlockMs) throws InterruptedException {
+                boolean blockThisCall = armed.compareAndSet(true, false);
+                List<ByteBuffer> chunks = super.allocateChunks(totalSize, maxTimeToBlockMs);
+                if (blockThisCall) {
+                    aHoldsChunks.countDown();
+                    if (!bDoneAttaching.await(10, TimeUnit.SECONDS)) {
+                        throw new InterruptedException("test timeout waiting for B");
+                    }
+                }
+                return chunks;
+            }
+        };
+        ChunkedRecordAccumulator accum = new ChunkedRecordAccumulator(logContext, 8192, Compression.NONE,
+                /* lingerMs */ 0, /* retryBackoffMs */ 0L, /* retryBackoffMaxMs */ 0L,
+                /* deliveryTimeoutMs */ 3200, metrics, "producer-metrics", time,
+                /* transactionManager */ null, pool);
+        try {
+            // Warmup: tiny first record establishes a chunked tail with a known remainingInStream.
+            accum.append(topic, partition1, 0L, key, new byte[1], Record.EMPTY_HEADERS, null,
+                    maxBlockTimeMs, time.milliseconds(), cluster);
+
+            // Arm the race: the next allocateChunks call (A's) will block until B has appended.
+            armed.set(true);
+
+            AtomicReference<Throwable> aError = new AtomicReference<>();
+            Thread tA = new Thread(() -> {
+                try {
+                    accum.append(topic, partition1, 0L, key, new byte[recordValueSize],
+                            Record.EMPTY_HEADERS, null, maxBlockTimeMs, time.milliseconds(), cluster);
+                } catch (Throwable t) {
+                    aError.set(t);
+                }
+            }, "test-appender-A");
+            tA.start();
+
+            // Wait for A to be parked inside allocateChunks holding its (pre-attach) chunks.
+            assertTrue(aHoldsChunks.await(10, TimeUnit.SECONDS), "A did not reach allocateChunks");
+
+            // B runs on this thread. Its allocateChunks is the second call — armed=false now,
+            // so it proceeds without blocking. B probes the same R as A (A hasn't attached yet),
+            // attaches its own chunks, appends its record. Now the tail's remaining has shrunk
+            // by B's actual record size, but A's still-off-lock allocation didn't know that.
+            accum.append(topic, partition1, 0L, key, new byte[recordValueSize], Record.EMPTY_HEADERS, null,
+                    maxBlockTimeMs, time.milliseconds(), cluster);
+
+            // Release A. Without the fix in the second-lock block, A's tryAppendToExisting
+            // will overflow the chunked stream and throw IllegalStateException.
+            bDoneAttaching.countDown();
+            tA.join(10_000);
+            assertFalse(tA.isAlive(), "Thread A did not complete in time");
+
+            assertNull(aError.get(),
+                    "Thread A failed: " + (aError.get() == null ? "" : aError.get().toString()));
+        } finally {
+            // Drain any state regardless of outcome.
+            accum.close();
+        }
+    }
+
+    /**
+     * Regression guard: an inflight chunked batch returns all K chunks to the pool on the
+     * expiration / broker-disconnect path, not just one {@code initialCapacity} chunkSize.
+     * The parent {@code RecordAccumulator.deallocate} for inflight batches credits only
+     * {@code initialCapacity} back to the pool and throws; {@link ChunkedRecordAccumulator}
+     * overrides {@code deallocate} to return all chunks before the throw. Removing that
+     * override would re-introduce a (K−1)×chunkSize leak and fail this test.
+     */
+    @Test
+    public void testInflightExpirationReturnsAllChunksToPool() throws Exception {
+        int chunkSize = 128;
+        long totalMemory = 32L * chunkSize;
+        ChunkedBufferPool pool = new ChunkedBufferPool(totalMemory, chunkSize, metrics, time, "producer-metrics");
+        ChunkedRecordAccumulator accum = new ChunkedRecordAccumulator(logContext, 8192, Compression.NONE,
+                /* lingerMs */ 0, /* retryBackoffMs */ 0L, /* retryBackoffMaxMs */ 0L,
+                /* deliveryTimeoutMs */ 3200, metrics, "producer-metrics", time,
+                /* transactionManager */ null, pool);
+
+        // A record large enough that allocateChunks reserves multiple chunks (K > 1).
+        byte[] value = new byte[400];
+        accum.append(topic, partition1, 0L, key, value, Record.EMPTY_HEADERS, null,
+                maxBlockTimeMs, time.milliseconds(), cluster);
+
+        Deque<ProducerBatch> dq = batchesFor(accum, tp1);
+        assertEquals(1, dq.size());
+        ProducerBatch batch = dq.peekFirst();
+
+        // Confirm the batch actually consumed multiple chunks. estimateSizeInBytesUpperBound for
+        // a 400-byte value with v2 framing is well over chunkSize, so K should be >= 2.
+        long heldBeforeDeallocate = totalMemory - pool.availableMemory();
+        assertTrue(heldBeforeDeallocate >= 2L * chunkSize,
+                "test setup expects K >= 2; pool held " + heldBeforeDeallocate + " bytes");
+
+        // Mark the batch as if the Sender had drained it.
+        batch.setInflight(true);
+
+        // The inflight branch throws after deallocating. The chunked override must return
+        // all K chunks (not just initialCapacity) before propagating the exception.
+        assertThrows(IllegalStateException.class, () -> accum.deallocate(batch));
+
+        assertEquals(totalMemory, pool.availableMemory(),
+                "pool should be fully restored after inflight-expiration deallocate; "
+                        + "any K-1 unsurrendered chunks indicate the chunked-leak regression");
+
+        accum.close();
+    }
+
+    private Deque<ProducerBatch> batchesFor(RecordAccumulator accum, TopicPartition tp) {
+        return accum.getDeque(tp);
+    }
+
+    /**
+     * Sender's drain (and any other caller of {@code batch.close()}) cascades through
+     * {@code MemoryRecordsBuilder.closeForRecordAppends()} →
+     * {@code appendStream.close()} → {@code bufferStream.close()}. The chunked stream's
+     * buffer must remain readable after that cascade so the builder can flatten chunks into
+     * the heap buffer it sends over the network. If the chunked stream's {@code close()}
+     * prematurely returns chunks to the pool, the flattened buffer is empty and the header
+     * write fails (or produces an empty record set).
+     */
+    @Test
+    public void testBatchCloseDoesNotDeallocateChunksPrematurely() throws Exception {
+        int chunkSize = 256;
+        ChunkedRecordAccumulator accum = newAccumulator(8192, chunkSize, 32L * chunkSize, Compression.NONE);
+
+        byte[] value = new byte[64];
+        accum.append(topic, partition1, 0L, key, value, Record.EMPTY_HEADERS, null,
+                maxBlockTimeMs, time.milliseconds(), cluster);
+
+        Deque<ProducerBatch> dq = batchesFor(accum, tp1);
+        ProducerBatch batch = dq.peekFirst();
+        assertNotNull(batch);
+
+        // Matches the call sites in RecordAccumulator.drain (line 941) and Sender.
+        batch.close();
+        MemoryRecords records = batch.records();
+        assertTrue(records.sizeInBytes() > 0,
+                "batch must produce a non-empty record set after close; chunks were deallocated prematurely");
+
+        int count = 0;
+        for (RecordBatch rb : records.batches()) {
+            for (Record r : rb) {
+                count++;
+                assertNotNull(r.value());
+            }
+        }
+        assertEquals(1, count, "expected exactly 1 record after close");
+
+        accum.close();
+    }
+}

From 64147f6b492933e343d983f3a168edcdc427976b Mon Sep 17 00:00:00 2001
From: Lianet Magrans <lmagrans@confluent.io>
Date: Thu, 18 Jun 2026 15:27:38 -0400
Subject: [PATCH 03/11] config & ratio-aware chunk sizing

---
 .../kafka/clients/producer/KafkaProducer.java |  58 ++++-
 .../clients/producer/ProducerConfig.java      |  21 ++
 .../producer/internals/ChunkedBufferPool.java |  47 ++--
 .../ChunkedByteBufferOutputStream.java        |  36 +--
 .../internals/ChunkedProducerBatch.java       |  94 +++++++
 .../internals/ChunkedRecordAccumulator.java   | 236 ++++++++----------
 .../producer/internals/ProducerBatch.java     |  38 +--
 .../producer/internals/RecordAccumulator.java |  54 +++-
 .../record/internal/AbstractRecords.java      |  25 ++
 .../record/internal/MemoryRecordsBuilder.java |  48 +++-
 .../clients/producer/ProducerConfigTest.java  |  35 +++
 .../ChunkedRecordAccumulatorTest.java         |  91 +++++--
 12 files changed, 544 insertions(+), 239 deletions(-)
 create mode 100644 clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedProducerBatch.java

diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/KafkaProducer.java b/clients/src/main/java/org/apache/kafka/clients/producer/KafkaProducer.java
index dfa72f2adad71..0990a68439171 100644
--- a/clients/src/main/java/org/apache/kafka/clients/producer/KafkaProducer.java
+++ b/clients/src/main/java/org/apache/kafka/clients/producer/KafkaProducer.java
@@ -28,6 +28,8 @@
 import org.apache.kafka.clients.consumer.OffsetCommitCallback;
 import org.apache.kafka.clients.producer.internals.BufferPool;
 import org.apache.kafka.clients.producer.internals.BuiltInPartitioner;
+import org.apache.kafka.clients.producer.internals.ChunkedBufferPool;
+import org.apache.kafka.clients.producer.internals.ChunkedRecordAccumulator;
 import org.apache.kafka.clients.producer.internals.KafkaProducerMetrics;
 import org.apache.kafka.clients.producer.internals.ProducerInterceptors;
 import org.apache.kafka.clients.producer.internals.ProducerMetadata;
@@ -90,6 +92,7 @@
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
+import java.util.Locale;
 import java.util.Map;
 import java.util.Objects;
 import java.util.Optional;
@@ -456,19 +459,48 @@ public KafkaProducer(Properties properties, Serializer<K> keySerializer, Seriali
             // As per Kafka producer configuration documentation batch.size may be set to 0 to explicitly disable
             // batching which in practice actually means using a batch size of 1.
             int batchSize = Math.max(1, config.getInt(ProducerConfig.BATCH_SIZE_CONFIG));
-            this.accumulator = new RecordAccumulator(logContext,
-                    batchSize,
-                    compression,
-                    lingerMs(config),
-                    retryBackoffMs,
-                    retryBackoffMaxMs,
-                    deliveryTimeoutMs,
-                    partitionerConfig,
-                    metrics,
-                    PRODUCER_METRIC_GROUP_NAME,
-                    time,
-                    transactionManager,
-                    new BufferPool(this.totalMemorySize, batchSize, metrics, time, PRODUCER_METRIC_GROUP_NAME));
+            String allocationStrategy = config.getString(ProducerConfig.BUFFER_MEMORY_ALLOCATION_STRATEGY_CONFIG)
+                    .toLowerCase(Locale.ROOT);
+            boolean incremental = ProducerConfig.BUFFER_MEMORY_ALLOCATION_STRATEGY_INCREMENTAL.equals(allocationStrategy);
+            if (incremental && compression.type() != CompressionType.NONE) {
+                throw new ConfigException("The " + ProducerConfig.BUFFER_MEMORY_ALLOCATION_STRATEGY_INCREMENTAL
+                        + " " + ProducerConfig.BUFFER_MEMORY_ALLOCATION_STRATEGY_CONFIG
+                        + " does not support compression yet. " + ProducerConfig.COMPRESSION_TYPE_CONFIG
+                        + " must be set to none.");
+            }
+            // Use the chunked path only when a batch is at least one full chunk
+            // (batch.size >= CHUNK_SIZE). Below that, a batch can't fill even one chunk, so chunking
+            // would reserve a whole chunk for a batch capped under it (over-reservation); the full
+            // strategy (reserving up to batch.size per batch) is both leaner and identical to trunk.
+            if (incremental && batchSize >= ChunkedRecordAccumulator.CHUNK_SIZE) {
+                this.accumulator = new ChunkedRecordAccumulator(logContext,
+                        batchSize,
+                        compression,
+                        lingerMs(config),
+                        retryBackoffMs,
+                        retryBackoffMaxMs,
+                        deliveryTimeoutMs,
+                        partitionerConfig,
+                        metrics,
+                        PRODUCER_METRIC_GROUP_NAME,
+                        time,
+                        transactionManager,
+                        new ChunkedBufferPool(this.totalMemorySize, ChunkedRecordAccumulator.CHUNK_SIZE, metrics, time, PRODUCER_METRIC_GROUP_NAME));
+            } else {
+                this.accumulator = new RecordAccumulator(logContext,
+                        batchSize,
+                        compression,
+                        lingerMs(config),
+                        retryBackoffMs,
+                        retryBackoffMaxMs,
+                        deliveryTimeoutMs,
+                        partitionerConfig,
+                        metrics,
+                        PRODUCER_METRIC_GROUP_NAME,
+                        time,
+                        transactionManager,
+                        new BufferPool(this.totalMemorySize, batchSize, metrics, time, PRODUCER_METRIC_GROUP_NAME));
+            }
 
             this.errors = this.metrics.sensor("errors");
             this.sender = newSender(logContext, kafkaClient, this.metadata);
diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/ProducerConfig.java b/clients/src/main/java/org/apache/kafka/clients/producer/ProducerConfig.java
index 38a007b6d212b..286f3e37dc823 100644
--- a/clients/src/main/java/org/apache/kafka/clients/producer/ProducerConfig.java
+++ b/clients/src/main/java/org/apache/kafka/clients/producer/ProducerConfig.java
@@ -228,6 +228,20 @@ public class ProducerConfig extends AbstractConfig {
                                                     + "not all memory the producer uses is used for buffering. Some additional memory will be used for compression (if "
                                                     + "compression is enabled) as well as for maintaining in-flight requests.";
 
+    /** <code>buffer.memory.allocation.strategy</code> */
+    public static final String BUFFER_MEMORY_ALLOCATION_STRATEGY_CONFIG = "buffer.memory.allocation.strategy";
+    public static final String BUFFER_MEMORY_ALLOCATION_STRATEGY_FULL = "full";
+    public static final String BUFFER_MEMORY_ALLOCATION_STRATEGY_INCREMENTAL = "incremental";
+    private static final String BUFFER_MEMORY_ALLOCATION_STRATEGY_DOC = "Controls how the producer allocates memory from <code>" + BUFFER_MEMORY_CONFIG + "</code> for record batches. The following values are supported: "
+                                                    + "<ul>"
+                                                    + "<li><code>" + BUFFER_MEMORY_ALLOCATION_STRATEGY_FULL + "</code>: reserve <code>" + BATCH_SIZE_CONFIG + "</code> bytes up front for each in-progress batch, "
+                                                    + "regardless of the actual size of the records appended to it.</li>"
+                                                    + "<li><code>" + BUFFER_MEMORY_ALLOCATION_STRATEGY_INCREMENTAL + "</code>: allocate memory on demand as records are appended, growing each batch "
+                                                    + "up to <code>" + BATCH_SIZE_CONFIG + "</code> bytes. This keeps memory usage proportional to the data actually buffered rather than to the number of "
+                                                    + "active partitions, which allows larger <code>" + BATCH_SIZE_CONFIG + "</code> values (e.g. for high-latency clusters) without reserving "
+                                                    + "<code>" + BATCH_SIZE_CONFIG + "</code> bytes for every active partition.</li>"
+                                                    + "</ul>";
+
     /** <code>retry.backoff.ms</code> */
     public static final String RETRY_BACKOFF_MS_CONFIG = CommonClientConfigs.RETRY_BACKOFF_MS_CONFIG;
 
@@ -399,6 +413,13 @@ public class ProducerConfig extends AbstractConfig {
                                         Importance.MEDIUM,
                                         CommonClientConfigs.CLIENT_DNS_LOOKUP_DOC)
                                 .define(BUFFER_MEMORY_CONFIG, Type.LONG, 32 * 1024 * 1024L, atLeast(0L), Importance.HIGH, BUFFER_MEMORY_DOC)
+                                .define(BUFFER_MEMORY_ALLOCATION_STRATEGY_CONFIG,
+                                        Type.STRING,
+                                        BUFFER_MEMORY_ALLOCATION_STRATEGY_FULL,
+                                        ConfigDef.CaseInsensitiveValidString
+                                                .in(BUFFER_MEMORY_ALLOCATION_STRATEGY_FULL, BUFFER_MEMORY_ALLOCATION_STRATEGY_INCREMENTAL),
+                                        Importance.MEDIUM,
+                                        BUFFER_MEMORY_ALLOCATION_STRATEGY_DOC)
                                 .define(RETRIES_CONFIG, Type.INT, Integer.MAX_VALUE, between(0, Integer.MAX_VALUE), Importance.HIGH, RETRIES_DOC)
                                 .define(ACKS_CONFIG,
                                         Type.STRING,
diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedBufferPool.java b/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedBufferPool.java
index 96a4e6e0b36e4..9fa367d80e889 100644
--- a/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedBufferPool.java
+++ b/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedBufferPool.java
@@ -28,15 +28,12 @@
 import java.util.concurrent.locks.Condition;
 
 /**
- * A {@link BufferPool} dedicated to chunk-sized buffer reuse. The chunk size is passed to the
- * parent as its {@link #poolableSize()}.
+ * A {@link BufferPool} dedicated to chunk-sized buffer reuse (chunk size = {@link #poolableSize()}).
  * <p>
- * Adds {@link #allocateChunks(int, long)} for callers that need to acquire multiple
- * chunks at once. The implementation is atomic across the K-chunk request:
- * a single lock acquisition for the whole reservation, a single {@link Condition}
- * added to {@link #waiters} so the request is FIFO-fair against single-chunk acquirers, and
- * any failure (timeout, close, OOM) refunds the entire reservation atomically before the
- * exception propagates. No partial chunk holds are visible outside the lock during the wait.
+ * Adds {@link #allocateChunks(int, long)} to acquire multiple chunks atomically: one lock
+ * acquisition and a single {@link Condition} on {@link #waiters} (FIFO-fair against single-chunk
+ * acquirers), with any failure (timeout, close, OOM) refunding the whole reservation before the
+ * exception propagates. No partial holds are visible during the wait.
  */
 public class ChunkedBufferPool extends BufferPool {
 
@@ -45,22 +42,19 @@ public ChunkedBufferPool(long memory, int chunkSize, Metrics metrics, Time time,
     }
 
     /**
-     * Allocate {@code ceil(totalSize / chunkSize)} chunk-sized buffers atomically.
-     * This follows the same principle as the allocate in the parent BufferPool class: if there is
-     * memory available, allocate the requested number of chunks. If not, wait for memory.
-     * If it needs to wait, it blocks up to {@code maxTimeToBlockMs} for the whole request,
-     * queued on {@link #waiters} alongside single-chunk acquirers (FIFO).
-     * <p>
-     * On any failure path every byte reserved during the call is returned to the pool
-     * and the next waiter is signaled. No partial chunk holds are visible to other threads during the wait.
-     * The reservation is tracked as bytes against {@link #nonPooledAvailableMemory} plus chunks polled from {@link #free}.
+     * Allocate {@code ceil(totalSize / chunkSize)} chunk-sized buffers atomically, mirroring
+     * {@link BufferPool#allocate}: satisfied immediately if memory is available, else blocks up to
+     * {@code maxTimeToBlockMs} for the whole request (FIFO on {@link #waiters}). The reservation is
+     * tracked as bytes against {@link #nonPooledAvailableMemory} plus chunks polled from
+     * {@link #free}; on any failure path every reserved byte is returned and the next waiter signaled.
      *
      * @param totalSize        minimum total bytes of capacity required across the returned chunks
      * @param maxTimeToBlockMs maximum time in milliseconds to block waiting for memory
      * @return list of {@code ceil(totalSize / chunkSize)} {@code ByteBuffer}s, each of capacity
      *         {@code chunkSize}
      * @throws InterruptedException     if interrupted while waiting
-     * @throws IllegalArgumentException if {@code totalSize <= 0} or {@code totalSize > totalMemory()}
+     * @throws IllegalArgumentException if {@code totalSize <= 0}, or if the request rounded up to
+     *         whole chunks exceeds {@code totalMemory()}
      * @throws BufferExhaustedException if the request can't be satisfied within {@code maxTimeToBlockMs}
      * @throws KafkaException           if the pool is closed during the wait
      */
@@ -75,11 +69,17 @@ public List<ByteBuffer> allocateChunks(int totalSize, long maxTimeToBlockMs) thr
         int chunkSize = poolableSize();
         int numChunks = (int) (((long) totalSize + chunkSize - 1L) / chunkSize);
         long memoryRequired = (long) numChunks * chunkSize;
+        // Rounding totalSize up to whole chunks can require more pool memory than buffer.memory
+        // holds even when totalSize alone fits (the single-buffer "full" path reserves exactly the
+        // record size, so it never hits this). Reject it here to fail fast, instead of blocking for
+        // max.block.ms and then throwing BufferExhausted for a request that can never be satisfied.
+        if (memoryRequired > totalMemory())
+            throw new IllegalArgumentException("Attempt to allocate " + numChunks + " chunks of "
+                + chunkSize + " bytes (" + memoryRequired + " bytes total), but there is a hard limit of "
+                + totalMemory() + " on memory allocations.");
 
-        // Chunks pulled from the free list during the reservation. Remaining bytes
-        // (memoryRequired - pooled.size() * chunkSize) are reserved against
-        // nonPooledAvailableMemory and materialized as raw allocations after the lock is
-        // released.
+        // Chunks pulled from the free list; the remaining bytes are reserved against
+        // nonPooledAvailableMemory and materialized as raw allocations after the lock is released.
         List<ByteBuffer> pooled = new ArrayList<>(numChunks);
 
         lock.lock();
@@ -95,8 +95,7 @@ public List<ByteBuffer> allocateChunks(int totalSize, long maxTimeToBlockMs) thr
                     pooled.add(free.pollFirst());
                 long remainingBytes = memoryRequired - (long) pooled.size() * chunkSize;
                 if (remainingBytes > 0) {
-                    // remainingBytes is bounded by memoryRequired ≤ totalMemory; the entry check
-                    // rejects requests larger than that, so the int cast is safe in practice.
+                    // remainingBytes <= memoryRequired <= totalMemory (validated above), so the int cast is safe.
                     freeUp((int) remainingBytes);
                     this.nonPooledAvailableMemory -= remainingBytes;
                 }
diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedByteBufferOutputStream.java b/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedByteBufferOutputStream.java
index ccf1d3d7374c9..3b9a9ed42ff5e 100644
--- a/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedByteBufferOutputStream.java
+++ b/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedByteBufferOutputStream.java
@@ -32,10 +32,9 @@
  * chunks), {@link IllegalStateException} is thrown. The caller is responsible for attaching
  * additional chunks before any such write.
  * <p>
- * Automatic mid-write growth (allocating a chunk from inside {@code write()}) will be added
- * together with compression support in a follow-up: compressor buffering can cause a record's
- * write to exceed its reservation, which the caller can't predict in advance. Until then, the
- * write path stays allocation-free and deterministic.
+ * Automatic mid-write growth (allocating from inside {@code write()}) is a follow-up tied to
+ * compression support, where compressor buffering can make a write exceed its reservation; until
+ * then the write path stays allocation-free and deterministic.
  * <p>
  * When {@link #buffer()} is called (typically at batch close), all chunks are flattened into a
  * single contiguous ByteBuffer — exactly one copy operation.
@@ -110,11 +109,10 @@ private void ensureChunkCapacity(int needed) {
     }
 
     /**
-     * Advances {@code currentChunk} to the next pre-supplied chunk. The caller is responsible
-     * for obtaining additional chunks (e.g. {@code ChunkedBufferPool.allocateChunks}) and
-     * attaching them via {@link #addBuffers(List)} before any write that would exceed
-     * {@link #remaining()}. The KIP's mid-record growth path (non-blocking pool then direct
-     * heap) lands together with compression support.
+     * Advances {@code currentChunk} to the next pre-supplied chunk; throws if none is left. The
+     * caller must attach additional chunks via {@link #addBuffers(List)} before any write that
+     * exceeds {@link #remaining()}. (Mid-record growth — non-blocking pool then heap — is a
+     * follow-up that lands with compression support.)
      */
     private void advanceToNextChunk() {
         if (currentChunkIndex + 1 >= chunks.size()) {
@@ -140,6 +138,11 @@ public ByteBuffer buffer() {
         if (flattenedBuffer != null && !dirty) {
             return flattenedBuffer;
         }
+        // TODO: KAFKA-20687. This flatten runs at batch close, when the chunk set is final.
+        //  Today all chunks (used and unused) are returned to the pool only when the batch
+        //  completes (via deallocate(pool)). Consider releasing the fully-unused chunks
+        //  early, at close, rather than holding them until completion — removing them from
+        //  `chunks` here so the completion-time deallocate(pool) does not double-return them.
         int totalSize = 0;
         for (ByteBuffer chunk : chunks) {
             totalSize += chunk.position();
@@ -153,6 +156,12 @@ public ByteBuffer buffer() {
             chunk.position(chunkPos);
         }
         dirty = false;
+        // The bytes are now copied into flattenedBuffer, but we intentionally do not release the
+        //  chunks until batch completion, so the in-flight data stays reserved against the
+        //  buffer.memory budget (the pool's available memory reflects it), consistent with the
+        //  "full" strategy. Releasing here would return that memory to the pool while the bytes are
+        //  still in flight in the heap copy, letting the pool admit more than buffer.memory intends.
+        //  This flattening is an initial approach and will be removed with KAFKA-20580.
         return flattenedBuffer;
     }
 
@@ -219,15 +228,14 @@ public int initialCapacity() {
 
     @Override
     public void ensureRemaining(int remainingBytesRequired) {
-        // The stream advances one chunk at a time — the most any single ensureChunkCapacity
-        // call can guarantee is `chunkSize` of contiguous-by-chunk space. Callers needing more
-        // than that are expected to attach chunks via addBuffers before writing; write(byte[])
-        // itself loops across chunks so contiguous capacity isn't required.
+        // A single call can guarantee at most `chunkSize` of space (the stream advances one chunk
+        // at a time); callers needing more attach chunks via addBuffers first. write(byte[]) loops
+        // across chunks, so contiguous capacity isn't required.
         ensureChunkCapacity(Math.min(remainingBytesRequired, chunkSize));
     }
 
     /**
-     * Returns all pool-allocated chunks to the buffer pool.
+     * Returns all pool-allocated chunks to the buffer pool. Called at batch completion.
      */
     public void deallocate(BufferPool pool) {
         if (pool != null) {
diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedProducerBatch.java b/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedProducerBatch.java
new file mode 100644
index 0000000000000..e80ed7861826d
--- /dev/null
+++ b/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedProducerBatch.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.clients.producer.internals;
+
+import org.apache.kafka.common.TopicPartition;
+import org.apache.kafka.common.header.Header;
+import org.apache.kafka.common.record.internal.MemoryRecordsBuilder;
+import org.apache.kafka.common.utils.ByteBufferOutputStream;
+
+import java.nio.ByteBuffer;
+import java.util.List;
+
+/**
+ * A {@link ProducerBatch} for the incremental (chunked) buffer.memory allocation strategy, backed
+ * by a {@link MemoryRecordsBuilder} whose stream is a {@link ChunkedByteBufferOutputStream}.
+ * It adds mid-batch chunk extension support ({@link #extensionBytesNeeded} /
+ * {@link #addBuffers}) and overrides the pool deallocation hooks so all chunks are returned to
+ * the pool rather than a single buffer.
+ * <p>
+ * This class is not thread safe and external synchronization must be used when modifying it.
+ */
+public class ChunkedProducerBatch extends ProducerBatch {
+
+    // The parent's builder reference is private; keep our own to access stream capacity state.
+    private final MemoryRecordsBuilder recordsBuilder;
+
+    public ChunkedProducerBatch(TopicPartition tp, MemoryRecordsBuilder recordsBuilder, long createdMs) {
+        super(tp, recordsBuilder, createdMs);
+        this.recordsBuilder = recordsBuilder;
+    }
+
+    /**
+     * Bytes of physical buffer this batch needs before {@code tryAppend} could accept the given
+     * record. Returns 0 when the record fits (either logically full, or the stream's combined
+     * chunk capacity has room). Positive when {@code hasRoomFor} allows the record but the
+     * chunks lack physical capacity — the accumulator allocates exactly the missing bytes
+     * (rounded up to whole chunks) and attaches them via {@link #addBuffers} before retrying.
+     */
+    int extensionBytesNeeded(long timestamp, byte[] key, byte[] value, Header[] headers) {
+        if (recordCount == 0)
+            return 0;
+        if (!recordsBuilder.hasRoomFor(timestamp, key, value, headers))
+            return 0;
+        // Size against the batch's projected total output after this record (header counted once,
+        // ratio-adjusted when compressed), not per-record. Per-record sizing would over-count the
+        // header and miss the compressor's flush-accumulation behavior.
+        int target = recordsBuilder.estimatedBytesWrittenAfter(key, value, headers);
+        ByteBufferOutputStream stream = recordsBuilder.bufferStream();
+        int totalAttachedCapacity = stream.position() + stream.remaining();
+        return Math.max(0, target - totalAttachedCapacity);
+    }
+
+    /**
+     * Attach pre-allocated chunks to this batch's stream so the next {@code tryAppend} can
+     * spill into them. Ownership of the chunks transfers to the stream.
+     */
+    void addBuffers(List<ByteBuffer> chunks) {
+        stream().addBuffers(chunks);
+    }
+
+    @Override
+    protected void deallocateBuffer(BufferPool pool) {
+        stream().deallocate(pool);
+    }
+
+    /**
+     * Unlike the single-buffer batch — which must donate a fresh buffer because the network
+     * layer may still be reading the pooled one — a chunked batch's inflight bytes live in the
+     * separate flattened buffer (see {@link ChunkedByteBufferOutputStream#buffer()}), so it is
+     * safe to return the actual chunks to the pool here.
+     */
+    @Override
+    protected void deallocateInflightBuffer(BufferPool pool) {
+        stream().deallocate(pool);
+    }
+
+    private ChunkedByteBufferOutputStream stream() {
+        return (ChunkedByteBufferOutputStream) recordsBuilder.bufferStream();
+    }
+}
diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedRecordAccumulator.java b/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedRecordAccumulator.java
index cac18c608d195..a587754d80c44 100644
--- a/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedRecordAccumulator.java
+++ b/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedRecordAccumulator.java
@@ -17,13 +17,17 @@
 package org.apache.kafka.clients.producer.internals;
 
 import org.apache.kafka.clients.producer.BufferExhaustedException;
+import org.apache.kafka.clients.producer.Callback;
 import org.apache.kafka.clients.producer.RecordMetadata;
 import org.apache.kafka.common.Cluster;
+import org.apache.kafka.common.KafkaException;
+import org.apache.kafka.common.TopicPartition;
 import org.apache.kafka.common.compress.Compression;
 import org.apache.kafka.common.header.Header;
 import org.apache.kafka.common.metrics.Metrics;
 import org.apache.kafka.common.record.TimestampType;
 import org.apache.kafka.common.record.internal.AbstractRecords;
+import org.apache.kafka.common.record.internal.CompressionRatioEstimator;
 import org.apache.kafka.common.record.internal.CompressionType;
 import org.apache.kafka.common.record.internal.MemoryRecordsBuilder;
 import org.apache.kafka.common.record.internal.Record;
@@ -40,24 +44,31 @@
  * A {@link RecordAccumulator} variant that uses chunked buffer allocation: each new batch
  * pre-allocates {@code ceil(recordSize / chunkSize)} chunks from a {@link ChunkedBufferPool},
  * and grows mid-batch by attaching additional chunks via
- * {@link ProducerBatch#addBuffers(List)} when later records would overflow.
+ * {@link ChunkedProducerBatch#addBuffers(List)} when later records would overflow.
  * <p>
- * This is the "dynamic" buffer.memory allocation strategy: memory consumption scales with
+ * This is the "incremental" buffer.memory allocation strategy: memory consumption scales with
  * actual data buffered, not with {@code active_partition_count × batch.size}.
  * <p>
- * <b>PR1 scope:</b> uncompressed only. Batch creation throws
- * {@link UnsupportedOperationException} if compression is requested. The mid-batch flow
- * matches the KIP: try a non-blocking pool acquire for the extension chunks; on
- * {@link BufferExhaustedException}, close the existing batch (so the sender can drain it)
- * and let the new record flow through the first-record path, which blocks on the pool for
- * a fresh batch's chunks up to {@code max.block.ms}.
+ * When a record arrives mid-batch (an open batch already exists) and the batch's chunks are
+ * full, the flow is: under the deque lock, probe that open batch — the tail of the partition's
+ * deque — via the {@link #tryAppend} override, which returns
+ * {@link RecordAppendResult#needsExtension(int)} when the batch has logical room but lacks
+ * physical chunk capacity, then attempt a <b>non-blocking</b> pool acquire for the extra chunks. If the pool is exhausted, close the current batch (so the sender can drain it)
+ * and route the record through the first-record path, which <b>blocks</b> on the pool up to
+ * {@code max.block.ms}. As a result, unlike the "full" strategy, {@code send()} may block (or
+ * fail with {@link BufferExhaustedException}) on records other than the first of a batch.
  * <p>
- * TODO: add the new buffer.memory.allocation.strategy config and instantiate this class only when set to "dynamic"
- * TODO: add support for compressed data. Throws UnsupportedOperationException for now,
- *   and IllegalStateException if overflow detected. Follow-ups will support compressed data and growth.
+ * TODO: support compressed data (with mid-record growth); the constructor rejects compression for now.
  */
 public class ChunkedRecordAccumulator extends RecordAccumulator {
 
+    /**
+     * Fixed size of every chunk, independent of {@code batch.size}. The incremental strategy is
+     * only used when {@code batch.size >= CHUNK_SIZE} (see {@code KafkaProducer}); below it a batch
+     * is smaller than a single chunk, so the producer uses the full strategy instead.
+     */
+    public static final int CHUNK_SIZE = 16 * 1024;
+
     private final ChunkedBufferPool chunkedFree;
 
     public ChunkedRecordAccumulator(LogContext logContext,
@@ -75,6 +86,11 @@ public ChunkedRecordAccumulator(LogContext logContext,
                                     ChunkedBufferPool bufferPool) {
         super(logContext, batchSize, compression, lingerMs, retryBackoffMs, retryBackoffMaxMs,
                 deliveryTimeoutMs, partitionerConfig, metrics, metricGrpName, time, transactionManager, bufferPool);
+        // TODO: drop this once the incremental strategy supports compressed data (with the
+        //   mid-record growth fallback for compressor overshoot).
+        if (compression.type() != CompressionType.NONE)
+            throw new UnsupportedOperationException(
+                    "Compression is not yet supported with the incremental buffer.memory allocation strategy");
         this.chunkedFree = bufferPool;
     }
 
@@ -132,31 +148,25 @@ public RecordAppendResult append(String topic,
                     if (partitionChanged(topic, topicInfo, partitionInfo, dq, nowMs, cluster))
                         continue;
 
-                    // Probe the tail directly. If the existing batch has logical room but the
-                    // underlying stream lacks physical capacity, capture the gap and fall through
-                    // to allocate the extension outside the deque lock. Otherwise, attempt the
-                    // append against the existing batch.
-                    ProducerBatch tail = dq.peekLast();
-                    int gap = tail == null ? 0 : tail.extensionBytesNeeded(timestamp, key, value, headers);
-                    if (gap > 0) {
-                        extensionBytes = gap;
-                    } else {
-                        extensionBytes = 0;  // clear any stale value carried from a prior iteration
-                        RecordAppendResult appendResult = tryAppend(timestamp, key, value, headers, callbacks, dq, nowMs);
-                        if (appendResult != null) {
-                            boolean enableSwitch = allBatchesFull(dq);
-                            topicInfo.builtInPartitioner.updatePartitionInfo(partitionInfo, appendResult.appendedBytes, cluster, enableSwitch);
-                            return appendResult;
-                        }
+                    // The tryAppend override probes the physical capacity of the tail batch
+                    // (dq.peekLast(), the partition's open batch): a needsBufferExtension result
+                    // means it has logical room but its chunks lack space for this record — fall
+                    // through to allocate the gap outside the deque lock. A null result means the
+                    // tail batch is full or absent — fall through to the first-record (new batch) path.
+                    RecordAppendResult appendResult = tryAppend(timestamp, key, value, headers, callbacks, dq, nowMs);
+                    if (appendResult != null && !appendResult.needsBufferExtension) {
+                        boolean enableSwitch = allBatchesFull(dq);
+                        topicInfo.builtInPartitioner.updatePartitionInfo(partitionInfo, appendResult.appendedBytes, cluster, enableSwitch);
+                        return appendResult;
                     }
+                    extensionBytes = appendResult == null ? 0 : appendResult.extensionBytesNeeded;
                 }
 
                 if (extensionBytes > 0 && extensionChunks == null) {
-                    // Mid-batch extension: non-blocking attempt only. The producer thread already
-                    // holds chunks for the open batch; blocking here risks deadlock with the Sender
-                    // thread (which frees pool memory by completing batches). If the pool is
-                    // exhausted, we close the existing batch (so it becomes drainable) and fall
-                    // through to the first-record blocking path on the next loop iteration.
+                    // Mid-batch extension: non-blocking only. The thread already holds the open
+                    // batch's chunks, so blocking here could deadlock with the Sender (which frees
+                    // pool memory by completing batches). On exhaustion, close the batch (making it
+                    // drainable) and fall through to the blocking first-record path next iteration.
                     try {
                         extensionChunks = chunkedFree.allocateChunks(extensionBytes, 0L);
                     } catch (BufferExhaustedException e) {
@@ -168,21 +178,19 @@ public RecordAppendResult append(String topic,
                                 last.closeForRecordAppends();
                             }
                         }
-                        extensionBytes = 0;
                         continue;
                     }
                     nowMs = time.milliseconds();
                 } else if (extensionBytes == 0 && bufferStream == null) {
-                    // First-record path: block on the pool for enough chunks to fit this record.
-                    // Temporary while compressed data not supported.
-                    // TODO: drop this throw and route through the
-                    //   compressed path (which adds the mid-record growth fallback for compressor overshoot).
-                    if (compression.type() != CompressionType.NONE) {
-                        throw new UnsupportedOperationException(
-                                "Compression is not implemented yet for the dynamic buffer.memory allocation strategy");
-                    }
-                    int size = AbstractRecords.estimateSizeInBytesUpperBound(
+                    // First-record path: block on the pool for enough chunks to fit this record,
+                    // sized with the same cumulative estimator used mid-batch (header + record bytes
+                    // for NONE, ratio-adjusted when compressed) so the two stay consistent.
+                    int recordUncompressed = AbstractRecords.recordSizeUpperBound(
                             RecordBatch.CURRENT_MAGIC_VALUE, compression.type(), key, value, headers);
+                    int size = MemoryRecordsBuilder.estimatedBytesWritten(
+                            RecordBatch.CURRENT_MAGIC_VALUE, compression.type(),
+                            CompressionRatioEstimator.estimation(topic, compression.type()),
+                            recordUncompressed);
                     log.trace("Allocating {} byte chunked buffer ({} byte chunks) for topic {} partition {} with remaining timeout {}ms",
                             size, chunkedFree.poolableSize(), topic, effectivePartition, maxTimeToBlock);
                     List<ByteBuffer> initialChunks = chunkedFree.allocateChunks(size, maxTimeToBlock);
@@ -196,67 +204,48 @@ public RecordAppendResult append(String topic,
 
                     if (extensionChunks != null) {
                         ProducerBatch last = dq.peekLast();
-                        // The off-lock allocateChunks window allows the original tail batch to be
-                        // drained and a split batch (plain ByteBufferOutputStream) to be addFirst'd
-                        // into an emptied deque — see splitAndReenqueue. The instanceof guard below
-                        // is load-bearing: addBuffers is only defined on ChunkedByteBufferOutputStream;
-                        // the else branch returns the chunks to the pool to avoid leaking them.
-                        if (last != null && last.isWritable()
-                                && last.bufferStream() instanceof ChunkedByteBufferOutputStream) {
-                            ((ChunkedByteBufferOutputStream) last.bufferStream()).addBuffers(extensionChunks);
+                        // The off-lock allocateChunks window allows the probed tail batch to be
+                        // drained and replaced — possibly by a split batch (a plain
+                        // ProducerBatch), which can't take extension chunks. Only attach to a
+                        // writable chunked tail; otherwise refund the chunks and re-evaluate.
+                        if (last instanceof ChunkedProducerBatch && last.isWritable()) {
+                            ((ChunkedProducerBatch) last).addBuffers(extensionChunks);
                             extensionChunks = null;
-                            extensionBytes = 0;
-                            // Re-probe after attach: between our first-lock probe and now we were off-lock,
-                            // and another appender (or a tail-replacement) may have consumed capacity that
-                            // our extension allocation was sized against. If a gap remains, loop to acquire
-                            // more chunks against the now-current state. Without this re-probe, a
-                            // tryAppend that passes hasRoomFor (writeLimit) but exceeds the stream's
-                            // physical remaining() trips IllegalStateException in
-                            // ChunkedByteBufferOutputStream.advanceToNextChunk. Termination: writeLimit is
-                            // bounded and fixed; once hasRoomFor turns false, extensionBytesNeeded returns 0
-                            // and we fall through to new-batch creation. Regression test:
-                            // testConcurrentExtensionRaceDoesNotOverflowChunkedStream.
-                            int recheck = last.extensionBytesNeeded(timestamp, key, value, headers);
-                            if (recheck > 0) {
-                                extensionBytes = recheck;
-                                continue;
-                            }
                             RecordAppendResult retryResult = tryAppend(timestamp, key, value, headers, callbacks, dq, nowMs);
-                            if (retryResult != null) {
+                            if (retryResult != null && !retryResult.needsBufferExtension) {
                                 boolean enableSwitch = allBatchesFull(dq);
                                 topicInfo.builtInPartitioner.updatePartitionInfo(partitionInfo, retryResult.appendedBytes, cluster, enableSwitch);
                                 return retryResult;
                             }
-                            // Batch became full via writeLimit while we were off-lock — fall through
-                            // to new-batch creation on the next iteration.
+                            // needsBufferExtension: a concurrent appender consumed capacity our
+                            // extension was sized against — loop to re-probe. null: batch became
+                            // full — loop into new-batch creation. Terminates because writeLimit is
+                            // fixed: once full, the probe stops requesting extension. Regression:
+                            // testConcurrentExtensionRaceDoesNotOverflowChunkedStream.
                             continue;
                         }
                         // Tail is gone, closed, or non-chunked (e.g., a split batch). Return chunks to pool.
                         for (ByteBuffer chunk : extensionChunks)
                             chunkedFree.deallocate(chunk);
                         extensionChunks = null;
-                        extensionBytes = 0;
-                        continue;
-                    }
-
-                    // bufferStream is non-null here (first-record path). Before creating a new
-                    // batch with it, re-probe the tail: a concurrent appender may have created a
-                    // chunked batch we should extend instead. If so, release the oversized
-                    // bufferStream and let the next iteration take the extension path.
-                    ProducerBatch tail = dq.peekLast();
-                    if (tail != null && tail.isWritable()
-                            && tail.bufferStream() instanceof ChunkedByteBufferOutputStream
-                            && tail.extensionBytesNeeded(timestamp, key, value, headers) > 0) {
-                        bufferStream.deallocate();
-                        bufferStream = null;
                         continue;
                     }
 
+                    // bufferStream is non-null here (first-record path).
                     int firstRecordSize = AbstractRecords.estimateSizeInBytesUpperBound(
                             RecordBatch.CURRENT_MAGIC_VALUE, compression.type(), key, value, headers);
                     final ChunkedByteBufferOutputStream batchStream = bufferStream;
                     RecordAppendResult appendResult = appendNewBatch(topic, effectivePartition, dq, timestamp, key, value, headers, callbacks,
                             () -> chunkedRecordsBuilder(batchStream, firstRecordSize), nowMs);
+                    if (appendResult.needsBufferExtension) {
+                        // A concurrent appender created a tail batch we should extend rather
+                        // than start a new one (detected by appendNewBatch's in-lock tryAppend).
+                        // Our bufferStream was sized for a fresh batch — release it and loop so
+                        // the extension path allocates exactly the gap-sized chunks.
+                        bufferStream.deallocate();
+                        bufferStream = null;
+                        continue;
+                    }
                     if (appendResult.newBatchCreated)
                         bufferStream = null;
                     boolean enableSwitch = allBatchesFull(dq);
@@ -276,61 +265,46 @@ public RecordAppendResult append(String topic,
     }
 
     /**
-     * Build a {@link MemoryRecordsBuilder} backed by the chunked stream.
-     * {@code writeLimit} is set to {@code max(batchSize, firstRecordSize)} — the same logical
-     * cap the legacy path uses. Physical chunk capacity grows on demand via
-     * {@link ChunkedByteBufferOutputStream#addBuffers(List)}; {@code writeLimit} bounds when a
-     * batch is considered logically full and a new one must be started.
-     */
-    private MemoryRecordsBuilder chunkedRecordsBuilder(ChunkedByteBufferOutputStream bufferStream,
-                                                       int firstRecordSize) {
-        int writeLimit = Math.max(batchSize, firstRecordSize);
-        return new MemoryRecordsBuilder(bufferStream, RecordBatch.CURRENT_MAGIC_VALUE, compression,
-                TimestampType.CREATE_TIME, 0L, RecordBatch.NO_TIMESTAMP, RecordBatch.NO_PRODUCER_ID,
-                RecordBatch.NO_PRODUCER_EPOCH, RecordBatch.NO_SEQUENCE, false, false,
-                RecordBatch.NO_PARTITION_LEADER_EPOCH, writeLimit);
-    }
-
-    /**
-     * Chunked-aware deallocation for {@link ProducerBatch}.
+     * Try to append to a ProducerBatch, with mid-batch chunk extension support.
      * <p>
-     * Intercepts the {@link #isInflight()} branch of the parent's
-     * {@link RecordAccumulator#deallocate(ProducerBatch)} when the batch is backed by a
-     * {@link ChunkedByteBufferOutputStream}. The parent's defensive path credits only
-     * {@code initialCapacity} (one {@code chunkSize}) back to the pool and throws — that's the
-     * KAFKA-19012-aware safety net for the legacy single-buffer path, where the in-flight
-     * buffer <i>is</i> the pooled buffer. A chunked batch holds K chunks but the in-flight
-     * network bytes live in a separate flattened heap buffer (see
-     * {@link ChunkedByteBufferOutputStream#buffer()}), so it is safe to return all K chunks
-     * to the pool here. The {@code IllegalStateException} is still propagated to preserve the
-     * contract that deallocating an inflight batch is unexpected upstream.
+     * If the tail batch has logical room (writeLimit-wise) but its chunked stream lacks
+     * physical capacity, returns {@link RecordAppendResult#needsExtension(int)} without
+     * attempting the append; the caller allocates chunks outside the deque lock, attaches
+     * them, and retries. Otherwise defers to the parent.
      */
     @Override
-    public void deallocate(ProducerBatch batch) {
-        if (!batch.isSplitBatch()
-                && !batch.isBufferDeallocated()
-                && batch.isInflight()
-                && batch.bufferStream() instanceof ChunkedByteBufferOutputStream) {
-            batch.markBufferDeallocated();
-            deallocateBatchBuffer(batch);
-            throw new IllegalStateException("Attempting to deallocate a batch that is inflight. Batch is " + batch);
+    protected RecordAppendResult tryAppend(long timestamp, byte[] key, byte[] value, Header[] headers,
+                                           Callback callback, Deque<ProducerBatch> deque, long nowMs) {
+        if (closed)
+            throw new KafkaException("Producer closed while send in progress");
+        ProducerBatch last = deque.peekLast();
+        // Split batches in an incremental deque are plain ProducerBatch (heap-backed, grow-on-demand)
+        // and never need chunk extension, so the probe only applies to chunked batches.
+        if (last instanceof ChunkedProducerBatch) {
+            int extensionBytes = ((ChunkedProducerBatch) last).extensionBytesNeeded(timestamp, key, value, headers);
+            if (extensionBytes > 0)
+                return RecordAppendResult.needsExtension(extensionBytes);
         }
-        super.deallocate(batch);
+        return super.tryAppend(timestamp, key, value, headers, callback, deque, nowMs);
+    }
+
+    @Override
+    protected ProducerBatch createProducerBatch(TopicPartition tp, MemoryRecordsBuilder recordsBuilder, long nowMs) {
+        return new ChunkedProducerBatch(tp, recordsBuilder, nowMs);
     }
 
     /**
-     * Route chunked batch deallocation through the stream rather than the legacy
-     * single-buffer pool path. The flattened buffer returned by {@code batch.buffer()} is a
-     * fresh heap allocation and must NOT be passed to the pool — instead, the stream owns
-     * the chunk-sized buffers and returns them to the pool.
+     * Build a {@link MemoryRecordsBuilder} backed by the chunked stream. {@code writeLimit} =
+     * {@code max(batchSize, firstRecordSize)} (the same logical cap as the full path) bounds when
+     * the batch is full; physical capacity grows on demand via
+     * {@link ChunkedByteBufferOutputStream#addBuffers(List)}.
      */
-    @Override
-    protected void deallocateBatchBuffer(ProducerBatch batch) {
-        if (batch.bufferStream() instanceof ChunkedByteBufferOutputStream) {
-            ((ChunkedByteBufferOutputStream) batch.bufferStream()).deallocate(chunkedFree);
-        } else {
-            // Split batches and any non-chunked batches go through the default deallocation.
-            super.deallocateBatchBuffer(batch);
-        }
+    private MemoryRecordsBuilder chunkedRecordsBuilder(ChunkedByteBufferOutputStream bufferStream,
+                                                       int firstRecordSize) {
+        int writeLimit = Math.max(batchSize, firstRecordSize);
+        return new MemoryRecordsBuilder(bufferStream, RecordBatch.CURRENT_MAGIC_VALUE, compression,
+                TimestampType.CREATE_TIME, 0L, RecordBatch.NO_TIMESTAMP, RecordBatch.NO_PRODUCER_ID,
+                RecordBatch.NO_PRODUCER_EPOCH, RecordBatch.NO_SEQUENCE, false, false,
+                RecordBatch.NO_PARTITION_LEADER_EPOCH, writeLimit);
     }
 }
diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/internals/ProducerBatch.java b/clients/src/main/java/org/apache/kafka/clients/producer/internals/ProducerBatch.java
index a7e47583d310b..6adac7a24424d 100644
--- a/clients/src/main/java/org/apache/kafka/clients/producer/internals/ProducerBatch.java
+++ b/clients/src/main/java/org/apache/kafka/clients/producer/internals/ProducerBatch.java
@@ -31,7 +31,6 @@
 import org.apache.kafka.common.record.internal.Record;
 import org.apache.kafka.common.record.internal.RecordBatch;
 import org.apache.kafka.common.requests.ProduceResponse;
-import org.apache.kafka.common.utils.ByteBufferOutputStream;
 import org.apache.kafka.common.utils.ProducerIdAndEpoch;
 import org.apache.kafka.common.utils.Time;
 
@@ -58,7 +57,7 @@
  *
  * This class is not thread safe and external synchronization must be used when modifying it
  */
-public final class ProducerBatch {
+public class ProducerBatch {
 
     private static final Logger log = LoggerFactory.getLogger(ProducerBatch.class);
 
@@ -141,35 +140,22 @@ boolean hasLeaderChangedForTheOngoingRetry() {
 
 
     /**
-     * Bytes of physical buffer this batch needs before {@code tryAppend} could accept the given
-     * record. Returns 0 when the record fits (either logically full, or stream has room).
-     * Positive when {@code hasRoomFor} allows the record but the underlying stream lacks
-     * physical capacity — used by the dynamic strategy to allocate exactly the missing bytes
-     * before retrying.
-     * <p>
-     * For batches with the default {@link ByteBufferOutputStream} (legacy path), the stream's
-     * {@code remaining()} is large enough that this almost always returns 0.
+     * Return this batch's buffer memory to the pool. The default single-buffer batch returns
+     * the pooled buffer at its initial capacity; {@link ChunkedProducerBatch} overrides this
+     * to return all of its chunks.
      */
-    int extensionBytesNeeded(long timestamp, byte[] key, byte[] value, Header[] headers) {
-        if (recordCount == 0)
-            return 0;
-        if (!recordsBuilder.hasRoomFor(timestamp, key, value, headers))
-            return 0;
-        // Upper-bound estimate is fine: over-counting by header bytes only over-allocates by a
-        // small fraction of one chunk. Under-estimation would be unsafe.
-        int recordSize = AbstractRecords.estimateSizeInBytesUpperBound(
-                magic(), recordsBuilder.compression().type(), key, value, headers);
-        int gap = recordSize - recordsBuilder.bufferStream().remaining();
-        return Math.max(0, gap);
+    protected void deallocateBuffer(BufferPool pool) {
+        pool.deallocate(buffer(), initialCapacity());
     }
 
     /**
-     * The batch's underlying output stream. Exposed for the dynamic strategy to attach
-     * extension chunks and route deallocation through the chunked stream rather than the
-     * standard {@code BufferPool.deallocate(ByteBuffer, int)} path.
+     * Credit this batch's memory back to the pool when it is unexpectedly still inflight
+     * (KAFKA-19012): the buffer can't be touched (the network may still be reading it), so the
+     * default donates a fresh same-capacity buffer. {@link ChunkedProducerBatch} instead returns
+     * the actual chunks (safe — inflight bytes live in the separate flattened buffer).
      */
-    ByteBufferOutputStream bufferStream() {
-        return recordsBuilder.bufferStream();
+    protected void deallocateInflightBuffer(BufferPool pool) {
+        pool.deallocate(ByteBuffer.allocate(initialCapacity()));
     }
 
     /**
diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/internals/RecordAccumulator.java b/clients/src/main/java/org/apache/kafka/clients/producer/internals/RecordAccumulator.java
index 29ddd07a489bc..7bb4243e24f9d 100644
--- a/clients/src/main/java/org/apache/kafka/clients/producer/internals/RecordAccumulator.java
+++ b/clients/src/main/java/org/apache/kafka/clients/producer/internals/RecordAccumulator.java
@@ -398,12 +398,15 @@ protected RecordAppendResult appendNewBatch(String topic,
 
         RecordAppendResult appendResult = tryAppend(timestamp, key, value, headers, callbacks, dq, nowMs);
         if (appendResult != null) {
-            // Somebody else found us a batch, return the one we waited for! Hopefully this doesn't happen often...
+            // Propagate without creating a new batch: either another thread already made us a batch
+            // (success), or — incremental strategy — a concurrent appender created an extendable tail
+            // (needsBufferExtension), so the caller releases its pre-allocated buffer and retries via
+            // the extension path.
             return appendResult;
         }
 
         MemoryRecordsBuilder recordsBuilder = recordsBuilderSupplier.get();
-        ProducerBatch batch = new ProducerBatch(new TopicPartition(topic, partition), recordsBuilder, nowMs);
+        ProducerBatch batch = createProducerBatch(new TopicPartition(topic, partition), recordsBuilder, nowMs);
         FutureRecordMetadata future = Objects.requireNonNull(batch.tryAppend(timestamp, key, value, headers,
                 callbacks, nowMs));
 
@@ -413,6 +416,14 @@ protected RecordAppendResult appendNewBatch(String topic,
         return new RecordAppendResult(future, dq.size() > 1 || batch.isFull(), true, batch.estimatedSizeInBytes());
     }
 
+    /**
+     * Create the {@link ProducerBatch} for a new batch. The incremental strategy overrides this to
+     * create a {@link ChunkedProducerBatch}.
+     */
+    protected ProducerBatch createProducerBatch(TopicPartition tp, MemoryRecordsBuilder recordsBuilder, long nowMs) {
+        return new ProducerBatch(tp, recordsBuilder, nowMs);
+    }
+
     /**
      * Check if all batches in the queue are full.
      */
@@ -1057,23 +1068,16 @@ public void deallocate(ProducerBatch batch) {
             } else {
                 batch.markBufferDeallocated();
                 if (batch.isInflight()) {
-                    // Create a fresh ByteBuffer to give to BufferPool to reuse since we can't safely call deallocate with the ProduceBatch's buffer
-                    free.deallocate(ByteBuffer.allocate(batch.initialCapacity()));
+                    // We can't safely deallocate the buffer of an inflight batch; the batch credits
+                    // the memory back to the pool in a way that suits its buffer type.
+                    batch.deallocateInflightBuffer(free);
                     throw new IllegalStateException("Attempting to deallocate a batch that is inflight. Batch is " + batch);
                 }
-                deallocateBatchBuffer(batch);
+                batch.deallocateBuffer(free);
             }
         }
     }
 
-    /**
-     * Return the batch's underlying buffer to the pool.
-     * This default implementation returns the buffer at its initial capacity (single buffer).
-     */
-    protected void deallocateBatchBuffer(ProducerBatch batch) {
-        free.deallocate(batch.buffer(), batch.initialCapacity());
-    }
-
     /**
      * Remove from the incomplete list but do not free memory yet
      */
@@ -1267,17 +1271,41 @@ public static final class RecordAppendResult {
         public final FutureRecordMetadata future;
         public final boolean batchIsFull;
         public final boolean newBatchCreated;
+        /**
+         * Signal (incremental strategy) that the tail batch has logical room (writeLimit-wise) but
+         * its chunks lack physical capacity. The append was NOT attempted; the caller allocates
+         * {@link #extensionBytesNeeded} bytes of chunk capacity, attaches them via
+         * {@link ChunkedProducerBatch#addBuffers}, and retries. When {@code true}, {@code future} is null.
+         */
+        public final boolean needsBufferExtension;
+        public final int extensionBytesNeeded;
         public final int appendedBytes;
 
         public RecordAppendResult(FutureRecordMetadata future,
                                   boolean batchIsFull,
                                   boolean newBatchCreated,
                                   int appendedBytes) {
+            this(future, batchIsFull, newBatchCreated, false, 0, appendedBytes);
+        }
+
+        private RecordAppendResult(FutureRecordMetadata future,
+                                   boolean batchIsFull,
+                                   boolean newBatchCreated,
+                                   boolean needsBufferExtension,
+                                   int extensionBytesNeeded,
+                                   int appendedBytes) {
             this.future = future;
             this.batchIsFull = batchIsFull;
             this.newBatchCreated = newBatchCreated;
+            this.needsBufferExtension = needsBufferExtension;
+            this.extensionBytesNeeded = extensionBytesNeeded;
             this.appendedBytes = appendedBytes;
         }
+
+        /** A signal-only result indicating the caller must allocate more chunk capacity. */
+        static RecordAppendResult needsExtension(int extensionBytesNeeded) {
+            return new RecordAppendResult(null, false, false, true, extensionBytesNeeded, 0);
+        }
     }
 
     /*
diff --git a/clients/src/main/java/org/apache/kafka/common/record/internal/AbstractRecords.java b/clients/src/main/java/org/apache/kafka/common/record/internal/AbstractRecords.java
index d0704d18ec37a..d8eb6c4dd0be7 100644
--- a/clients/src/main/java/org/apache/kafka/common/record/internal/AbstractRecords.java
+++ b/clients/src/main/java/org/apache/kafka/common/record/internal/AbstractRecords.java
@@ -143,6 +143,31 @@ else if (compressionType != CompressionType.NONE)
             return Records.LOG_OVERHEAD + LegacyRecord.recordSize(magic, key, value);
     }
 
+    /**
+     * Upper-bound size of one record's contribution to a batch, <em>excluding</em> the batch header.
+     * For magic v2 it's {@link #estimateSizeInBytesUpperBound} minus
+     * {@link #recordBatchHeaderSizeInBytes}; for older magic it equals
+     * {@link #estimateSizeInBytesUpperBound}. Used by the incremental strategy's cumulative sizing,
+     * where the header is counted once via {@link MemoryRecordsBuilder#estimatedBytesWritten}.
+     */
+    public static int recordSizeUpperBound(byte magic, CompressionType compressionType, byte[] key,
+                                           byte[] value, Header[] headers) {
+        return recordSizeUpperBound(magic, compressionType, Utils.wrapNullable(key), Utils.wrapNullable(value), headers);
+    }
+
+    /**
+     * @see #recordSizeUpperBound(byte, CompressionType, byte[], byte[], Header[])
+     */
+    private static int recordSizeUpperBound(byte magic, CompressionType compressionType, ByteBuffer key,
+                                            ByteBuffer value, Header[] headers) {
+        if (magic >= RecordBatch.MAGIC_VALUE_V2)
+            return DefaultRecord.recordSizeUpperBound(key, value, headers);
+        else if (compressionType != CompressionType.NONE)
+            return Records.LOG_OVERHEAD + LegacyRecord.recordOverhead(magic) + LegacyRecord.recordSize(magic, key, value);
+        else
+            return Records.LOG_OVERHEAD + LegacyRecord.recordSize(magic, key, value);
+    }
+
     /**
      * Return the size of the record batch header.
      *
diff --git a/clients/src/main/java/org/apache/kafka/common/record/internal/MemoryRecordsBuilder.java b/clients/src/main/java/org/apache/kafka/common/record/internal/MemoryRecordsBuilder.java
index 47a955cfcf18e..2bf48518b2d6f 100644
--- a/clients/src/main/java/org/apache/kafka/common/record/internal/MemoryRecordsBuilder.java
+++ b/clients/src/main/java/org/apache/kafka/common/record/internal/MemoryRecordsBuilder.java
@@ -212,7 +212,7 @@ public int initialCapacity() {
     }
 
     /**
-     * The underlying output stream. Exposed so the dynamic-strategy callers can attach
+     * The underlying output stream. Exposed so the incremental-strategy callers can attach
      * pre-allocated chunks to a {@code ChunkedByteBufferOutputStream} and route deallocation
      * polymorphically.
      */
@@ -824,12 +824,52 @@ private void ensureOpenForRecordBatchWrite() {
      * @return The estimated number of bytes written
      */
     private int estimatedBytesWritten() {
-        if (compression.type() == CompressionType.NONE) {
+        return estimatedBytesWritten(magic, compression.type(), estimatedCompressionRatio, uncompressedRecordsSizeInBytes);
+    }
+
+    /**
+     * Static projection of the bytes a builder with the given magic, compression type, and ratio
+     * would have written for {@code uncompressedRecordsSizeInBytes} of records. Exact for
+     * uncompressed ({@code header + uncompressedBytes}); for compressed it applies the ratio and 5%
+     * safety multiplier to the whole total, like the per-builder {@link #estimatedBytesWritten()}.
+     * <p>
+     * Used by the incremental strategy to size chunk reservations (first record, and mid-batch via
+     * {@link #estimatedBytesWrittenAfter}). This is the batch's projected total <em>output</em>,
+     * distinct from the {@link #hasRoomFor} writeLimit gate, which counts the incoming record at
+     * full uncompressed size; the two coincide for uncompressed data and diverge under compression.
+     */
+    public static int estimatedBytesWritten(byte magic, CompressionType compressionType,
+                                            float compressionRatio,
+                                            int uncompressedRecordsSizeInBytes) {
+        int batchHeaderSizeInBytes = AbstractRecords.recordBatchHeaderSizeInBytes(magic, compressionType);
+        if (compressionType == CompressionType.NONE) {
             return batchHeaderSizeInBytes + uncompressedRecordsSizeInBytes;
         } else {
-            // estimate the written bytes to the underlying byte buffer based on uncompressed written bytes
-            return batchHeaderSizeInBytes + (int) (uncompressedRecordsSizeInBytes * estimatedCompressionRatio * COMPRESSION_RATE_ESTIMATION_FACTOR);
+            return batchHeaderSizeInBytes + (int) (uncompressedRecordsSizeInBytes * compressionRatio * COMPRESSION_RATE_ESTIMATION_FACTOR);
+        }
+    }
+
+    /**
+     * Projected value of {@link #estimatedBytesWritten} after appending one more record with the
+     * given fields, using the record's worst-case (upper-bound) per-record size. Used by the
+     * incremental strategy to size mid-batch chunk extensions.
+     */
+    public int estimatedBytesWrittenAfter(byte[] key, byte[] value, Header[] headers) {
+        return estimatedBytesWrittenAfter(wrapNullable(key), wrapNullable(value), headers);
+    }
+
+    /**
+     * @see #estimatedBytesWrittenAfter(byte[], byte[], Header[])
+     */
+    private int estimatedBytesWrittenAfter(ByteBuffer key, ByteBuffer value, Header[] headers) {
+        final int recordSize;
+        if (magic < RecordBatch.MAGIC_VALUE_V2) {
+            recordSize = Records.LOG_OVERHEAD + LegacyRecord.recordSize(magic, key, value);
+        } else {
+            recordSize = DefaultRecord.recordSizeUpperBound(key, value, headers);
         }
+        return estimatedBytesWritten(magic, compression.type(), estimatedCompressionRatio,
+                uncompressedRecordsSizeInBytes + recordSize);
     }
 
     /**
diff --git a/clients/src/test/java/org/apache/kafka/clients/producer/ProducerConfigTest.java b/clients/src/test/java/org/apache/kafka/clients/producer/ProducerConfigTest.java
index 5fd9ab727e046..45615be25dce0 100644
--- a/clients/src/test/java/org/apache/kafka/clients/producer/ProducerConfigTest.java
+++ b/clients/src/test/java/org/apache/kafka/clients/producer/ProducerConfigTest.java
@@ -129,6 +129,41 @@ public void testInvalidMetadataRecoveryStrategy() {
         assertTrue(ce.getMessage().contains(CommonClientConfigs.METADATA_RECOVERY_STRATEGY_CONFIG));
     }
 
+    @Test
+    public void testDefaultBufferMemoryAllocationStrategy() {
+        Map<String, Object> configs = new HashMap<>();
+        configs.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, keySerializerClass);
+        configs.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, valueSerializerClass);
+        configs.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
+        final ProducerConfig producerConfig = new ProducerConfig(configs);
+        assertEquals(ProducerConfig.BUFFER_MEMORY_ALLOCATION_STRATEGY_FULL,
+                producerConfig.getString(ProducerConfig.BUFFER_MEMORY_ALLOCATION_STRATEGY_CONFIG));
+    }
+
+    @Test
+    public void testValidBufferMemoryAllocationStrategy() {
+        Map<String, Object> configs = new HashMap<>();
+        configs.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, keySerializerClass);
+        configs.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, valueSerializerClass);
+        configs.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
+        configs.put(ProducerConfig.BUFFER_MEMORY_ALLOCATION_STRATEGY_CONFIG,
+                ProducerConfig.BUFFER_MEMORY_ALLOCATION_STRATEGY_INCREMENTAL);
+        final ProducerConfig producerConfig = new ProducerConfig(configs);
+        assertEquals(ProducerConfig.BUFFER_MEMORY_ALLOCATION_STRATEGY_INCREMENTAL,
+                producerConfig.getString(ProducerConfig.BUFFER_MEMORY_ALLOCATION_STRATEGY_CONFIG));
+    }
+
+    @Test
+    public void testInvalidBufferMemoryAllocationStrategy() {
+        Map<String, Object> configs = new HashMap<>();
+        configs.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, keySerializerClass);
+        configs.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, valueSerializerClass);
+        configs.put(ProducerConfig.BUFFER_MEMORY_ALLOCATION_STRATEGY_CONFIG, "abc");
+        configs.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
+        ConfigException ce = assertThrows(ConfigException.class, () -> new ProducerConfig(configs));
+        assertTrue(ce.getMessage().contains(ProducerConfig.BUFFER_MEMORY_ALLOCATION_STRATEGY_CONFIG));
+    }
+
     @Test
     public void testCaseInsensitiveSecurityProtocol() {
         final String saslSslLowerCase = SecurityProtocol.SASL_SSL.name.toLowerCase(Locale.ROOT);
diff --git a/clients/src/test/java/org/apache/kafka/clients/producer/internals/ChunkedRecordAccumulatorTest.java b/clients/src/test/java/org/apache/kafka/clients/producer/internals/ChunkedRecordAccumulatorTest.java
index b191aadb4645b..191224cd437d8 100644
--- a/clients/src/test/java/org/apache/kafka/clients/producer/internals/ChunkedRecordAccumulatorTest.java
+++ b/clients/src/test/java/org/apache/kafka/clients/producer/internals/ChunkedRecordAccumulatorTest.java
@@ -157,20 +157,6 @@ public void testMidBatchExtensionGrowsExistingBatch() throws Exception {
         accum.close();
     }
 
-    /** Compression is rejected at batch creation in PR1 with a clear message. */
-    @Test
-    public void testCompressionThrowsAtBatchCreation() {
-        int chunkSize = 256;
-        ChunkedRecordAccumulator accum = newAccumulator(1024, chunkSize, 16L * chunkSize, Compression.gzip().build());
-
-        UnsupportedOperationException ex = assertThrows(UnsupportedOperationException.class, () ->
-                accum.append(topic, partition1, 0L, key, new byte[32], Record.EMPTY_HEADERS, null,
-                        maxBlockTimeMs, time.milliseconds(), cluster));
-        assertTrue(ex.getMessage().contains("not implemented yet"),
-                "Expected 'not implemented yet' in message, got: " + ex.getMessage());
-        accum.close();
-    }
-
     /**
      * Race between two concurrent appenders on the same chunked tail. Both threads probe
      * {@code extensionBytesNeeded} under the deque lock against the same physical remaining
@@ -347,4 +333,81 @@ public void testBatchCloseDoesNotDeallocateChunksPrematurely() throws Exception
 
         accum.close();
     }
+
+    /**
+     * Chunks attached grow with the batch's cumulative projected output, not per-record. With a
+     * chunkSize smaller than the batch's eventual content, a sequence of small records should
+     * extend the chunks as the running total (header + uncompressed bytes for NONE) crosses
+     * chunk boundaries — not allocate the first record's worth and then never extend until
+     * physical capacity is exhausted (which the per-record formula would do).
+     */
+    @Test
+    public void testExtensionTracksCumulativeBatchSize() throws Exception {
+        int chunkSize = 64;
+        int batchSize = 512;
+        ChunkedRecordAccumulator accum = newAccumulator(batchSize, chunkSize, 64L * chunkSize, Compression.NONE);
+
+        byte[] smallValue = new byte[24];
+        for (int i = 0; i < 6; i++) {
+            accum.append(topic, partition1, 0L, key, smallValue, Record.EMPTY_HEADERS, null,
+                    maxBlockTimeMs, time.milliseconds(), cluster);
+        }
+
+        Deque<ProducerBatch> dq = batchesFor(accum, tp1);
+        assertEquals(1, dq.size());
+        ProducerBatch batch = dq.peekFirst();
+        assertNotNull(batch);
+        assertEquals(6, batch.recordCount);
+
+        // batch.close() flattens chunks and writes the header. If chunks under-allocated, this
+        // throws (insufficient capacity in the underlying buffer).
+        batch.close();
+        MemoryRecords records = batch.records();
+        int actualSize = records.sizeInBytes();
+        // Under NONE compression, estimatedBytesWritten is exact: physical bytes ≈ header + sum
+        // of per-record bytes. The chunks attached must cover that, so actualSize must be >
+        // chunkSize for a multi-record batch with non-trivial content.
+        assertTrue(actualSize > chunkSize,
+                "batch should have grown beyond a single chunk; got " + actualSize);
+
+        accum.close();
+    }
+
+    /**
+     * The cumulative sizing formula counts the batch header once. The previous per-record
+     * formula effectively included the batch header in each record's upper bound, which
+     * over-allocated as records accumulated. With the cumulative formula, chunk count for N
+     * small records of total uncompressed size {@code U} is {@code ceil((header + U) / chunkSize)},
+     * not {@code N × ceil((header + recordSize) / chunkSize)}.
+     */
+    @Test
+    public void testCumulativeAccountsForHeaderOnce() throws Exception {
+        int chunkSize = 256;
+        int batchSize = 8192;
+        long totalMemory = 64L * chunkSize;
+        ChunkedBufferPool pool = new ChunkedBufferPool(totalMemory, chunkSize, metrics, time, "producer-metrics");
+        ChunkedRecordAccumulator accum = new ChunkedRecordAccumulator(logContext, batchSize, Compression.NONE,
+                /* lingerMs */ 0, /* retryBackoffMs */ 0L, /* retryBackoffMaxMs */ 0L,
+                /* deliveryTimeoutMs */ 3200, metrics, "producer-metrics", time,
+                /* transactionManager */ null, pool);
+
+        long beforeAlloc = pool.availableMemory();
+        // First record establishes the batch. Each subsequent small record contributes its
+        // uncompressed bytes to the cumulative target; the batch header is NOT re-counted.
+        byte[] smallValue = new byte[8];
+        for (int i = 0; i < 4; i++) {
+            accum.append(topic, partition1, 0L, key, smallValue, Record.EMPTY_HEADERS, null,
+                    maxBlockTimeMs, time.milliseconds(), cluster);
+        }
+
+        // Each per-record append adds ~10-30 bytes (key + value + record overhead, V2). Cumulative
+        // total for 4 records is well below chunkSize=256, so only 1 chunk should ever be attached.
+        // The per-record formula (header counted once per record) would have allocated more.
+        long held = beforeAlloc - pool.availableMemory();
+        assertEquals(chunkSize, held,
+                "cumulative formula should hold exactly one chunk for a small-record batch; "
+                        + "header double-counting (per-record formula) would inflate this");
+
+        accum.close();
+    }
 }

From 8c82cd261c2c79d091697f35779bf7b5067cb635 Mon Sep 17 00:00:00 2001
From: Lianet Magrans <lmagrans@confluent.io>
Date: Thu, 18 Jun 2026 15:27:50 -0400
Subject: [PATCH 04/11] integration tests

---
 .../kafka/api/BaseProducerSendTest.scala      | 10 +-
 ...ncrementalAllocationProducerSendTest.scala | 94 +++++++++++++++++++
 .../scala/unit/kafka/utils/TestUtils.scala    |  4 +-
 3 files changed, 106 insertions(+), 2 deletions(-)
 create mode 100644 core/src/test/scala/integration/kafka/api/IncrementalAllocationProducerSendTest.scala

diff --git a/core/src/test/scala/integration/kafka/api/BaseProducerSendTest.scala b/core/src/test/scala/integration/kafka/api/BaseProducerSendTest.scala
index 71d0dd8273992..1d50bd8910549 100644
--- a/core/src/test/scala/integration/kafka/api/BaseProducerSendTest.scala
+++ b/core/src/test/scala/integration/kafka/api/BaseProducerSendTest.scala
@@ -101,6 +101,13 @@ abstract class BaseProducerSendTest extends KafkaServerTestHarness {
     super.tearDown()
   }
 
+  /**
+   * Additional producer properties applied to every producer created via [[createProducer]].
+   * Subclasses override this to run the whole suite under a different producer configuration
+   * (e.g. a different buffer.memory.allocation.strategy).
+   */
+  protected def producerOverrides: Properties = new Properties()
+
   protected def createProducer(lingerMs: Int = 0,
                                deliveryTimeoutMs: Int = 2 * 60 * 1000,
                                batchSize: Int = 16384,
@@ -117,7 +124,8 @@ abstract class BaseProducerSendTest extends KafkaServerTestHarness {
       deliveryTimeoutMs = deliveryTimeoutMs,
       maxBlockMs = maxBlockMs,
       batchSize = batchSize,
-      bufferSize = bufferSize)
+      bufferSize = bufferSize,
+      additionalProperties = Some(producerOverrides))
     registerProducer(producer)
   }
 
diff --git a/core/src/test/scala/integration/kafka/api/IncrementalAllocationProducerSendTest.scala b/core/src/test/scala/integration/kafka/api/IncrementalAllocationProducerSendTest.scala
new file mode 100644
index 0000000000000..74bcd892813c7
--- /dev/null
+++ b/core/src/test/scala/integration/kafka/api/IncrementalAllocationProducerSendTest.scala
@@ -0,0 +1,94 @@
+/**
+  * Licensed to the Apache Software Foundation (ASF) under one or more
+  * contributor license agreements.  See the NOTICE file distributed with
+  * this work for additional information regarding copyright ownership.
+  * The ASF licenses this file to You under the Apache License, Version 2.0
+  * (the "License"); you may not use this file except in compliance with
+  * the License.  You may obtain a copy of the License at
+  *
+  *    http://www.apache.org/licenses/LICENSE-2.0
+  *
+  * Unless required by applicable law or agreed to in writing, software
+  * distributed under the License is distributed on an "AS IS" BASIS,
+  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  * See the License for the specific language governing permissions and
+  * limitations under the License.
+  */
+
+package kafka.api
+
+import java.util.concurrent.ThreadLocalRandom
+import java.util.{Properties, List => JList}
+import kafka.utils.{TestInfoUtils, TestUtils}
+import org.apache.kafka.clients.producer.{ProducerConfig, ProducerRecord}
+import org.apache.kafka.common.TopicPartition
+import org.apache.kafka.common.network.ListenerName
+import org.apache.kafka.common.security.auth.SecurityProtocol
+import org.junit.jupiter.api.Assertions.{assertArrayEquals, assertEquals}
+import org.junit.jupiter.params.ParameterizedTest
+import org.junit.jupiter.params.provider.MethodSource
+
+/**
+ * Runs the whole [[BaseProducerSendTest]] suite with the producer configured with the incremental
+ * buffer.memory allocation strategy, plus a few scenarios specific to chunked allocation.
+ */
+class IncrementalAllocationProducerSendTest extends BaseProducerSendTest {
+
+  override protected def producerOverrides: Properties = {
+    val props = new Properties()
+    props.put(ProducerConfig.BUFFER_MEMORY_ALLOCATION_STRATEGY_CONFIG,
+      ProducerConfig.BUFFER_MEMORY_ALLOCATION_STRATEGY_INCREMENTAL)
+    props
+  }
+
+  // The incremental strategy does not support compression yet; the base test would fail at
+  // producer construction. Overriding without the test annotations removes it from this
+  // subclass's run. TODO: remove this override when compression support lands.
+  override def testSendCompressedMessageWithCreateTime(groupProtocol: String): Unit = {}
+
+  // batch.size=0 is below the internal chunk size, so the incremental strategy falls back to the full
+  // allocation path (a batch is smaller than one chunk). Verifies that fallback yields a working producer.
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedGroupProtocolNames)
+  @MethodSource(Array("getTestGroupProtocolParametersConsumerGroupProtocolOnly"))
+  def testBatchSizeZero(groupProtocol: String): Unit = {
+    sendAndVerify(createProducer(
+      lingerMs = Int.MaxValue,
+      deliveryTimeoutMs = Int.MaxValue,
+      batchSize = 0))
+  }
+
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedGroupProtocolNames)
+  @MethodSource(Array("getTestGroupProtocolParametersConsumerGroupProtocolOnly"))
+  def testSendLargeRecordsSpanningMultipleChunks(groupProtocol: String): Unit = {
+    TestUtils.createTopicWithAdmin(admin, topic, brokers, controllerServers, 1, 2)
+    val partition = 0
+    val tp = new TopicPartition(topic, partition)
+    val valueSize = 600_000 // far larger than one chunk, so each record spans many chunks
+    // TODO: also exercise the compressed codecs here once the incremental strategy supports compression.
+
+    val producer = createProducer(batchSize = 1024 * 1024, bufferSize = 8L * 1024 * 1024)
+    val value = randomBytes(valueSize)
+    val metadata = producer.send(new ProducerRecord(topic, partition, "key".getBytes, value)).get
+    assertEquals(valueSize, metadata.serializedValueSize)
+
+    // Verify the exact bytes round-trip.
+    val consumer = TestUtils.createConsumer(
+      bootstrapServers(listenerName = ListenerName.forSecurityProtocol(SecurityProtocol.PLAINTEXT)),
+      groupProtocolFromTestParameters())
+    try {
+      consumer.assign(JList.of(tp))
+      consumer.seekToBeginning(JList.of(tp))
+      val consumed = TestUtils.consumeRecords(consumer, 1)
+      assertEquals(metadata.offset, consumed.head.offset)
+      assertArrayEquals(value, consumed.head.value)
+    } finally {
+      consumer.close()
+    }
+  }
+
+  private def randomBytes(size: Int): Array[Byte] = {
+    val bytes = new Array[Byte](size)
+    ThreadLocalRandom.current().nextBytes(bytes)
+    bytes
+  }
+}
diff --git a/core/src/test/scala/unit/kafka/utils/TestUtils.scala b/core/src/test/scala/unit/kafka/utils/TestUtils.scala
index bc41c2ed2aeeb..088fe6384ba6d 100755
--- a/core/src/test/scala/unit/kafka/utils/TestUtils.scala
+++ b/core/src/test/scala/unit/kafka/utils/TestUtils.scala
@@ -491,7 +491,8 @@ object TestUtils extends Logging {
                            saslProperties: Option[Properties] = None,
                            keySerializer: Serializer[K] = new ByteArraySerializer,
                            valueSerializer: Serializer[V] = new ByteArraySerializer,
-                           enableIdempotence: Boolean = false): KafkaProducer[K, V] = {
+                           enableIdempotence: Boolean = false,
+                           additionalProperties: Option[Properties] = None): KafkaProducer[K, V] = {
     val producerProps = new Properties
     producerProps.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokerList)
     producerProps.put(ProducerConfig.ACKS_CONFIG, acks.toString)
@@ -504,6 +505,7 @@ object TestUtils extends Logging {
     producerProps.put(ProducerConfig.BATCH_SIZE_CONFIG, batchSize.toString)
     producerProps.put(ProducerConfig.COMPRESSION_TYPE_CONFIG, compressionType)
     producerProps.put(ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG, enableIdempotence.toString)
+    additionalProperties.foreach(producerProps ++= _)
     producerProps ++= JaasTestUtils.producerSecurityConfigs(securityProtocol, OptionConverters.toJava(trustStoreFile), OptionConverters.toJava(saslProperties))
     new KafkaProducer[K, V](producerProps, keySerializer, valueSerializer)
   }

From b71d5b716d3427f2cdaea14839ffa16ff83a6ae6 Mon Sep 17 00:00:00 2001
From: Lianet Magrans <lmagrans@confluent.io>
Date: Thu, 18 Jun 2026 15:54:53 -0400
Subject: [PATCH 05/11] refactor validation & upd docs

---
 .../kafka/clients/producer/KafkaProducer.java |  2 +-
 .../producer/internals/ChunkedBufferPool.java | 26 ++++++++++---------
 .../internals/ChunkedProducerBatch.java       |  5 ++--
 .../internals/ChunkedRecordAccumulator.java   |  4 +--
 .../ChunkedRecordAccumulatorTest.java         | 22 ++++++++--------
 5 files changed, 31 insertions(+), 28 deletions(-)

diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/KafkaProducer.java b/clients/src/main/java/org/apache/kafka/clients/producer/KafkaProducer.java
index 0990a68439171..51835478ea5df 100644
--- a/clients/src/main/java/org/apache/kafka/clients/producer/KafkaProducer.java
+++ b/clients/src/main/java/org/apache/kafka/clients/producer/KafkaProducer.java
@@ -471,7 +471,7 @@ public KafkaProducer(Properties properties, Serializer<K> keySerializer, Seriali
             // Use the chunked path only when a batch is at least one full chunk
             // (batch.size >= CHUNK_SIZE). Below that, a batch can't fill even one chunk, so chunking
             // would reserve a whole chunk for a batch capped under it (over-reservation); the full
-            // strategy (reserving up to batch.size per batch) is both leaner and identical to trunk.
+            // strategy (reserving max(batch.size, record size) per batch) is both leaner and identical to trunk.
             if (incremental && batchSize >= ChunkedRecordAccumulator.CHUNK_SIZE) {
                 this.accumulator = new ChunkedRecordAccumulator(logContext,
                         batchSize,
diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedBufferPool.java b/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedBufferPool.java
index 9fa367d80e889..1c40451c28dbe 100644
--- a/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedBufferPool.java
+++ b/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedBufferPool.java
@@ -61,22 +61,11 @@ public ChunkedBufferPool(long memory, int chunkSize, Metrics metrics, Time time,
     public List<ByteBuffer> allocateChunks(int totalSize, long maxTimeToBlockMs) throws InterruptedException {
         if (totalSize <= 0)
             throw new IllegalArgumentException("totalSize must be positive: " + totalSize);
-        if (totalSize > totalMemory())
-            throw new IllegalArgumentException("Attempt to allocate " + totalSize
-                + " bytes across chunks, but there is a hard limit of "
-                + totalMemory() + " on memory allocations.");
+        throwIfChunksNeededExceedsPool(totalSize);
 
         int chunkSize = poolableSize();
         int numChunks = (int) (((long) totalSize + chunkSize - 1L) / chunkSize);
         long memoryRequired = (long) numChunks * chunkSize;
-        // Rounding totalSize up to whole chunks can require more pool memory than buffer.memory
-        // holds even when totalSize alone fits (the single-buffer "full" path reserves exactly the
-        // record size, so it never hits this). Reject it here to fail fast, instead of blocking for
-        // max.block.ms and then throwing BufferExhausted for a request that can never be satisfied.
-        if (memoryRequired > totalMemory())
-            throw new IllegalArgumentException("Attempt to allocate " + numChunks + " chunks of "
-                + chunkSize + " bytes (" + memoryRequired + " bytes total), but there is a hard limit of "
-                + totalMemory() + " on memory allocations.");
 
         // Chunks pulled from the free list; the remaining bytes are reserved against
         // nonPooledAvailableMemory and materialized as raw allocations after the lock is released.
@@ -196,4 +185,17 @@ public List<ByteBuffer> allocateChunks(int totalSize, long maxTimeToBlockMs) thr
             }
         }
     }
+
+    /**
+     * Throw if rounding {@code totalSize} up to whole chunks would exceed the pool.
+     */
+    private void throwIfChunksNeededExceedsPool(int totalSize) {
+        int chunkSize = poolableSize();
+        int numChunks = (int) (((long) totalSize + chunkSize - 1L) / chunkSize);
+        long memoryRequired = (long) numChunks * chunkSize;
+        if (memoryRequired > totalMemory())
+            throw new IllegalArgumentException("Attempt to allocate " + totalSize + " bytes ("
+                + numChunks + " chunks of " + chunkSize + " = " + memoryRequired + " bytes), but the "
+                + "hard limit on memory allocations is " + totalMemory() + ".");
+    }
 }
diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedProducerBatch.java b/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedProducerBatch.java
index e80ed7861826d..78a187cf431d7 100644
--- a/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedProducerBatch.java
+++ b/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedProducerBatch.java
@@ -45,8 +45,9 @@ public ChunkedProducerBatch(TopicPartition tp, MemoryRecordsBuilder recordsBuild
 
     /**
      * Bytes of physical buffer this batch needs before {@code tryAppend} could accept the given
-     * record. Returns 0 when the record fits (either logically full, or the stream's combined
-     * chunk capacity has room). Positive when {@code hasRoomFor} allows the record but the
+     * record. Returns 0 when no extension is needed: the batch is empty (first record), it is
+     * logically full, or the stream's combined chunk capacity already has room. Positive when
+     * {@code hasRoomFor} allows the record but the
      * chunks lack physical capacity — the accumulator allocates exactly the missing bytes
      * (rounded up to whole chunks) and attaches them via {@link #addBuffers} before retrying.
      */
diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedRecordAccumulator.java b/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedRecordAccumulator.java
index a587754d80c44..a16f85dbf9ef7 100644
--- a/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedRecordAccumulator.java
+++ b/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedRecordAccumulator.java
@@ -42,8 +42,8 @@
 
 /**
  * A {@link RecordAccumulator} variant that uses chunked buffer allocation: each new batch
- * pre-allocates {@code ceil(recordSize / chunkSize)} chunks from a {@link ChunkedBufferPool},
- * and grows mid-batch by attaching additional chunks via
+ * pre-allocates the chunks needed for the first record's estimated size (batch header plus the
+ * record), and grows mid-batch by attaching additional chunks via
  * {@link ChunkedProducerBatch#addBuffers(List)} when later records would overflow.
  * <p>
  * This is the "incremental" buffer.memory allocation strategy: memory consumption scales with
diff --git a/clients/src/test/java/org/apache/kafka/clients/producer/internals/ChunkedRecordAccumulatorTest.java b/clients/src/test/java/org/apache/kafka/clients/producer/internals/ChunkedRecordAccumulatorTest.java
index 191224cd437d8..35bc979ae86a4 100644
--- a/clients/src/test/java/org/apache/kafka/clients/producer/internals/ChunkedRecordAccumulatorTest.java
+++ b/clients/src/test/java/org/apache/kafka/clients/producer/internals/ChunkedRecordAccumulatorTest.java
@@ -246,10 +246,10 @@ public List<ByteBuffer> allocateChunks(int totalSize, long maxTimeToBlockMs) thr
     /**
      * Regression guard: an inflight chunked batch returns all K chunks to the pool on the
      * expiration / broker-disconnect path, not just one {@code initialCapacity} chunkSize.
-     * The parent {@code RecordAccumulator.deallocate} for inflight batches credits only
-     * {@code initialCapacity} back to the pool and throws; {@link ChunkedRecordAccumulator}
-     * overrides {@code deallocate} to return all chunks before the throw. Removing that
-     * override would re-introduce a (K−1)×chunkSize leak and fail this test.
+     * For an inflight batch the parent {@code RecordAccumulator.deallocate} calls
+     * {@code batch.deallocateInflightBuffer(pool)} and then throws; {@link ChunkedProducerBatch}
+     * overrides that hook to return all chunks (instead of donating one fresh buffer). Removing
+     * that override would re-introduce a (K−1)×chunkSize leak and fail this test.
      */
     @Test
     public void testInflightExpirationReturnsAllChunksToPool() throws Exception {
@@ -297,11 +297,11 @@ private Deque<ProducerBatch> batchesFor(RecordAccumulator accum, TopicPartition
     /**
      * Sender's drain (and any other caller of {@code batch.close()}) cascades through
      * {@code MemoryRecordsBuilder.closeForRecordAppends()} →
-     * {@code appendStream.close()} → {@code bufferStream.close()}. The chunked stream's
-     * buffer must remain readable after that cascade so the builder can flatten chunks into
-     * the heap buffer it sends over the network. If the chunked stream's {@code close()}
-     * prematurely returns chunks to the pool, the flattened buffer is empty and the header
-     * write fails (or produces an empty record set).
+     * {@code appendStream.close()} → {@code bufferStream.close()}. The chunk data must remain
+     * readable through that cascade so the builder can flatten the chunks into the heap buffer it
+     * sends over the network — chunks are returned to the pool only at batch completion
+     * (deallocate), never at close. If they were released at close, the flattened buffer would be
+     * empty and the header write would fail (or produce an empty record set).
      */
     @Test
     public void testBatchCloseDoesNotDeallocateChunksPrematurely() throws Exception {
@@ -316,7 +316,7 @@ public void testBatchCloseDoesNotDeallocateChunksPrematurely() throws Exception
         ProducerBatch batch = dq.peekFirst();
         assertNotNull(batch);
 
-        // Matches the call sites in RecordAccumulator.drain (line 941) and Sender.
+        // Matches the call sites in RecordAccumulator.drain and Sender.
         batch.close();
         MemoryRecords records = batch.records();
         assertTrue(records.sizeInBytes() > 0,
@@ -364,7 +364,7 @@ public void testExtensionTracksCumulativeBatchSize() throws Exception {
         batch.close();
         MemoryRecords records = batch.records();
         int actualSize = records.sizeInBytes();
-        // Under NONE compression, estimatedBytesWritten is exact: physical bytes ≈ header + sum
+        // Under NONE compression, estimatedBytesWritten is exact: physical bytes = header + sum
         // of per-record bytes. The chunks attached must cover that, so actualSize must be >
         // chunkSize for a multi-record batch with non-trivial content.
         assertTrue(actualSize > chunkSize,

From bdc14e90f471b4c11fd881c98de4f165c009afc5 Mon Sep 17 00:00:00 2001
From: Lianet Magrans <lmagrans@confluent.io>
Date: Mon, 22 Jun 2026 09:54:12 -0400
Subject: [PATCH 06/11] update config

---
 .../apache/kafka/clients/producer/KafkaProducer.java |  3 +--
 .../kafka/clients/producer/ProducerConfig.java       | 12 ++++++------
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/KafkaProducer.java b/clients/src/main/java/org/apache/kafka/clients/producer/KafkaProducer.java
index 51835478ea5df..3b13014768281 100644
--- a/clients/src/main/java/org/apache/kafka/clients/producer/KafkaProducer.java
+++ b/clients/src/main/java/org/apache/kafka/clients/producer/KafkaProducer.java
@@ -470,8 +470,7 @@ public KafkaProducer(Properties properties, Serializer<K> keySerializer, Seriali
             }
             // Use the chunked path only when a batch is at least one full chunk
             // (batch.size >= CHUNK_SIZE). Below that, a batch can't fill even one chunk, so chunking
-            // would reserve a whole chunk for a batch capped under it (over-reservation); the full
-            // strategy (reserving max(batch.size, record size) per batch) is both leaner and identical to trunk.
+            // would over-reserve.
             if (incremental && batchSize >= ChunkedRecordAccumulator.CHUNK_SIZE) {
                 this.accumulator = new ChunkedRecordAccumulator(logContext,
                         batchSize,
diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/ProducerConfig.java b/clients/src/main/java/org/apache/kafka/clients/producer/ProducerConfig.java
index 286f3e37dc823..696e9c103118f 100644
--- a/clients/src/main/java/org/apache/kafka/clients/producer/ProducerConfig.java
+++ b/clients/src/main/java/org/apache/kafka/clients/producer/ProducerConfig.java
@@ -234,12 +234,12 @@ public class ProducerConfig extends AbstractConfig {
     public static final String BUFFER_MEMORY_ALLOCATION_STRATEGY_INCREMENTAL = "incremental";
     private static final String BUFFER_MEMORY_ALLOCATION_STRATEGY_DOC = "Controls how the producer allocates memory from <code>" + BUFFER_MEMORY_CONFIG + "</code> for record batches. The following values are supported: "
                                                     + "<ul>"
-                                                    + "<li><code>" + BUFFER_MEMORY_ALLOCATION_STRATEGY_FULL + "</code>: reserve <code>" + BATCH_SIZE_CONFIG + "</code> bytes up front for each in-progress batch, "
-                                                    + "regardless of the actual size of the records appended to it.</li>"
-                                                    + "<li><code>" + BUFFER_MEMORY_ALLOCATION_STRATEGY_INCREMENTAL + "</code>: allocate memory on demand as records are appended, growing each batch "
-                                                    + "up to <code>" + BATCH_SIZE_CONFIG + "</code> bytes. This keeps memory usage proportional to the data actually buffered rather than to the number of "
-                                                    + "active partitions, which allows larger <code>" + BATCH_SIZE_CONFIG + "</code> values (e.g. for high-latency clusters) without reserving "
-                                                    + "<code>" + BATCH_SIZE_CONFIG + "</code> bytes for every active partition.</li>"
+                                                    + "<li><code>" + BUFFER_MEMORY_ALLOCATION_STRATEGY_FULL + "</code>: reserves a full <code>" + BATCH_SIZE_CONFIG + "</code> up front when a batch is created, "
+                                                    + "regardless of how much data it ends up holding. Pool memory therefore scales with the number of active partitions.</li>"
+                                                    + "<li><code>" + BUFFER_MEMORY_ALLOCATION_STRATEGY_INCREMENTAL + "</code>: allocates memory on demand as records are appended, growing a batch "
+                                                    + "up to <code>" + BATCH_SIZE_CONFIG + "</code>. Pool memory therefore scales with the data actually buffered rather than the number of active "
+                                                    + "partitions, allowing larger <code>" + BATCH_SIZE_CONFIG + "</code> values (e.g. for high-latency clusters) without reserving "
+                                                    + "<code>" + BATCH_SIZE_CONFIG + "</code> for every active partition.</li>"
                                                     + "</ul>";
 
     /** <code>retry.backoff.ms</code> */

From 4e914999d0af74b25e3714d8cc118194b550ea49 Mon Sep 17 00:00:00 2001
From: Lianet Magrans <lmagrans@confluent.io>
Date: Mon, 22 Jun 2026 14:55:55 -0400
Subject: [PATCH 07/11] fix for partition change & misc

---
 .../producer/internals/BufferPool.java        |  1 -
 .../producer/internals/ChunkedBufferPool.java | 14 ++--
 .../ChunkedByteBufferOutputStream.java        | 21 ++++-
 .../internals/ChunkedRecordAccumulator.java   | 12 ++-
 .../ChunkedByteBufferOutputStreamTest.java    | 22 +++++
 .../ChunkedRecordAccumulatorTest.java         | 82 +++++++++++++++++++
 6 files changed, 139 insertions(+), 13 deletions(-)

diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/internals/BufferPool.java b/clients/src/main/java/org/apache/kafka/clients/producer/internals/BufferPool.java
index f376c0fe2f4a3..c515cf0a8d79f 100644
--- a/clients/src/main/java/org/apache/kafka/clients/producer/internals/BufferPool.java
+++ b/clients/src/main/java/org/apache/kafka/clients/producer/internals/BufferPool.java
@@ -59,7 +59,6 @@ public class BufferPool {
     private final Metrics metrics;
     protected final Time time;
     private final Sensor waitTime;
-    /** True once {@link #close()} has been invoked. */
     protected boolean closed;
 
     /**
diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedBufferPool.java b/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedBufferPool.java
index 1c40451c28dbe..c81fef81cffbe 100644
--- a/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedBufferPool.java
+++ b/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedBufferPool.java
@@ -30,10 +30,7 @@
 /**
  * A {@link BufferPool} dedicated to chunk-sized buffer reuse (chunk size = {@link #poolableSize()}).
  * <p>
- * Adds {@link #allocateChunks(int, long)} to acquire multiple chunks atomically: one lock
- * acquisition and a single {@link Condition} on {@link #waiters} (FIFO-fair against single-chunk
- * acquirers), with any failure (timeout, close, OOM) refunding the whole reservation before the
- * exception propagates. No partial holds are visible during the wait.
+ * Adds {@link #allocateChunks(int, long)} to acquire multiple chunks atomically.
  */
 public class ChunkedBufferPool extends BufferPool {
 
@@ -44,9 +41,10 @@ public ChunkedBufferPool(long memory, int chunkSize, Metrics metrics, Time time,
     /**
      * Allocate {@code ceil(totalSize / chunkSize)} chunk-sized buffers atomically, mirroring
      * {@link BufferPool#allocate}: satisfied immediately if memory is available, else blocks up to
-     * {@code maxTimeToBlockMs} for the whole request (FIFO on {@link #waiters}). The reservation is
-     * tracked as bytes against {@link #nonPooledAvailableMemory} plus chunks polled from
-     * {@link #free}; on any failure path every reserved byte is returned and the next waiter signaled.
+     * {@code maxTimeToBlockMs} for the whole request (FIFO on {@link #waiters}).
+     * The reservation is tracked as bytes against {@link #nonPooledAvailableMemory} plus chunks polled
+     * from {@link #free}. Any failure refunds the whole reservation and signals
+     * the next waiter before the exception propagates, so no partial holds are visible during the wait.
      *
      * @param totalSize        minimum total bytes of capacity required across the returned chunks
      * @param maxTimeToBlockMs maximum time in milliseconds to block waiting for memory
@@ -67,7 +65,7 @@ public List<ByteBuffer> allocateChunks(int totalSize, long maxTimeToBlockMs) thr
         int numChunks = (int) (((long) totalSize + chunkSize - 1L) / chunkSize);
         long memoryRequired = (long) numChunks * chunkSize;
 
-        // Chunks pulled from the free list; the remaining bytes are reserved against
+        // Chunks taken from the free list. The remaining bytes are reserved against
         // nonPooledAvailableMemory and materialized as raw allocations after the lock is released.
         List<ByteBuffer> pooled = new ArrayList<>(numChunks);
 
diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedByteBufferOutputStream.java b/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedByteBufferOutputStream.java
index 3b9a9ed42ff5e..89110438381f9 100644
--- a/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedByteBufferOutputStream.java
+++ b/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedByteBufferOutputStream.java
@@ -59,9 +59,8 @@ public class ChunkedByteBufferOutputStream extends ByteBufferOutputStream {
      * @param chunkSize     the size of each chunk in bytes
      * @param pool          the buffer pool used for deallocation
      */
-    @SuppressWarnings("this-escape")
     public ChunkedByteBufferOutputStream(List<ByteBuffer> initialChunks, int chunkSize, BufferPool pool) {
-        super(initialChunks.get(0));
+        super(validatedFirstChunk(initialChunks, chunkSize));
         this.chunkSize = chunkSize;
         this.pool = pool;
         this.chunks = new ArrayList<>(initialChunks);
@@ -70,6 +69,21 @@ public ChunkedByteBufferOutputStream(List<ByteBuffer> initialChunks, int chunkSi
         this.dirty = true;
     }
 
+    /**
+     * Validates the chunk contract: {@code initialChunks} non-empty, each chunk's capacity equal to
+     * {@code chunkSize}. Returns the first chunk.
+     */
+    private static ByteBuffer validatedFirstChunk(List<ByteBuffer> initialChunks, int chunkSize) {
+        if (initialChunks == null || initialChunks.isEmpty())
+            throw new IllegalArgumentException("initialChunks must be non-empty");
+        for (ByteBuffer chunk : initialChunks) {
+            if (chunk.capacity() != chunkSize)
+                throw new IllegalArgumentException("each chunk must have capacity " + chunkSize
+                    + ", but found a chunk of capacity " + chunk.capacity());
+        }
+        return initialChunks.get(0);
+    }
+
     @Override
     public void write(int b) {
         ensureChunkCapacity(1);
@@ -223,7 +237,8 @@ public int limit() {
 
     @Override
     public int initialCapacity() {
-        return chunks.isEmpty() ? chunkSize : chunks.get(0).capacity();
+        // Every chunk is chunkSize (enforced by the constructor), so this is constant.
+        return chunkSize;
     }
 
     @Override
diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedRecordAccumulator.java b/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedRecordAccumulator.java
index a16f85dbf9ef7..2e74f3b096261 100644
--- a/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedRecordAccumulator.java
+++ b/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedRecordAccumulator.java
@@ -199,8 +199,18 @@ public RecordAppendResult append(String topic,
                 }
 
                 synchronized (dq) {
-                    if (partitionChanged(topic, topicInfo, partitionInfo, dq, nowMs, cluster))
+                    if (partitionChanged(topic, topicInfo, partitionInfo, dq, nowMs, cluster)) {
+                        // The partition switched while we allocated extension chunks off-lock. They
+                        // were sized against the previous partition's tail batch, so they must not be
+                        // attached to a different partition's tail — refund them and let the next
+                        // iteration re-probe the new partition from scratch.
+                        if (extensionChunks != null) {
+                            for (ByteBuffer chunk : extensionChunks)
+                                chunkedFree.deallocate(chunk);
+                            extensionChunks = null;
+                        }
                         continue;
+                    }
 
                     if (extensionChunks != null) {
                         ProducerBatch last = dq.peekLast();
diff --git a/clients/src/test/java/org/apache/kafka/clients/producer/internals/ChunkedByteBufferOutputStreamTest.java b/clients/src/test/java/org/apache/kafka/clients/producer/internals/ChunkedByteBufferOutputStreamTest.java
index 569c3bbcfcbdc..c96824b09c137 100644
--- a/clients/src/test/java/org/apache/kafka/clients/producer/internals/ChunkedByteBufferOutputStreamTest.java
+++ b/clients/src/test/java/org/apache/kafka/clients/producer/internals/ChunkedByteBufferOutputStreamTest.java
@@ -49,6 +49,28 @@ private List<ByteBuffer> chunks(ChunkedBufferPool pool, int chunkSize, int count
         return pool.allocateChunks(chunkSize * count, 100);
     }
 
+    /**
+     * The constructor fails fast with {@link IllegalArgumentException} when its chunk contract is
+     * violated — a null or empty list, or a chunk whose capacity differs from {@code chunkSize} —
+     * rather than the bare NPE / IndexOutOfBoundsException that {@code initialChunks.get(0)} would
+     * otherwise throw from the {@code super(...)} call.
+     */
+    @Test
+    public void testConstructorRejectsInvalidChunks() throws Exception {
+        int chunkSize = 16;
+        ChunkedBufferPool p = pool(64, chunkSize);
+
+        assertThrows(IllegalArgumentException.class,
+            () -> new ChunkedByteBufferOutputStream(null, chunkSize, p));
+        assertThrows(IllegalArgumentException.class,
+            () -> new ChunkedByteBufferOutputStream(Collections.emptyList(), chunkSize, p));
+
+        // A chunk whose capacity doesn't match chunkSize violates the contract.
+        List<ByteBuffer> wrongSize = Collections.singletonList(ByteBuffer.allocate(chunkSize + 1));
+        assertThrows(IllegalArgumentException.class,
+            () -> new ChunkedByteBufferOutputStream(wrongSize, chunkSize, p));
+    }
+
     /** Writes that fit in the initial chunk: stream produces the bytes back via buffer(). */
     @Test
     public void testSingleChunkWriteRoundtrip() throws Exception {
diff --git a/clients/src/test/java/org/apache/kafka/clients/producer/internals/ChunkedRecordAccumulatorTest.java b/clients/src/test/java/org/apache/kafka/clients/producer/internals/ChunkedRecordAccumulatorTest.java
index 35bc979ae86a4..97bdaa09c29c0 100644
--- a/clients/src/test/java/org/apache/kafka/clients/producer/internals/ChunkedRecordAccumulatorTest.java
+++ b/clients/src/test/java/org/apache/kafka/clients/producer/internals/ChunkedRecordAccumulatorTest.java
@@ -44,6 +44,7 @@
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicReference;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
@@ -243,6 +244,87 @@ public List<ByteBuffer> allocateChunks(int totalSize, long maxTimeToBlockMs) thr
         }
     }
 
+    /**
+     * Regression: when the sticky partition switches while extension chunks are held off-lock,
+     * those chunks — sized against the <em>previous</em> partition's tail batch — must be refunded
+     * to the pool on the retry, not carried into the next iteration and attached to a different
+     * partition's tail (which would over-extend the wrong batch).
+     * <p>
+     * Deterministic reproduction without threads: the extension path is the only one that calls
+     * {@code allocateChunks} non-blocking ({@code maxTimeToBlockMs == 0}), so the pool flags when an
+     * extension allocation has happened; a {@code partitionChanged} override then fires exactly one
+     * spurious switch on the very next check — which is the second-sync-block check, with the
+     * extension chunks held. The fix refunds those chunks before the retry; without it they are
+     * carried and attached instead, so no refund is observed during the append.
+     */
+    @Test
+    public void testPartitionSwitchRefundsHeldExtensionChunks() throws Exception {
+        int chunkSize = 128;
+        long totalMemory = 32L * chunkSize;
+
+        AtomicBoolean extensionAllocated = new AtomicBoolean(false);
+        AtomicInteger chunkDeallocations = new AtomicInteger(0);
+
+        ChunkedBufferPool pool = new ChunkedBufferPool(totalMemory, chunkSize, metrics, time, "producer-metrics") {
+            @Override
+            public List<ByteBuffer> allocateChunks(int totalSize, long maxTimeToBlockMs) throws InterruptedException {
+                List<ByteBuffer> chunks = super.allocateChunks(totalSize, maxTimeToBlockMs);
+                // The mid-batch extension path is the only non-blocking caller.
+                if (maxTimeToBlockMs == 0L)
+                    extensionAllocated.set(true);
+                return chunks;
+            }
+
+            @Override
+            public void deallocate(ByteBuffer buffer) {
+                chunkDeallocations.incrementAndGet();
+                super.deallocate(buffer);
+            }
+        };
+
+        AtomicBoolean switchFired = new AtomicBoolean(false);
+        ChunkedRecordAccumulator accum = new ChunkedRecordAccumulator(logContext, 8192, Compression.NONE,
+                /* lingerMs */ 0, /* retryBackoffMs */ 0L, /* retryBackoffMaxMs */ 0L,
+                /* deliveryTimeoutMs */ 3200, metrics, "producer-metrics", time,
+                /* transactionManager */ null, pool) {
+            @Override
+            protected boolean partitionChanged(String topic, TopicInfo topicInfo,
+                                               BuiltInPartitioner.StickyPartitionInfo partitionInfo,
+                                               Deque<ProducerBatch> deque, long nowMs, Cluster cluster) {
+                // Inject one spurious switch, but only once an extension allocation has happened —
+                // i.e. on the second-sync-block check, while extension chunks are held.
+                if (extensionAllocated.get() && switchFired.compareAndSet(false, true))
+                    return true;
+                return super.partitionChanged(topic, topicInfo, partitionInfo, deque, nowMs, cluster);
+            }
+        };
+        try {
+            // Warmup: tiny first record establishes a chunked tail (first-record path, blocking
+            // allocate — does not flag extensionAllocated).
+            accum.append(topic, partition1, 0L, key, new byte[1], Record.EMPTY_HEADERS, null,
+                    maxBlockTimeMs, time.milliseconds(), cluster);
+            int deallocBeforeExtend = chunkDeallocations.get();
+
+            // Second record overflows the chunk → extension allocated off-lock → injected switch
+            // fires in the second sync block with those chunks held.
+            accum.append(topic, partition1, 0L, key, new byte[200], Record.EMPTY_HEADERS, null,
+                    maxBlockTimeMs, time.milliseconds(), cluster);
+
+            assertTrue(switchFired.get(), "the injected partition switch should have fired");
+            assertTrue(chunkDeallocations.get() > deallocBeforeExtend,
+                    "extension chunks held at the partition switch must be refunded to the pool; "
+                            + "without the fix they are carried and attached instead");
+
+            // The record still appends correctly after the switch-retry.
+            Deque<ProducerBatch> dq = batchesFor(accum, tp1);
+            assertEquals(1, dq.size());
+            assertEquals(2, dq.peekFirst().recordCount,
+                    "second record should still land in the batch after the switch-retry");
+        } finally {
+            accum.close();
+        }
+    }
+
     /**
      * Regression guard: an inflight chunked batch returns all K chunks to the pool on the
      * expiration / broker-disconnect path, not just one {@code initialCapacity} chunkSize.

From 9ccd47dc673aeaaa44d6c2b9ae6bef28ba811d77 Mon Sep 17 00:00:00 2001
From: Lianet Magrans <lmagrans@confluent.io>
Date: Tue, 23 Jun 2026 12:59:43 -0400
Subject: [PATCH 08/11] test updates & docs

---
 .../producer/internals/ChunkedBufferPool.java |   6 +-
 .../ChunkedByteBufferOutputStream.java        |  51 +++----
 .../internals/ChunkedProducerBatch.java       |  14 +-
 .../internals/ChunkedRecordAccumulator.java   |  70 ++++-----
 .../producer/internals/RecordAccumulator.java |  12 +-
 .../record/internal/AbstractRecords.java      |   7 +-
 .../record/internal/MemoryRecordsBuilder.java |  16 +--
 .../internals/ChunkedBufferPoolTest.java      |  51 ++-----
 .../ChunkedByteBufferOutputStreamTest.java    | 135 ++++++++----------
 .../ChunkedRecordAccumulatorTest.java         |  86 +++--------
 10 files changed, 167 insertions(+), 281 deletions(-)

diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedBufferPool.java b/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedBufferPool.java
index c81fef81cffbe..f2c8ba35f1568 100644
--- a/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedBufferPool.java
+++ b/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedBufferPool.java
@@ -125,8 +125,6 @@ public List<ByteBuffer> allocateChunks(int totalSize, long maxTimeToBlockMs) thr
                         remainingTimeToBlockNs -= timeNs;
 
                         // Take pool chunks first, then reserve non-pool bytes for the remainder.
-                        // Pool chunks don't bump `accumulated` because their refund path is
-                        // `free.addFirst` in the outer catch, not nonPool.
                         while (pooled.size() < numChunks
                                 && (long) (pooled.size() + 1) * chunkSize + accumulated <= memoryRequired
                                 && !free.isEmpty()) {
@@ -140,7 +138,7 @@ public List<ByteBuffer> allocateChunks(int totalSize, long maxTimeToBlockMs) thr
                             accumulated += got;
                         }
                     }
-                    // Loop terminated normally — clear the rollback marker (mirrors BufferPool.allocate).
+                    // Clear the rollback tracker.
                     accumulated = 0;
                 } finally {
                     // On failure (timeout / close / interrupt), refund the non-pool bytes taken.
@@ -163,7 +161,7 @@ public List<ByteBuffer> allocateChunks(int totalSize, long maxTimeToBlockMs) thr
             }
         }
 
-        // Allocate raw chunks for the reserved non-pooled portion outside the lock. On OOM,
+        // Allocate raw chunks for the reserved non-pooled portion outside the lock. On error,
         // refund all reserved bytes (memoryRequired) and let the next waiter try.
         int chunksStillNeeded = numChunks - pooled.size();
         List<ByteBuffer> result = new ArrayList<>(numChunks);
diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedByteBufferOutputStream.java b/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedByteBufferOutputStream.java
index 89110438381f9..97b89cd1b4416 100644
--- a/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedByteBufferOutputStream.java
+++ b/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedByteBufferOutputStream.java
@@ -23,21 +23,20 @@
 import java.util.List;
 
 /**
- * A {@link ByteBufferOutputStream} backed by a linked list of fixed-size chunks instead of a
- * single re-allocated buffer.
+ * A {@link ByteBufferOutputStream} backed by a linked list of fixed-size chunks instead of a single
+ * re-allocated buffer. Chunks are supplied by the caller (initial chunks via the constructor,
+ * additional chunks via {@link #addBuffers(List)}).
  * <p>
- * <b>This stream does not grow on its own.</b> Chunks are supplied by the caller — initial
- * chunks via the constructor, additional chunks via {@link #addBuffers(List)}. When a write's
- * size exceeds the stream's {@link #remaining()} (sum of free bytes across all attached
- * chunks), {@link IllegalStateException} is thrown. The caller is responsible for attaching
- * additional chunks before any such write.
- * <p>
- * Automatic mid-write growth (allocating from inside {@code write()}) is a follow-up tied to
- * compression support, where compressor buffering can make a write exceed its reservation; until
- * then the write path stays allocation-free and deterministic.
- * <p>
- * When {@link #buffer()} is called (typically at batch close), all chunks are flattened into a
- * single contiguous ByteBuffer — exactly one copy operation.
+ * Current/temporary behavior:
+ * <ul>
+ * <li>The stream does not grow on its own: a write whose size exceeds the remaining free bytes
+ *     across all attached chunks throws {@link IllegalStateException}, so the caller must attach
+ *     enough chunks before any such write.
+ *     TODO: KAFKA-20579 (automatic mid-write growth for compression support).</li>
+ * <li>{@link #buffer()} returns the written bytes as a single contiguous {@link ByteBuffer},
+ *     flattening all chunks into a new buffer with an extra copy.
+ *     TODO: KAFKA-20580 (remove the extra copy on send, scatter-gather send).</li>
+ * </ul>
  */
 public class ChunkedByteBufferOutputStream extends ByteBufferOutputStream {
 
@@ -51,10 +50,10 @@ public class ChunkedByteBufferOutputStream extends ByteBufferOutputStream {
 
     /**
      * Constructs a chunked output stream backed by the given pre-allocated chunks. Ownership of
-     * {@code initialChunks} transfers to this stream — they will be returned to the pool via
-     * {@link #deallocate()}.
+     * {@code initialChunks} transfers to this stream (they will be returned to the pool via
+     * {@link #deallocate()}).
      *
-     * @param initialChunks pre-allocated chunks; must be non-empty; each chunk's capacity must
+     * @param initialChunks pre-allocated chunks. Must be non-empty and each chunk's capacity must
      *                      equal {@code chunkSize}
      * @param chunkSize     the size of each chunk in bytes
      * @param pool          the buffer pool used for deallocation
@@ -123,17 +122,12 @@ private void ensureChunkCapacity(int needed) {
     }
 
     /**
-     * Advances {@code currentChunk} to the next pre-supplied chunk; throws if none is left. The
-     * caller must attach additional chunks via {@link #addBuffers(List)} before any write that
-     * exceeds {@link #remaining()}. (Mid-record growth — non-blocking pool then heap — is a
-     * follow-up that lands with compression support.)
+     * Advances {@code currentChunk} to the next pre-supplied chunk.
      */
     private void advanceToNextChunk() {
         if (currentChunkIndex + 1 >= chunks.size()) {
-            throw new IllegalStateException(
-                "No more chunks available; the write exceeded the stream's remaining capacity. "
-                    + "Caller must obtain additional chunks (e.g. from ChunkedBufferPool) and attach them "
-                    + "via addBuffers before any write whose size exceeds remaining()");
+            // TODO: KAFKA-20579. With compression support, grow here instead of throwing.
+            throw new IllegalStateException("write exceeded the stream's remaining chunk capacity");
         }
         currentChunkIndex++;
         currentChunk = chunks.get(currentChunkIndex);
@@ -154,9 +148,7 @@ public ByteBuffer buffer() {
         }
         // TODO: KAFKA-20687. This flatten runs at batch close, when the chunk set is final.
         //  Today all chunks (used and unused) are returned to the pool only when the batch
-        //  completes (via deallocate(pool)). Consider releasing the fully-unused chunks
-        //  early, at close, rather than holding them until completion — removing them from
-        //  `chunks` here so the completion-time deallocate(pool) does not double-return them.
+        //  completes (via deallocate(pool)). Consider releasing the fully-unused chunks early, here.
         int totalSize = 0;
         for (ByteBuffer chunk : chunks) {
             totalSize += chunk.position();
@@ -237,14 +229,13 @@ public int limit() {
 
     @Override
     public int initialCapacity() {
-        // Every chunk is chunkSize (enforced by the constructor), so this is constant.
         return chunkSize;
     }
 
     @Override
     public void ensureRemaining(int remainingBytesRequired) {
         // A single call can guarantee at most `chunkSize` of space (the stream advances one chunk
-        // at a time); callers needing more attach chunks via addBuffers first. write(byte[]) loops
+        // at a time). Callers needing more attach chunks via addBuffers first. write(byte[]) loops
         // across chunks, so contiguous capacity isn't required.
         ensureChunkCapacity(Math.min(remainingBytesRequired, chunkSize));
     }
diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedProducerBatch.java b/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedProducerBatch.java
index 78a187cf431d7..05c290737fe24 100644
--- a/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedProducerBatch.java
+++ b/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedProducerBatch.java
@@ -25,7 +25,7 @@
 import java.util.List;
 
 /**
- * A {@link ProducerBatch} for the incremental (chunked) buffer.memory allocation strategy, backed
+ * A {@link ProducerBatch} for the incremental buffer.memory allocation strategy, backed
  * by a {@link MemoryRecordsBuilder} whose stream is a {@link ChunkedByteBufferOutputStream}.
  * It adds mid-batch chunk extension support ({@link #extensionBytesNeeded} /
  * {@link #addBuffers}) and overrides the pool deallocation hooks so all chunks are returned to
@@ -44,12 +44,12 @@ public ChunkedProducerBatch(TopicPartition tp, MemoryRecordsBuilder recordsBuild
     }
 
     /**
-     * Bytes of physical buffer this batch needs before {@code tryAppend} could accept the given
-     * record. Returns 0 when no extension is needed: the batch is empty (first record), it is
-     * logically full, or the stream's combined chunk capacity already has room. Positive when
-     * {@code hasRoomFor} allows the record but the
-     * chunks lack physical capacity — the accumulator allocates exactly the missing bytes
-     * (rounded up to whole chunks) and attaches them via {@link #addBuffers} before retrying.
+     * Bytes of chunk capacity this batch needs before {@code tryAppend} could accept the given
+     * record. Returns 0 when no extension is needed: the batch is empty (first record), it is at
+     * its batch-size limit, or the attached chunk capacity already has room. Positive when the
+     * record is within the batch-size limit but the attached chunks lack capacity — the accumulator
+     * then allocates exactly the missing bytes (rounded up to whole chunks) and attaches them via
+     * {@link #addBuffers} before retrying.
      */
     int extensionBytesNeeded(long timestamp, byte[] key, byte[] value, Header[] headers) {
         if (recordCount == 0)
diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedRecordAccumulator.java b/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedRecordAccumulator.java
index 2e74f3b096261..297dac0dca91f 100644
--- a/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedRecordAccumulator.java
+++ b/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedRecordAccumulator.java
@@ -41,22 +41,12 @@
 import java.util.List;
 
 /**
- * A {@link RecordAccumulator} variant that uses chunked buffer allocation: each new batch
- * pre-allocates the chunks needed for the first record's estimated size (batch header plus the
- * record), and grows mid-batch by attaching additional chunks via
- * {@link ChunkedProducerBatch#addBuffers(List)} when later records would overflow.
+ * A {@link RecordAccumulator} variant that backs each batch with fixed-size chunks drawn from a
+ * {@link ChunkedBufferPool}, attaching more chunks on demand as records are appended instead of
+ * reserving {@code batch.size} per batch up front. Buffered memory therefore scales with the data
+ * actually written rather than with {@code active_partition_count × batch.size}.
  * <p>
- * This is the "incremental" buffer.memory allocation strategy: memory consumption scales with
- * actual data buffered, not with {@code active_partition_count × batch.size}.
- * <p>
- * When a record arrives mid-batch (an open batch already exists) and the batch's chunks are
- * full, the flow is: under the deque lock, probe that open batch — the tail of the partition's
- * deque — via the {@link #tryAppend} override, which returns
- * {@link RecordAppendResult#needsExtension(int)} when the batch has logical room but lacks
- * physical chunk capacity, then attempt a <b>non-blocking</b> pool acquire for the extra chunks. If the pool is exhausted, close the current batch (so the sender can drain it)
- * and route the record through the first-record path, which <b>blocks</b> on the pool up to
- * {@code max.block.ms}. As a result, unlike the "full" strategy, {@code send()} may block (or
- * fail with {@link BufferExhaustedException}) on records other than the first of a batch.
+ * See {@link #append} and {@link #tryAppend} for how batches are created and grown.
  * <p>
  * TODO: support compressed data (with mid-record growth); the constructor rejects compression for now.
  */
@@ -128,7 +118,7 @@ public RecordAppendResult append(String topic,
         appendsInProgress.incrementAndGet();
         ChunkedByteBufferOutputStream bufferStream = null;
         List<ByteBuffer> extensionChunks = null;
-        int extensionBytes = 0;
+        int extensionBytes;
         if (headers == null) headers = Record.EMPTY_HEADERS;
         try {
             while (true) {
@@ -148,11 +138,11 @@ public RecordAppendResult append(String topic,
                     if (partitionChanged(topic, topicInfo, partitionInfo, dq, nowMs, cluster))
                         continue;
 
-                    // The tryAppend override probes the physical capacity of the tail batch
-                    // (dq.peekLast(), the partition's open batch): a needsBufferExtension result
-                    // means it has logical room but its chunks lack space for this record — fall
-                    // through to allocate the gap outside the deque lock. A null result means the
-                    // tail batch is full or absent — fall through to the first-record (new batch) path.
+                    // The tryAppend override checks the open batch (dq.peekLast()) for chunk
+                    // capacity: a needsBufferExtension result means it is within its batch-size limit
+                    // but its chunks lack capacity for this record — fall through to allocate the gap
+                    // outside the deque lock. A null result means there is no open batch (it is full
+                    // or absent) — fall through to the first-record (new batch) path.
                     RecordAppendResult appendResult = tryAppend(timestamp, key, value, headers, callbacks, dq, nowMs);
                     if (appendResult != null && !appendResult.needsBufferExtension) {
                         boolean enableSwitch = allBatchesFull(dq);
@@ -162,7 +152,7 @@ public RecordAppendResult append(String topic,
                     extensionBytes = appendResult == null ? 0 : appendResult.extensionBytesNeeded;
                 }
 
-                if (extensionBytes > 0 && extensionChunks == null) {
+                if (extensionBytes > 0) {
                     // Mid-batch extension: non-blocking only. The thread already holds the open
                     // batch's chunks, so blocking here could deadlock with the Sender (which frees
                     // pool memory by completing batches). On exhaustion, close the batch (making it
@@ -201,9 +191,9 @@ public RecordAppendResult append(String topic,
                 synchronized (dq) {
                     if (partitionChanged(topic, topicInfo, partitionInfo, dq, nowMs, cluster)) {
                         // The partition switched while we allocated extension chunks off-lock. They
-                        // were sized against the previous partition's tail batch, so they must not be
-                        // attached to a different partition's tail — refund them and let the next
-                        // iteration re-probe the new partition from scratch.
+                        // were sized against the previous partition's open batch, so they must not be
+                        // attached to a different partition's open batch — refund them and let the next
+                        // iteration re-check the new partition from scratch.
                         if (extensionChunks != null) {
                             for (ByteBuffer chunk : extensionChunks)
                                 chunkedFree.deallocate(chunk);
@@ -214,10 +204,10 @@ public RecordAppendResult append(String topic,
 
                     if (extensionChunks != null) {
                         ProducerBatch last = dq.peekLast();
-                        // The off-lock allocateChunks window allows the probed tail batch to be
+                        // The off-lock allocateChunks window allows the open batch we checked to be
                         // drained and replaced — possibly by a split batch (a plain
                         // ProducerBatch), which can't take extension chunks. Only attach to a
-                        // writable chunked tail; otherwise refund the chunks and re-evaluate.
+                        // writable chunked batch; otherwise refund the chunks and re-evaluate.
                         if (last instanceof ChunkedProducerBatch && last.isWritable()) {
                             ((ChunkedProducerBatch) last).addBuffers(extensionChunks);
                             extensionChunks = null;
@@ -228,27 +218,28 @@ public RecordAppendResult append(String topic,
                                 return retryResult;
                             }
                             // needsBufferExtension: a concurrent appender consumed capacity our
-                            // extension was sized against — loop to re-probe. null: batch became
+                            // extension was sized against — loop to re-check. null: batch became
                             // full — loop into new-batch creation. Terminates because writeLimit is
-                            // fixed: once full, the probe stops requesting extension. Regression:
-                            // testConcurrentExtensionRaceDoesNotOverflowChunkedStream.
+                            // fixed: once full, the check stops requesting extension.
                             continue;
                         }
-                        // Tail is gone, closed, or non-chunked (e.g., a split batch). Return chunks to pool.
+                        // The open batch is gone, closed, or non-chunked (e.g., a split batch). Return chunks to pool.
                         for (ByteBuffer chunk : extensionChunks)
                             chunkedFree.deallocate(chunk);
                         extensionChunks = null;
                         continue;
                     }
 
-                    // bufferStream is non-null here (first-record path).
+                    // First-record path: extensionChunks == null here implies extensionBytes == 0,
+                    // so bufferStream was allocated (this iteration or carried from a prior one).
+                    assert bufferStream != null;
                     int firstRecordSize = AbstractRecords.estimateSizeInBytesUpperBound(
                             RecordBatch.CURRENT_MAGIC_VALUE, compression.type(), key, value, headers);
                     final ChunkedByteBufferOutputStream batchStream = bufferStream;
                     RecordAppendResult appendResult = appendNewBatch(topic, effectivePartition, dq, timestamp, key, value, headers, callbacks,
                             () -> chunkedRecordsBuilder(batchStream, firstRecordSize), nowMs);
                     if (appendResult.needsBufferExtension) {
-                        // A concurrent appender created a tail batch we should extend rather
+                        // A concurrent appender created an open batch we should extend rather
                         // than start a new one (detected by appendNewBatch's in-lock tryAppend).
                         // Our bufferStream was sized for a fresh batch — release it and loop so
                         // the extension path allocates exactly the gap-sized chunks.
@@ -277,8 +268,8 @@ public RecordAppendResult append(String topic,
     /**
      * Try to append to a ProducerBatch, with mid-batch chunk extension support.
      * <p>
-     * If the tail batch has logical room (writeLimit-wise) but its chunked stream lacks
-     * physical capacity, returns {@link RecordAppendResult#needsExtension(int)} without
+     * If the open batch is within its batch-size limit but its chunked stream lacks chunk
+     * capacity, returns {@link RecordAppendResult#needsExtension(int)} without
      * attempting the append; the caller allocates chunks outside the deque lock, attaches
      * them, and retries. Otherwise defers to the parent.
      */
@@ -289,7 +280,7 @@ protected RecordAppendResult tryAppend(long timestamp, byte[] key, byte[] value,
             throw new KafkaException("Producer closed while send in progress");
         ProducerBatch last = deque.peekLast();
         // Split batches in an incremental deque are plain ProducerBatch (heap-backed, grow-on-demand)
-        // and never need chunk extension, so the probe only applies to chunked batches.
+        // and never need chunk extension, so the check only applies to chunked batches.
         if (last instanceof ChunkedProducerBatch) {
             int extensionBytes = ((ChunkedProducerBatch) last).extensionBytesNeeded(timestamp, key, value, headers);
             if (extensionBytes > 0)
@@ -304,10 +295,9 @@ protected ProducerBatch createProducerBatch(TopicPartition tp, MemoryRecordsBuil
     }
 
     /**
-     * Build a {@link MemoryRecordsBuilder} backed by the chunked stream. {@code writeLimit} =
-     * {@code max(batchSize, firstRecordSize)} (the same logical cap as the full path) bounds when
-     * the batch is full; physical capacity grows on demand via
-     * {@link ChunkedByteBufferOutputStream#addBuffers(List)}.
+     * Build a {@link MemoryRecordsBuilder} backed by the chunked stream. Its {@code writeLimit}
+     * bounds when the batch is full (the same batch-size limit as the full path), while chunk
+     * capacity grows on demand via {@link ChunkedByteBufferOutputStream#addBuffers(List)}.
      */
     private MemoryRecordsBuilder chunkedRecordsBuilder(ChunkedByteBufferOutputStream bufferStream,
                                                        int firstRecordSize) {
diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/internals/RecordAccumulator.java b/clients/src/main/java/org/apache/kafka/clients/producer/internals/RecordAccumulator.java
index 7bb4243e24f9d..4125b29acaf5c 100644
--- a/clients/src/main/java/org/apache/kafka/clients/producer/internals/RecordAccumulator.java
+++ b/clients/src/main/java/org/apache/kafka/clients/producer/internals/RecordAccumulator.java
@@ -379,9 +379,9 @@ public RecordAppendResult append(String topic,
      * @param headers the Headers for the record
      * @param callbacks The callbacks to execute
      * @param recordsBuilderSupplier Supplies the {@link MemoryRecordsBuilder} for the new
-     *        batch. Invoked lazily, only when this method is committed to creating a new
-     *        batch (i.e. after the in-lock concurrent-append race check has lost). The
-     *        chunked subclass passes a supplier that produces a stream-backed builder.
+     *        batch. Invoked lazily, only when a new batch is actually created. The chunked
+     *        subclass passes a supplier that produces a builder backed by a
+     *        {@link ChunkedByteBufferOutputStream}.
      * @param nowMs The current time, in milliseconds
      */
     protected RecordAppendResult appendNewBatch(String topic,
@@ -399,7 +399,7 @@ protected RecordAppendResult appendNewBatch(String topic,
         RecordAppendResult appendResult = tryAppend(timestamp, key, value, headers, callbacks, dq, nowMs);
         if (appendResult != null) {
             // Propagate without creating a new batch: either another thread already made us a batch
-            // (success), or — incremental strategy — a concurrent appender created an extendable tail
+            // (success), or — incremental strategy — a concurrent appender created an extendable open batch
             // (needsBufferExtension), so the caller releases its pre-allocated buffer and retries via
             // the extension path.
             return appendResult;
@@ -1272,8 +1272,8 @@ public static final class RecordAppendResult {
         public final boolean batchIsFull;
         public final boolean newBatchCreated;
         /**
-         * Signal (incremental strategy) that the tail batch has logical room (writeLimit-wise) but
-         * its chunks lack physical capacity. The append was NOT attempted; the caller allocates
+         * Signal (incremental strategy) that the open batch is within its batch-size limit but its
+         * chunks lack capacity. The append was NOT attempted; the caller allocates
          * {@link #extensionBytesNeeded} bytes of chunk capacity, attaches them via
          * {@link ChunkedProducerBatch#addBuffers}, and retries. When {@code true}, {@code future} is null.
          */
diff --git a/clients/src/main/java/org/apache/kafka/common/record/internal/AbstractRecords.java b/clients/src/main/java/org/apache/kafka/common/record/internal/AbstractRecords.java
index d8eb6c4dd0be7..4e6e32d75a556 100644
--- a/clients/src/main/java/org/apache/kafka/common/record/internal/AbstractRecords.java
+++ b/clients/src/main/java/org/apache/kafka/common/record/internal/AbstractRecords.java
@@ -144,11 +144,8 @@ else if (compressionType != CompressionType.NONE)
     }
 
     /**
-     * Upper-bound size of one record's contribution to a batch, <em>excluding</em> the batch header.
-     * For magic v2 it's {@link #estimateSizeInBytesUpperBound} minus
-     * {@link #recordBatchHeaderSizeInBytes}; for older magic it equals
-     * {@link #estimateSizeInBytesUpperBound}. Used by the incremental strategy's cumulative sizing,
-     * where the header is counted once via {@link MemoryRecordsBuilder#estimatedBytesWritten}.
+     * Upper-bound on the bytes a single record contributes to a batch, excluding the batch header.
+     * Used by the incremental strategy, which counts the header once, separately.
      */
     public static int recordSizeUpperBound(byte magic, CompressionType compressionType, byte[] key,
                                            byte[] value, Header[] headers) {
diff --git a/clients/src/main/java/org/apache/kafka/common/record/internal/MemoryRecordsBuilder.java b/clients/src/main/java/org/apache/kafka/common/record/internal/MemoryRecordsBuilder.java
index 2bf48518b2d6f..611654ed3e595 100644
--- a/clients/src/main/java/org/apache/kafka/common/record/internal/MemoryRecordsBuilder.java
+++ b/clients/src/main/java/org/apache/kafka/common/record/internal/MemoryRecordsBuilder.java
@@ -212,9 +212,8 @@ public int initialCapacity() {
     }
 
     /**
-     * The underlying output stream. Exposed so the incremental-strategy callers can attach
-     * pre-allocated chunks to a {@code ChunkedByteBufferOutputStream} and route deallocation
-     * polymorphically.
+     * The underlying output stream, exposed so the incremental strategy can manage its
+     * chunk-backed stream.
      */
     public ByteBufferOutputStream bufferStream() {
         return bufferStream;
@@ -828,15 +827,12 @@ private int estimatedBytesWritten() {
     }
 
     /**
-     * Static projection of the bytes a builder with the given magic, compression type, and ratio
-     * would have written for {@code uncompressedRecordsSizeInBytes} of records. Exact for
-     * uncompressed ({@code header + uncompressedBytes}); for compressed it applies the ratio and 5%
-     * safety multiplier to the whole total, like the per-builder {@link #estimatedBytesWritten()}.
+     * Returns the projected number of bytes the builder would write for
+     * {@code uncompressedRecordsSizeInBytes} of records under the given magic, compression type,
+     * and ratio: exact for uncompressed, a ratio-aware estimate for compressed.
      * <p>
      * Used by the incremental strategy to size chunk reservations (first record, and mid-batch via
-     * {@link #estimatedBytesWrittenAfter}). This is the batch's projected total <em>output</em>,
-     * distinct from the {@link #hasRoomFor} writeLimit gate, which counts the incoming record at
-     * full uncompressed size; the two coincide for uncompressed data and diverge under compression.
+     * {@link #estimatedBytesWrittenAfter}).
      */
     public static int estimatedBytesWritten(byte magic, CompressionType compressionType,
                                             float compressionRatio,
diff --git a/clients/src/test/java/org/apache/kafka/clients/producer/internals/ChunkedBufferPoolTest.java b/clients/src/test/java/org/apache/kafka/clients/producer/internals/ChunkedBufferPoolTest.java
index e0de8ae19e4dc..45e9aceb12cf5 100644
--- a/clients/src/test/java/org/apache/kafka/clients/producer/internals/ChunkedBufferPoolTest.java
+++ b/clients/src/test/java/org/apache/kafka/clients/producer/internals/ChunkedBufferPoolTest.java
@@ -19,6 +19,7 @@
 import org.apache.kafka.clients.producer.BufferExhaustedException;
 import org.apache.kafka.common.metrics.Metrics;
 import org.apache.kafka.common.utils.MockTime;
+import org.apache.kafka.test.TestUtils;
 
 import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.Test;
@@ -32,6 +33,7 @@
 import java.util.concurrent.atomic.AtomicReference;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertInstanceOf;
 import static org.junit.jupiter.api.Assertions.assertNotNull;
 import static org.junit.jupiter.api.Assertions.assertNull;
 import static org.junit.jupiter.api.Assertions.assertThrows;
@@ -41,7 +43,6 @@ public class ChunkedBufferPoolTest {
 
     private final MockTime time = new MockTime();
     private final Metrics metrics = new Metrics(time);
-    private final String metricGroup = "producer-metrics";
 
     @AfterEach
     public void teardown() {
@@ -49,6 +50,7 @@ public void teardown() {
     }
 
     private ChunkedBufferPool pool(long totalMemory, int chunkSize) {
+        String metricGroup = "producer-metrics";
         return new ChunkedBufferPool(totalMemory, chunkSize, metrics, time, metricGroup);
     }
 
@@ -62,7 +64,7 @@ public void testAllocateOneChunk() throws Exception {
         assertEquals(chunkSize, chunks.get(0).capacity());
     }
 
-    /** Total size that's not a multiple of chunk size rounds up; returned chunks cover it. */
+    /** Total size that's not a multiple of chunk size rounds up */
     @Test
     public void testAllocateRoundsUpToChunkBoundary() throws Exception {
         int chunkSize = 64;
@@ -105,14 +107,12 @@ public void testDeallocationRestoresMemory() throws Exception {
         assertEquals(total, p.availableMemory());
     }
 
-    /** Requesting more than total memory throws IllegalArgumentException. */
     @Test
     public void testRejectsRequestExceedingTotalMemory() {
         ChunkedBufferPool p = pool(128, 64);
         assertThrows(IllegalArgumentException.class, () -> p.allocateChunks(129, 100));
     }
 
-    /** Non-positive totalSize is rejected. */
     @Test
     public void testRejectsNonPositiveRequest() {
         ChunkedBufferPool p = pool(128, 64);
@@ -122,7 +122,7 @@ public void testRejectsNonPositiveRequest() {
 
     /**
      * When a multi-chunk request can't be fully satisfied within the deadline, chunks already
-     * acquired during the call must be returned to the pool — the caller must not leak memory.
+     * acquired during the call must be returned to the pool.
      */
     @Test
     public void testRollbackOnPartialFailure() throws Exception {
@@ -145,10 +145,7 @@ public void testRollbackOnPartialFailure() throws Exception {
 
     /**
      * No partial chunk holds during the wait. While a multi-chunk request blocks, the pool's
-     * {@code availableMemory()} must reflect bytes the waiter has not yet "earned" — i.e., the
-     * reservation is via the accumulator and not via chunks pulled out of the pool's accounting.
-     * Distinguishes the atomic implementation from the older loop-with-rollback (which held
-     * partials off the accounting between iterations).
+     * {@code availableMemory()} must reflect bytes the waiter has not yet "earned".
      */
     @Test
     public void testNoPartialHoldsDuringWait() throws Exception {
@@ -173,10 +170,7 @@ public void testNoPartialHoldsDuringWait() throws Exception {
         }, "chunked-waiter");
         t.start();
         // Wait until the thread has joined the waiters queue.
-        long deadlineMs = System.currentTimeMillis() + 2_000;
-        while (p.queued() == 0 && System.currentTimeMillis() < deadlineMs)
-            Thread.sleep(5);
-        assertEquals(1, p.queued(), "waiter should be parked on the pool's queue");
+        TestUtils.waitForCondition(() -> p.queued() == 1, "waiter should be parked on the pool's queue");
 
         // While the waiter is parked, the pool's available memory must still report the
         // 1 chunk's worth — the waiter has NOT consumed any of it (no partial hold).
@@ -235,16 +229,10 @@ public void testFifoFairnessAcrossChunkedAndSingleChunkRequests() throws Excepti
         tMulti.start();
         assertTrue(multiStarted.await(2, TimeUnit.SECONDS));
         // Wait for tMulti to be parked before starting tSingle, so the FIFO order is deterministic.
-        long deadlineMs = System.currentTimeMillis() + 2_000;
-        while (p.queued() == 0 && System.currentTimeMillis() < deadlineMs)
-            Thread.sleep(5);
-        assertEquals(1, p.queued(), "multi-chunk waiter should be the only one queued");
+        TestUtils.waitForCondition(() -> p.queued() == 1, "multi-chunk waiter should be the only one queued");
         tSingle.start();
         // Wait for tSingle to also be parked.
-        deadlineMs = System.currentTimeMillis() + 2_000;
-        while (p.queued() < 2 && System.currentTimeMillis() < deadlineMs)
-            Thread.sleep(5);
-        assertEquals(2, p.queued(), "single-chunk waiter joined after the multi-chunk one");
+        TestUtils.waitForCondition(() -> p.queued() == 2, "single-chunk waiter joined after the multi-chunk one");
 
         // Now free chunks one at a time, allowing the multi-chunk request to accumulate.
         p.deallocate(h1);
@@ -302,10 +290,7 @@ public void testSlowPathDoesNotDoubleRefundPolledChunksOnTimeout() throws Except
         }, "partial-holder");
         t.start();
 
-        long deadline = System.currentTimeMillis() + 2_000;
-        while (p.queued() == 0 && System.currentTimeMillis() < deadline)
-            Thread.sleep(5);
-        assertEquals(1, p.queued());
+        TestUtils.waitForCondition(() -> p.queued() == 1, "waiter should be parked on the pool's queue");
 
         // Hand the waiter 2 chunks — it polls them into `pooled` then awaits again for the 3rd.
         p.deallocate(h1);
@@ -315,8 +300,7 @@ public void testSlowPathDoesNotDoubleRefundPolledChunksOnTimeout() throws Except
         Thread.sleep(100);
 
         t.join(5_000);
-        assertTrue(err.get() instanceof BufferExhaustedException,
-                "expected BufferExhaustedException, got " + err.get());
+        assertInstanceOf(BufferExhaustedException.class, err.get(), "expected BufferExhaustedException, got " + err.get());
 
         // After the throw, exactly 2 chunkSizes of physical memory should be back in the pool
         // (h1 + h2). h3 is still held outside the pool. The bug double-refunds: it credits
@@ -358,10 +342,7 @@ public void testSlowPathSuccessDoesNotCorruptAccounting() throws Exception {
         }, "slow-success");
         t.start();
 
-        long deadline = System.currentTimeMillis() + 2_000;
-        while (p.queued() == 0 && System.currentTimeMillis() < deadline)
-            Thread.sleep(5);
-        assertEquals(1, p.queued());
+        TestUtils.waitForCondition(() -> p.queued() == 1, "waiter should be parked on the pool's queue");
 
         // Deallocate 2 chunks → waiter wakes, polls both, accumulated reaches memoryRequired,
         // exits normally. The success path's finally must NOT refund or decrement nonPool.
@@ -406,16 +387,12 @@ public void testCloseDuringAtomicWait() throws Exception {
             }
         }, "close-during-wait");
         t.start();
-        long deadlineMs = System.currentTimeMillis() + 2_000;
-        while (p.queued() == 0 && System.currentTimeMillis() < deadlineMs)
-            Thread.sleep(5);
-        assertEquals(1, p.queued());
+        TestUtils.waitForCondition(() -> p.queued() == 1, "waiter should be parked on the pool's queue");
 
         // Close the pool: signals all waiters; the waiter must throw KafkaException.
         p.close();
         t.join(5_000);
-        assertTrue(err.get() instanceof KafkaException,
-                "expected KafkaException, got " + err.get());
+        assertInstanceOf(KafkaException.class, err.get(), "expected KafkaException, got " + err.get());
         p.deallocate(h1);
         p.deallocate(h2);
     }
diff --git a/clients/src/test/java/org/apache/kafka/clients/producer/internals/ChunkedByteBufferOutputStreamTest.java b/clients/src/test/java/org/apache/kafka/clients/producer/internals/ChunkedByteBufferOutputStreamTest.java
index c96824b09c137..145876730e977 100644
--- a/clients/src/test/java/org/apache/kafka/clients/producer/internals/ChunkedByteBufferOutputStreamTest.java
+++ b/clients/src/test/java/org/apache/kafka/clients/producer/internals/ChunkedByteBufferOutputStreamTest.java
@@ -34,7 +34,6 @@ public class ChunkedByteBufferOutputStreamTest {
 
     private final MockTime time = new MockTime();
     private final Metrics metrics = new Metrics(time);
-    private final String metricGroup = "test";
 
     @AfterEach
     public void teardown() {
@@ -42,6 +41,7 @@ public void teardown() {
     }
 
     private ChunkedBufferPool pool(long total, int chunkSize) {
+        String metricGroup = "test";
         return new ChunkedBufferPool(total, chunkSize, metrics, time, metricGroup);
     }
 
@@ -49,14 +49,8 @@ private List<ByteBuffer> chunks(ChunkedBufferPool pool, int chunkSize, int count
         return pool.allocateChunks(chunkSize * count, 100);
     }
 
-    /**
-     * The constructor fails fast with {@link IllegalArgumentException} when its chunk contract is
-     * violated — a null or empty list, or a chunk whose capacity differs from {@code chunkSize} —
-     * rather than the bare NPE / IndexOutOfBoundsException that {@code initialChunks.get(0)} would
-     * otherwise throw from the {@code super(...)} call.
-     */
     @Test
-    public void testConstructorRejectsInvalidChunks() throws Exception {
+    public void testConstructorRejectsInvalidChunks() {
         int chunkSize = 16;
         ChunkedBufferPool p = pool(64, chunkSize);
 
@@ -71,123 +65,110 @@ public void testConstructorRejectsInvalidChunks() throws Exception {
             () -> new ChunkedByteBufferOutputStream(wrongSize, chunkSize, p));
     }
 
-    /** Writes that fit in the initial chunk: stream produces the bytes back via buffer(). */
     @Test
     public void testSingleChunkWriteRoundtrip() throws Exception {
         int chunkSize = 16;
         ChunkedBufferPool p = pool(64, chunkSize);
-        ChunkedByteBufferOutputStream stream = new ChunkedByteBufferOutputStream(chunks(p, chunkSize, 1), chunkSize, p);
+        try (ChunkedByteBufferOutputStream stream = new ChunkedByteBufferOutputStream(chunks(p, chunkSize, 1), chunkSize, p)) {
 
-        byte[] payload = new byte[]{1, 2, 3, 4, 5};
-        stream.write(payload, 0, payload.length);
+            byte[] payload = new byte[]{1, 2, 3, 4, 5};
+            stream.write(payload, 0, payload.length);
 
-        ByteBuffer flat = stream.buffer();
-        flat.flip();
-        byte[] out = new byte[flat.remaining()];
-        flat.get(out);
-        assertArrayEquals(payload, out);
+            ByteBuffer flat = stream.buffer();
+            flat.flip();
+            byte[] out = new byte[flat.remaining()];
+            flat.get(out);
+            assertArrayEquals(payload, out);
 
-        stream.deallocate();
+            stream.deallocate();
+        }
     }
 
-    /** Writes that span multiple pre-supplied chunks land in subsequent chunks in order. */
     @Test
-    public void testWriteSpansChunks() throws Exception {
+    public void testWriteAcrossMultipleChunks() throws Exception {
         int chunkSize = 8;
         ChunkedBufferPool p = pool(64, chunkSize);
-        ChunkedByteBufferOutputStream stream = new ChunkedByteBufferOutputStream(chunks(p, chunkSize, 3), chunkSize, p);
+        try (ChunkedByteBufferOutputStream stream = new ChunkedByteBufferOutputStream(chunks(p, chunkSize, 3), chunkSize, p)) {
 
-        byte[] payload = new byte[20];
-        for (int i = 0; i < payload.length; i++) payload[i] = (byte) i;
-        stream.write(payload, 0, payload.length);
+            byte[] payload = new byte[20];
+            for (int i = 0; i < payload.length; i++) payload[i] = (byte) i;
+            stream.write(payload, 0, payload.length);
 
-        ByteBuffer flat = stream.buffer();
-        flat.flip();
-        byte[] out = new byte[flat.remaining()];
-        flat.get(out);
-        assertArrayEquals(payload, out);
+            ByteBuffer flat = stream.buffer();
+            flat.flip();
+            byte[] out = new byte[flat.remaining()];
+            flat.get(out);
+            assertArrayEquals(payload, out);
 
-        stream.deallocate();
+            stream.deallocate();
+        }
     }
 
-    /** remaining() sums current + queued chunk capacities and shrinks as writes consume bytes. */
     @Test
-    public void testRemainingSumsChunks() throws Exception {
+    public void testRemainingSumsFreeBytesAcrossChunks() throws Exception {
         int chunkSize = 8;
         ChunkedBufferPool p = pool(64, chunkSize);
-        ChunkedByteBufferOutputStream stream = new ChunkedByteBufferOutputStream(chunks(p, chunkSize, 2), chunkSize, p);
+        try (ChunkedByteBufferOutputStream stream = new ChunkedByteBufferOutputStream(chunks(p, chunkSize, 2), chunkSize, p)) {
 
-        assertEquals(2 * chunkSize, stream.remaining());
-        stream.write(new byte[3], 0, 3);
-        assertEquals(2 * chunkSize - 3, stream.remaining());
-        stream.write(new byte[chunkSize], 0, chunkSize); // crosses into chunk 2
-        assertEquals(chunkSize - 3, stream.remaining());
+            assertEquals(2 * chunkSize, stream.remaining());
+            stream.write(new byte[3], 0, 3);
+            assertEquals(2 * chunkSize - 3, stream.remaining());
+            stream.write(new byte[chunkSize], 0, chunkSize); // crosses into chunk 2
+            assertEquals(chunkSize - 3, stream.remaining());
 
-        stream.deallocate();
+            stream.deallocate();
+        }
     }
 
-    /** addBuffers extends capacity so a write that would have overflowed now fits. */
     @Test
     public void testAddBuffersExtendsStream() throws Exception {
         int chunkSize = 8;
         ChunkedBufferPool p = pool(64, chunkSize);
-        ChunkedByteBufferOutputStream stream = new ChunkedByteBufferOutputStream(chunks(p, chunkSize, 1), chunkSize, p);
+        try (ChunkedByteBufferOutputStream stream = new ChunkedByteBufferOutputStream(chunks(p, chunkSize, 1), chunkSize, p)) {
 
-        // Fill the initial chunk.
-        stream.write(new byte[chunkSize], 0, chunkSize);
-        assertEquals(0, stream.remaining());
+            // Fill the initial chunk.
+            stream.write(new byte[chunkSize], 0, chunkSize);
+            assertEquals(0, stream.remaining());
 
-        // Extend and write more — must land in the new chunk.
-        stream.addBuffers(Collections.singletonList(p.allocate(chunkSize, 100)));
-        assertEquals(chunkSize, stream.remaining());
+            // Extend and write more — must land in the new chunk.
+            stream.addBuffers(Collections.singletonList(p.allocate(chunkSize, 100)));
+            assertEquals(chunkSize, stream.remaining());
 
-        byte[] more = new byte[]{9, 9, 9};
-        stream.write(more, 0, more.length);
-        assertEquals(chunkSize - 3, stream.remaining());
+            byte[] more = new byte[]{9, 9, 9};
+            stream.write(more, 0, more.length);
+            assertEquals(chunkSize - 3, stream.remaining());
 
-        stream.deallocate();
+            stream.deallocate();
+        }
     }
 
-    /** Writing past all pre-supplied (and added) chunks throws — uncompressed-only contract. */
-    @Test
-    public void testOverflowThrows() throws Exception {
-        int chunkSize = 4;
-        ChunkedBufferPool p = pool(64, chunkSize);
-        ChunkedByteBufferOutputStream stream = new ChunkedByteBufferOutputStream(chunks(p, chunkSize, 1), chunkSize, p);
-
-        stream.write(new byte[chunkSize], 0, chunkSize);
-        assertThrows(IllegalStateException.class, () -> stream.write((byte) 1));
-
-        stream.deallocate();
-    }
-
-    /** position(int) at construction walks across pre-supplied chunks. */
     @Test
     public void testPositionWalksAcrossChunks() throws Exception {
         int chunkSize = 4;
         ChunkedBufferPool p = pool(32, chunkSize);
-        ChunkedByteBufferOutputStream stream = new ChunkedByteBufferOutputStream(chunks(p, chunkSize, 3), chunkSize, p);
+        try (ChunkedByteBufferOutputStream stream = new ChunkedByteBufferOutputStream(chunks(p, chunkSize, 3), chunkSize, p)) {
 
-        stream.position(6); // straddles chunk 0 (4 bytes) and chunk 1 (2 bytes)
-        assertEquals(6, stream.position());
-        assertEquals(6, stream.remaining());
+            stream.position(6); // straddles chunk 0 (4 bytes) and chunk 1 (2 bytes)
+            assertEquals(6, stream.position());
+            assertEquals(6, stream.remaining());
 
-        stream.deallocate();
+            stream.deallocate();
+        }
     }
 
-    /** deallocate returns all chunks to the pool. */
     @Test
     public void testDeallocateReturnsAllChunks() throws Exception {
         int chunkSize = 8;
         long total = 64;
         ChunkedBufferPool p = pool(total, chunkSize);
         List<ByteBuffer> initial = chunks(p, chunkSize, 3);
-        ChunkedByteBufferOutputStream stream = new ChunkedByteBufferOutputStream(initial, chunkSize, p);
-        // Also attach one extra chunk so deallocate must handle the added-buffer case too.
-        stream.addBuffers(Collections.singletonList(p.allocate(chunkSize, 100)));
-        assertEquals(total - 4L * chunkSize, p.availableMemory());
+        try (ChunkedByteBufferOutputStream stream = new ChunkedByteBufferOutputStream(initial, chunkSize, p)) {
+            // Also attach one extra chunk so deallocate must handle the added-buffer case too.
+            stream.addBuffers(Collections.singletonList(p.allocate(chunkSize, 100)));
+            assertEquals(total - 4L * chunkSize, p.availableMemory());
 
-        stream.deallocate();
+            stream.deallocate();
+        }
         assertEquals(total, p.availableMemory());
     }
-}
+}
\ No newline at end of file
diff --git a/clients/src/test/java/org/apache/kafka/clients/producer/internals/ChunkedRecordAccumulatorTest.java b/clients/src/test/java/org/apache/kafka/clients/producer/internals/ChunkedRecordAccumulatorTest.java
index 97bdaa09c29c0..d658e201083fe 100644
--- a/clients/src/test/java/org/apache/kafka/clients/producer/internals/ChunkedRecordAccumulatorTest.java
+++ b/clients/src/test/java/org/apache/kafka/clients/producer/internals/ChunkedRecordAccumulatorTest.java
@@ -35,7 +35,6 @@
 
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collections;
 import java.util.Deque;
 import java.util.List;
@@ -65,7 +64,7 @@ public class ChunkedRecordAccumulatorTest {
 
     private final PartitionMetadata partMetadata1 =
             new PartitionMetadata(Errors.NONE, tp1, Optional.of(node1.id()), Optional.empty(), null, null, null);
-    private final List<PartitionMetadata> partMetadatas = new ArrayList<>(Arrays.asList(partMetadata1));
+    private final List<PartitionMetadata> partMetadatas = new ArrayList<>(List.of(partMetadata1));
     private final Map<Integer, Node> nodes =
             Stream.of(node1).collect(Collectors.toMap(Node::id, java.util.function.Function.identity()));
     private final MetadataSnapshot metadataCache = new MetadataSnapshot(null, nodes, partMetadatas,
@@ -91,7 +90,6 @@ private ChunkedRecordAccumulator newAccumulator(int batchSize, int chunkSize, lo
                 /* transactionManager */ null, pool);
     }
 
-    /** First record creates a batch backed by chunked allocation. */
     @Test
     public void testFirstRecordCreatesChunkedBatch() throws Exception {
         int chunkSize = 256;
@@ -109,12 +107,8 @@ public void testFirstRecordCreatesChunkedBatch() throws Exception {
         accum.close();
     }
 
-    /**
-     * A small follow-up record fits in the existing batch's pre-allocated chunks — no extension
-     * needed.
-     */
     @Test
-    public void testFollowupRecordFitsWithoutExtension() throws Exception {
+    public void testSmallFollowupRecordFitsWithoutExtension() throws Exception {
         int chunkSize = 256;
         ChunkedRecordAccumulator accum = newAccumulator(1024, chunkSize, 16L * chunkSize, Compression.NONE);
 
@@ -125,6 +119,7 @@ public void testFollowupRecordFitsWithoutExtension() throws Exception {
 
         Deque<ProducerBatch> dq = batchesFor(accum, tp1);
         assertEquals(1, dq.size(), "Both records should land in the same batch");
+        assertNotNull(dq.peekFirst());
         assertEquals(2, dq.peekFirst().recordCount);
         accum.close();
     }
@@ -144,6 +139,7 @@ public void testMidBatchExtensionGrowsExistingBatch() throws Exception {
 
         Deque<ProducerBatch> dq = batchesFor(accum, tp1);
         assertEquals(1, dq.size());
+        assertNotNull(dq.peekFirst());
         int initialRecordCount = dq.peekFirst().recordCount;
         assertEquals(1, initialRecordCount);
 
@@ -153,26 +149,17 @@ public void testMidBatchExtensionGrowsExistingBatch() throws Exception {
 
         // Same batch, now with the second record.
         assertEquals(1, dq.size(), "Should still be the same batch — extension should not roll");
+        assertNotNull(dq.peekFirst());
         assertEquals(2, dq.peekFirst().recordCount,
                 "Second record should land in the extended batch");
         accum.close();
     }
 
     /**
-     * Race between two concurrent appenders on the same chunked tail. Both threads probe
-     * {@code extensionBytesNeeded} under the deque lock against the same physical remaining
-     * capacity, drop the lock, and allocate gap-sized chunks off-lock. When the first appender
-     * re-takes the lock and consumes most of the original remaining, the second appender's
-     * allocation — sized against the pre-race remaining — is short by the bytes the first
-     * appender consumed. Without a re-probe in the second-lock block, the second appender
-     * proceeds to write past the stream's physical capacity and {@code advanceToNextChunk}
-     * throws {@link IllegalStateException}.
-     * <p>
-     * Coordination: a {@link ChunkedBufferPool} subclass blocks the FIRST {@code allocateChunks}
-     * call after the test arms a latch (i.e. thread A's). Thread B's allocate is the second call
-     * and proceeds normally — B re-takes the lock, attaches its chunks, appends its record, and
-     * returns. Only then is thread A released, so A attaches its chunks and tries to append
-     * against a tail whose remaining has already shrunk.
+     * Two concurrent appenders race to extend the same open batch, each sizing its extension
+     * against the same remaining capacity off-lock; once one attaches its chunks, the other's is
+     * short. Verifies the second-lock re-check makes the short appender re-check rather than write
+     * past the stream's capacity, which would fail.
      */
     @Test
     public void testConcurrentExtensionRaceDoesNotOverflowChunkedStream() throws Exception {
@@ -202,7 +189,7 @@ public List<ByteBuffer> allocateChunks(int totalSize, long maxTimeToBlockMs) thr
                 /* deliveryTimeoutMs */ 3200, metrics, "producer-metrics", time,
                 /* transactionManager */ null, pool);
         try {
-            // Warmup: tiny first record establishes a chunked tail with a known remainingInStream.
+            // Warmup: tiny first record establishes an open batch with a known remainingInStream.
             accum.append(topic, partition1, 0L, key, new byte[1], Record.EMPTY_HEADERS, null,
                     maxBlockTimeMs, time.milliseconds(), cluster);
 
@@ -224,8 +211,8 @@ public List<ByteBuffer> allocateChunks(int totalSize, long maxTimeToBlockMs) thr
             assertTrue(aHoldsChunks.await(10, TimeUnit.SECONDS), "A did not reach allocateChunks");
 
             // B runs on this thread. Its allocateChunks is the second call — armed=false now,
-            // so it proceeds without blocking. B probes the same R as A (A hasn't attached yet),
-            // attaches its own chunks, appends its record. Now the tail's remaining has shrunk
+            // so it proceeds without blocking. B reads the same R as A (A hasn't attached yet),
+            // attaches its own chunks, appends its record. Now the open batch's remaining has shrunk
             // by B's actual record size, but A's still-off-lock allocation didn't know that.
             accum.append(topic, partition1, 0L, key, new byte[recordValueSize], Record.EMPTY_HEADERS, null,
                     maxBlockTimeMs, time.milliseconds(), cluster);
@@ -245,17 +232,9 @@ public List<ByteBuffer> allocateChunks(int totalSize, long maxTimeToBlockMs) thr
     }
 
     /**
-     * Regression: when the sticky partition switches while extension chunks are held off-lock,
-     * those chunks — sized against the <em>previous</em> partition's tail batch — must be refunded
-     * to the pool on the retry, not carried into the next iteration and attached to a different
-     * partition's tail (which would over-extend the wrong batch).
-     * <p>
-     * Deterministic reproduction without threads: the extension path is the only one that calls
-     * {@code allocateChunks} non-blocking ({@code maxTimeToBlockMs == 0}), so the pool flags when an
-     * extension allocation has happened; a {@code partitionChanged} override then fires exactly one
-     * spurious switch on the very next check — which is the second-sync-block check, with the
-     * extension chunks held. The fix refunds those chunks before the retry; without it they are
-     * carried and attached instead, so no refund is observed during the append.
+     * When the sticky partition switches while extension chunks are held off-lock, those
+     * chunks (sized against the previous partition's open batch) must be refunded to the pool on the
+     * retry (not carried over and attached to a different partition's open batch)
      */
     @Test
     public void testPartitionSwitchRefundsHeldExtensionChunks() throws Exception {
@@ -299,7 +278,7 @@ protected boolean partitionChanged(String topic, TopicInfo topicInfo,
             }
         };
         try {
-            // Warmup: tiny first record establishes a chunked tail (first-record path, blocking
+            // Warmup: tiny first record establishes an open batch (first-record path, blocking
             // allocate — does not flag extensionAllocated).
             accum.append(topic, partition1, 0L, key, new byte[1], Record.EMPTY_HEADERS, null,
                     maxBlockTimeMs, time.milliseconds(), cluster);
@@ -318,6 +297,7 @@ protected boolean partitionChanged(String topic, TopicInfo topicInfo,
             // The record still appends correctly after the switch-retry.
             Deque<ProducerBatch> dq = batchesFor(accum, tp1);
             assertEquals(1, dq.size());
+            assertNotNull(dq.peekFirst());
             assertEquals(2, dq.peekFirst().recordCount,
                     "second record should still land in the batch after the switch-retry");
         } finally {
@@ -325,14 +305,6 @@ protected boolean partitionChanged(String topic, TopicInfo topicInfo,
         }
     }
 
-    /**
-     * Regression guard: an inflight chunked batch returns all K chunks to the pool on the
-     * expiration / broker-disconnect path, not just one {@code initialCapacity} chunkSize.
-     * For an inflight batch the parent {@code RecordAccumulator.deallocate} calls
-     * {@code batch.deallocateInflightBuffer(pool)} and then throws; {@link ChunkedProducerBatch}
-     * overrides that hook to return all chunks (instead of donating one fresh buffer). Removing
-     * that override would re-introduce a (K−1)×chunkSize leak and fail this test.
-     */
     @Test
     public void testInflightExpirationReturnsAllChunksToPool() throws Exception {
         int chunkSize = 128;
@@ -377,13 +349,7 @@ private Deque<ProducerBatch> batchesFor(RecordAccumulator accum, TopicPartition
     }
 
     /**
-     * Sender's drain (and any other caller of {@code batch.close()}) cascades through
-     * {@code MemoryRecordsBuilder.closeForRecordAppends()} →
-     * {@code appendStream.close()} → {@code bufferStream.close()}. The chunk data must remain
-     * readable through that cascade so the builder can flatten the chunks into the heap buffer it
-     * sends over the network — chunks are returned to the pool only at batch completion
-     * (deallocate), never at close. If they were released at close, the flattened buffer would be
-     * empty and the header write would fail (or produce an empty record set).
+     * Test that chunks are returned to the pool only at batch completion (deallocate), never at close.
      */
     @Test
     public void testBatchCloseDoesNotDeallocateChunksPrematurely() throws Exception {
@@ -417,11 +383,8 @@ public void testBatchCloseDoesNotDeallocateChunksPrematurely() throws Exception
     }
 
     /**
-     * Chunks attached grow with the batch's cumulative projected output, not per-record. With a
-     * chunkSize smaller than the batch's eventual content, a sequence of small records should
-     * extend the chunks as the running total (header + uncompressed bytes for NONE) crosses
-     * chunk boundaries — not allocate the first record's worth and then never extend until
-     * physical capacity is exhausted (which the per-record formula would do).
+     * As small records accumulate in a batch, the attached chunks grow with the batch's cumulative
+     * projected output (not per-record).
      */
     @Test
     public void testExtensionTracksCumulativeBatchSize() throws Exception {
@@ -455,15 +418,8 @@ public void testExtensionTracksCumulativeBatchSize() throws Exception {
         accum.close();
     }
 
-    /**
-     * The cumulative sizing formula counts the batch header once. The previous per-record
-     * formula effectively included the batch header in each record's upper bound, which
-     * over-allocated as records accumulated. With the cumulative formula, chunk count for N
-     * small records of total uncompressed size {@code U} is {@code ceil((header + U) / chunkSize)},
-     * not {@code N × ceil((header + recordSize) / chunkSize)}.
-     */
     @Test
-    public void testCumulativeAccountsForHeaderOnce() throws Exception {
+    public void testCumulativeAccountsForBatchHeaderOnce() throws Exception {
         int chunkSize = 256;
         int batchSize = 8192;
         long totalMemory = 64L * chunkSize;

From 860bde4364550fae304a834b2c5c1dea12320af2 Mon Sep 17 00:00:00 2001
From: Lianet Magrans <lmagrans@confluent.io>
Date: Tue, 23 Jun 2026 13:38:24 -0400
Subject: [PATCH 09/11] fixes

---
 .../apache/kafka/clients/producer/KafkaProducer.java | 12 +++++++-----
 .../internals/ChunkedByteBufferOutputStream.java     |  2 ++
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/KafkaProducer.java b/clients/src/main/java/org/apache/kafka/clients/producer/KafkaProducer.java
index 3b13014768281..85bd74a4fc61d 100644
--- a/clients/src/main/java/org/apache/kafka/clients/producer/KafkaProducer.java
+++ b/clients/src/main/java/org/apache/kafka/clients/producer/KafkaProducer.java
@@ -462,16 +462,18 @@ public KafkaProducer(Properties properties, Serializer<K> keySerializer, Seriali
             String allocationStrategy = config.getString(ProducerConfig.BUFFER_MEMORY_ALLOCATION_STRATEGY_CONFIG)
                     .toLowerCase(Locale.ROOT);
             boolean incremental = ProducerConfig.BUFFER_MEMORY_ALLOCATION_STRATEGY_INCREMENTAL.equals(allocationStrategy);
-            if (incremental && compression.type() != CompressionType.NONE) {
+            // Use the chunked path only when a batch is at least one full chunk
+            // (batch.size >= CHUNK_SIZE). Below that, a batch can't fill even one chunk, so chunking
+            // would over-reserve and the producer falls back to the full strategy instead.
+            boolean useIncremental = incremental && batchSize >= ChunkedRecordAccumulator.CHUNK_SIZE;
+            // The chunked path does not support compression yet (TODO: KAFKA-20579)
+            if (useIncremental && compression.type() != CompressionType.NONE) {
                 throw new ConfigException("The " + ProducerConfig.BUFFER_MEMORY_ALLOCATION_STRATEGY_INCREMENTAL
                         + " " + ProducerConfig.BUFFER_MEMORY_ALLOCATION_STRATEGY_CONFIG
                         + " does not support compression yet. " + ProducerConfig.COMPRESSION_TYPE_CONFIG
                         + " must be set to none.");
             }
-            // Use the chunked path only when a batch is at least one full chunk
-            // (batch.size >= CHUNK_SIZE). Below that, a batch can't fill even one chunk, so chunking
-            // would over-reserve.
-            if (incremental && batchSize >= ChunkedRecordAccumulator.CHUNK_SIZE) {
+            if (useIncremental) {
                 this.accumulator = new ChunkedRecordAccumulator(logContext,
                         batchSize,
                         compression,
diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedByteBufferOutputStream.java b/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedByteBufferOutputStream.java
index 97b89cd1b4416..101a5b15cabdc 100644
--- a/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedByteBufferOutputStream.java
+++ b/clients/src/main/java/org/apache/kafka/clients/producer/internals/ChunkedByteBufferOutputStream.java
@@ -216,6 +216,8 @@ public void position(int position) {
      */
     @Override
     public int remaining() {
+        if (currentChunk == null)  // after deallocate no chunks attached, no free capacity
+            return 0;
         int total = currentChunk.remaining();
         for (int i = currentChunkIndex + 1; i < chunks.size(); i++)
             total += chunks.get(i).remaining();

From 179e6750601c41c5155105786a2f6dad476c0d37 Mon Sep 17 00:00:00 2001
From: Lianet Magrans <lmagrans@confluent.io>
Date: Tue, 23 Jun 2026 16:27:11 -0400
Subject: [PATCH 10/11] checkstyle

---
 .../clients/producer/internals/ChunkedBufferPoolTest.java      | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/clients/src/test/java/org/apache/kafka/clients/producer/internals/ChunkedBufferPoolTest.java b/clients/src/test/java/org/apache/kafka/clients/producer/internals/ChunkedBufferPoolTest.java
index 45e9aceb12cf5..5b0f6f9b4fbb4 100644
--- a/clients/src/test/java/org/apache/kafka/clients/producer/internals/ChunkedBufferPoolTest.java
+++ b/clients/src/test/java/org/apache/kafka/clients/producer/internals/ChunkedBufferPoolTest.java
@@ -17,6 +17,7 @@
 package org.apache.kafka.clients.producer.internals;
 
 import org.apache.kafka.clients.producer.BufferExhaustedException;
+import org.apache.kafka.common.KafkaException;
 import org.apache.kafka.common.metrics.Metrics;
 import org.apache.kafka.common.utils.MockTime;
 import org.apache.kafka.test.TestUtils;
@@ -24,8 +25,6 @@
 import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.Test;
 
-import org.apache.kafka.common.KafkaException;
-
 import java.nio.ByteBuffer;
 import java.util.List;
 import java.util.concurrent.CountDownLatch;

From 9b45abc201bf270bd093d417bbd0589ca0768e84 Mon Sep 17 00:00:00 2001
From: Lianet Magrans <lmagrans@confluent.io>
Date: Tue, 23 Jun 2026 17:38:44 -0400
Subject: [PATCH 11/11] fix test

---
 .../org/apache/kafka/tools/other/ReplicationQuotasTestRig.java | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/src/test/java/org/apache/kafka/tools/other/ReplicationQuotasTestRig.java b/tools/src/test/java/org/apache/kafka/tools/other/ReplicationQuotasTestRig.java
index 8e646534263a7..7830889a5313d 100644
--- a/tools/src/test/java/org/apache/kafka/tools/other/ReplicationQuotasTestRig.java
+++ b/tools/src/test/java/org/apache/kafka/tools/other/ReplicationQuotasTestRig.java
@@ -414,7 +414,8 @@ KafkaProducer<byte[], byte[]> createProducer() {
                 Option.empty(),
                 new ByteArraySerializer(),
                 new ByteArraySerializer(),
-                false
+                false,
+                Option.empty()
             );
         }
     }