From 81c5bd36b44e71186f6e77e2c9c35503b07bce28 Mon Sep 17 00:00:00 2001 From: Tomasz Pietrek Date: Fri, 8 May 2026 12:29:05 +0200 Subject: [PATCH 1/9] Improve batch publish errors Signed-off-by: Tomasz Pietrek --- jetstream-extra/Cargo.toml | 2 +- jetstream-extra/README.md | 54 +++++++++++++++++++ jetstream-extra/src/batch_publish.rs | 6 +++ jetstream-extra/src/lib.rs | 18 +++++++ jetstream-extra/tests/batch_publish_errors.rs | 11 ++-- nats-counters/Cargo.toml | 4 +- 6 files changed, 88 insertions(+), 7 deletions(-) diff --git a/jetstream-extra/Cargo.toml b/jetstream-extra/Cargo.toml index e10886b..21381ce 100644 --- a/jetstream-extra/Cargo.toml +++ b/jetstream-extra/Cargo.toml @@ -12,7 +12,7 @@ keywords = ["nats", "extra", "extensions", "api", "jetstream"] categories = ["network-programming", "api-bindings"] [dependencies] -async-nats = "0.45.0" +async-nats = "0.48.0" bytes = "1" futures = "0.3" futures-util = "0.3" diff --git a/jetstream-extra/README.md b/jetstream-extra/README.md index 036c4a6..0551bca 100644 --- a/jetstream-extra/README.md +++ b/jetstream-extra/README.md @@ -10,6 +10,7 @@ Set of utilities and extensions for the JetStream NATS of the [async-nats](https ## Features - **Batch Publishing** - Atomic batch publishing ensuring all-or-nothing message storage +- **Fast Ingest Batch Publishing** - High-throughput, non-atomic batch publishing with server-driven flow control (requires nats-server 2.14+) - **Batch Fetching** - Efficient multi-message retrieval using DIRECT.GET API ## Batch Publishing @@ -55,6 +56,59 @@ async fn main() -> Result<(), Box> { } ``` +## Fast Ingest Batch Publishing + +High-throughput, non-atomic batch publishing using JetStream's fast-ingest feature (ADR-50, requires nats-server 2.14 or later). Unlike atomic batch publishing, messages are persisted as they arrive and the server uses a flow-control channel to coordinate throughput across concurrent publishers. 
+ +Use fast ingest when: +- You need to ship millions of messages per batch and don't need all-or-nothing semantics. +- Throughput matters more than atomicity. +- You want the server to dynamically tune ack frequency based on load. + +The stream must have `allow_batched: true`. The publisher owns a dedicated inbox subscription for the duration of the batch and drives ack handling inline — no background task, no locks. + +### Complete example + +```rust +use async_nats::jetstream; +use jetstream_extra::batch_publish_fast::{FastPublishExt, GapMode}; +use std::time::Duration; + +#[tokio::main] +async fn main() -> Result<(), Box> { + let client = async_nats::connect("nats://127.0.0.1:4222").await?; + let jetstream = jetstream::new(client); + + // Stream must have allow_batched: true (not yet exposed in async-nats + // 0.45.0 StreamConfig — create via raw JetStream API until upstream + // adds the field). + + let mut batch = jetstream + .fast_publish() + .flow(100) // ack every 100 messages (ceiling) + .max_outstanding_acks(2) // up to 200 messages in flight + .gap_mode(GapMode::Fail) // abort on any gap (default) + .ack_timeout(Duration::from_secs(10)) + .on_error(|e| eprintln!("fast publish event: {e}")) + .build()?; + + // Stream 10,000 messages. The stall gate transparently waits for flow + // acks and sends pings to recover from any lost acks. + for i in 0..10_000 { + batch.add("metrics.cpu", format!("sample {i}").into()).await?; + } + + // End-of-batch commit — the commit message itself is NOT stored. + // Use `commit(...)` instead if you want a final message persisted. + let ack = batch.close().await?; + println!("committed {} messages as batch {}", ack.batch_size, ack.batch_id); + + Ok(()) +} +``` + +See `examples/fast_publisher.rs` for a runnable example. 
+ ## Batch Fetching Efficient batch fetching of messages from JetStream streams using the DIRECT.GET API, supporting: diff --git a/jetstream-extra/src/batch_publish.rs b/jetstream-extra/src/batch_publish.rs index c14639e..eb6914c 100644 --- a/jetstream-extra/src/batch_publish.rs +++ b/jetstream-extra/src/batch_publish.rs @@ -862,6 +862,8 @@ pub enum BatchPublishErrorKind { BatchPublishNotEnabled, /// Batch publish is incomplete and was abandoned BatchPublishIncomplete, + /// Server has too many inflight batches (server limit: 50) + BatchPublishTooManyInflight, /// Batch uses unsupported headers (Nats-Msg-Id or Nats-Expected-Last-Msg-Id) BatchPublishUnsupportedHeader, /// Other unspecified error @@ -875,6 +877,7 @@ impl BatchPublishErrorKind { match error.error_code() { ErrorCode::ATOMIC_PUBLISH_DISABLED => Self::BatchPublishNotEnabled, ErrorCode::ATOMIC_PUBLISH_INCOMPLETE_BATCH => Self::BatchPublishIncomplete, + ErrorCode::ATOMIC_PUBLISH_TOO_MANY_INFLIGHT => Self::BatchPublishTooManyInflight, ErrorCode::ATOMIC_PUBLISH_UNSUPPORTED_HEADER => Self::BatchPublishUnsupportedHeader, ErrorCode::ATOMIC_PUBLISH_TOO_LARGE_BATCH => Self::MaxMessagesExceeded, _ => Self::Other, @@ -895,6 +898,9 @@ impl Display for BatchPublishErrorKind { Self::BatchPublishIncomplete => { write!(f, "batch publish is incomplete and was abandoned") } + Self::BatchPublishTooManyInflight => { + write!(f, "server has too many inflight batches (limit: 50)") + } Self::BatchPublishUnsupportedHeader => write!( f, "batch uses unsupported headers (Nats-Msg-Id or Nats-Expected-Last-Msg-Id)" diff --git a/jetstream-extra/src/lib.rs b/jetstream-extra/src/lib.rs index 8a9bd46..dd01d19 100644 --- a/jetstream-extra/src/lib.rs +++ b/jetstream-extra/src/lib.rs @@ -34,6 +34,23 @@ //! # } //! ``` //! +//! ## Fast Ingest Batch Publishing +//! +//! The [batch_publish_fast] module provides high-throughput, non-atomic batch publishing +//! using JetStream's fast-ingest feature (nats-server 2.14+): +//! +//! ```no_run +//! 
# use jetstream_extra::batch_publish_fast::FastPublishExt; +//! # async fn example(client: impl FastPublishExt) -> Result<(), Box> { +//! let mut batch = client.fast_publish().build()?; +//! for i in 0..1000 { +//! batch.add("events.data", format!("msg {i}").into()).await?; +//! } +//! let ack = batch.commit("events.done", "final".into()).await?; +//! # Ok(()) +//! # } +//! ``` +//! //! ## Batch Fetching //! //! The [batch_fetch] module provides efficient batch fetching of messages from streams @@ -61,6 +78,7 @@ pub mod batch_fetch; pub mod batch_publish; +pub mod batch_publish_fast; pub use async_nats::Subject; /// Re-exported type returned by Direct Get operation. diff --git a/jetstream-extra/tests/batch_publish_errors.rs b/jetstream-extra/tests/batch_publish_errors.rs index e462c64..f45b560 100644 --- a/jetstream-extra/tests/batch_publish_errors.rs +++ b/jetstream-extra/tests/batch_publish_errors.rs @@ -235,10 +235,10 @@ mod batch_publish_error_tests { let err = batch.add("test_incomplete.1", "data".into()).await; // This might fail either immediately or on commit - if err.is_err() { + if let Err(err) = err { assert_eq!( - err.unwrap_err().kind(), - BatchPublishErrorKind::BatchPublishIncomplete + err.kind(), + BatchPublishErrorKind::BatchPublishTooManyInflight ); } else { // If add succeeded, commit should fail @@ -246,7 +246,10 @@ mod batch_publish_error_tests { .commit("test_incomplete.2", "final".into()) .await .unwrap_err(); - assert_eq!(err.kind(), BatchPublishErrorKind::BatchPublishIncomplete); + assert_eq!( + err.kind(), + BatchPublishErrorKind::BatchPublishTooManyInflight + ); } } diff --git a/nats-counters/Cargo.toml b/nats-counters/Cargo.toml index f242c77..7aca989 100644 --- a/nats-counters/Cargo.toml +++ b/nats-counters/Cargo.toml @@ -12,9 +12,9 @@ keywords = ["nats","extensions", "api", "jetstream", "counters"] categories = ["network-programming", "api-bindings"] [dependencies] -async-nats = "0.45.0" +async-nats = "0.48.0" futures-util = "0.3" 
-jetstream-extra = "0.2.1" +jetstream-extra = { version = "0.2.1", path = "../jetstream-extra" } num-bigint = "0.4.6" serde = { version = "1", features = ["derive"] } serde_json = "1.0.145" From 1571c835d8b7a7763350a84afa91802b423d44c1 Mon Sep 17 00:00:00 2001 From: Tomasz Pietrek Date: Fri, 8 May 2026 13:28:53 +0200 Subject: [PATCH 2/9] Add fast ingest Signed-off-by: Tomasz Pietrek --- jetstream-extra/src/batch_publish.rs | 237 +++++++++++++----- jetstream-extra/tests/batch_publish_errors.rs | 136 ++++++++++ 2 files changed, 315 insertions(+), 58 deletions(-) diff --git a/jetstream-extra/src/batch_publish.rs b/jetstream-extra/src/batch_publish.rs index eb6914c..05c107d 100644 --- a/jetstream-extra/src/batch_publish.rs +++ b/jetstream-extra/src/batch_publish.rs @@ -71,6 +71,35 @@ //! # } //! ``` //! +//! ## Per-Message Options (TTL, Expectations) +//! +//! Use [`async_nats::jetstream::message::PublishMessage::build`] to attach per-message +//! TTL or stream-state expectations, then pass the result to [BatchPublish::add_message] +//! or [BatchPublish::commit_message]: +//! +//! ```no_run +//! # use jetstream_extra::batch_publish::BatchPublishExt; +//! # use async_nats::jetstream::message::PublishMessage; +//! # use std::time::Duration; +//! # async fn example(client: impl BatchPublishExt) -> Result<(), Box> { +//! let mut batch = client.batch_publish().build(); +//! +//! // Per-ADR-50, `Nats-Expected-Last-Sequence` is allowed only on the first message. +//! let first = PublishMessage::build() +//! .expected_last_sequence(0) +//! .outbound_message("events.1"); +//! batch.add_message(first).await?; +//! +//! let with_ttl = PublishMessage::build() +//! .ttl(Duration::from_secs(60)) +//! .outbound_message("events.ephemeral"); +//! batch.add_message(with_ttl).await?; +//! +//! batch.commit("events.final", "done".into()).await?; +//! # Ok(()) +//! # } +//! ``` +//! //! # Flow Control //! //! Both APIs support flow control through acknowledgments: @@ -84,26 +113,31 @@ //! 
All operations return [BatchPublishError] with specific error kinds: //! //! - `BatchPublishNotEnabled` - Stream doesn't have `allow_atomic_publish` enabled -//! - `BatchPublishIncomplete` - Too many outstanding batches (server limit: 50) -//! - `BatchPublishUnsupportedHeader` - Message contains `Nats-Msg-Id` or `Nats-Expected-Last-Msg-Id` +//! - `BatchPublishIncomplete` - Batch was abandoned by the server (e.g. inactivity timeout) +//! - `BatchPublishTooManyInflight` - Server inflight cap reached (50/stream, 1000/server) +//! - `BatchPublishMissingSeq` - Batch sequence header missing or malformed +//! - `BatchPublishInvalidId` - Batch ID invalid (e.g. exceeds 64 characters) +//! - `BatchPublishInvalidCommit` - Commit marker invalid +//! - `BatchPublishDuplicateMsgId` - Two messages in batch share `Nats-Msg-Id` +//! - `BatchPublishMirror` - Stream is a mirror; mirrors are incompatible with atomic publish +//! - `BatchPublishUnsupportedHeader` - Message contains an unsupported header //! - `MaxMessagesExceeded` - Batch exceeds 1000 message limit //! - `EmptyBatch` - Attempting to commit an empty batch +//! - `BatchClosed` - Operation attempted on a batch that already failed +//! - `InvalidAck` - Server commit ack failed invariant checks //! //! Server errors are automatically mapped to the appropriate error kind based on the error code. //! Errors during `add` with flow control may indicate transient issues or configuration problems. 
use futures_util::{Stream, StreamExt}; -use std::{ - fmt::{Debug, Display}, - time::Duration, -}; +use std::{fmt::Display, time::Duration}; use async_nats::{ Request, client, jetstream::{self, message::OutboundMessage, response::Response}, subject::ToSubject, }; -use serde::{Deserialize, de::DeserializeOwned}; +use serde::Deserialize; /// Maximum number of messages allowed in a single batch (server limit) const MAX_BATCH_SIZE: u64 = 1000; @@ -189,17 +223,19 @@ where ack_every: self.ack_every, ack_first: self.ack_first, timeout: self.timeout, + closed: false, } } } pub struct BatchPublish { - pub context: C, - pub sequence: u64, - pub batch_id: String, + pub(crate) context: C, + pub(crate) sequence: u64, + pub(crate) batch_id: String, ack_every: Option, ack_first: bool, timeout: Duration, + closed: bool, } impl BatchPublish @@ -209,15 +245,9 @@ where + jetstream::context::traits::TimeoutProvider + Clone, { - pub fn new(context: C, sequence: u64, batch_id: String) -> Self { - Self { - sequence, - batch_id, - timeout: context.timeout(), - context, - ack_first: true, - ack_every: None, - } + /// Returns the unique batch identifier (used in `Nats-Batch-Id` headers). + pub fn batch_id(&self) -> &str { + &self.batch_id } /// Get the current number of messages in the batch. @@ -298,40 +328,49 @@ where &mut self, mut message: jetstream::message::OutboundMessage, ) -> Result<(), BatchPublishError> { - // Check for unsupported headers - if let Some(headers) = &message.headers - && (headers.get("Nats-Msg-Id").is_some() - || headers.get("Nats-Expected-Last-Msg-Id").is_some()) - { - return Err(BatchPublishError::new( - BatchPublishErrorKind::BatchPublishUnsupportedHeader, - )); + if self.closed { + return Err(BatchPublishError::new(BatchPublishErrorKind::BatchClosed)); } + // Validation errors do not close the batch — they leave server state untouched + // and the caller may retry with a corrected message. 
+ Self::reject_protocol_headers(message.headers.as_ref(), self.sequence)?; - self.sequence += 1; - - if self.sequence > MAX_BATCH_SIZE { + if self.sequence >= MAX_BATCH_SIZE { return Err(BatchPublishError::new( BatchPublishErrorKind::MaxMessagesExceeded, )); } + + self.sequence += 1; self.add_header(&mut message); - if let Some(ack_every) = self.ack_every + let result = if let Some(ack_every) = self.ack_every && self.sequence.is_multiple_of(ack_every) { - self.add_request(message).await?; + self.add_request(message).await } else if self.ack_first && self.sequence == 1 { - self.add_request(message).await?; + self.add_request(message).await } else { self.context .publish_message(message.into()) .await - .map_err(|e| BatchPublishError::with_source(BatchPublishErrorKind::Publish, e))?; + .map_err(|e| BatchPublishError::with_source(BatchPublishErrorKind::Publish, e)) + }; + + if let Err(e) = result { + self.closed = true; + return Err(e); } Ok(()) } + /// Returns `true` if the batch has been closed by an error and can no longer be used. + /// + /// Once closed, all subsequent `add` / `commit` calls return [BatchPublishErrorKind::BatchClosed]. + pub fn is_closed(&self) -> bool { + self.closed + } + /// Commit the batch with a final message. 
/// /// This sends the final message with batch headers and a commit marker, @@ -393,24 +432,18 @@ where mut self, mut message: jetstream::message::OutboundMessage, ) -> Result { - // Check for unsupported headers - if let Some(headers) = &message.headers - && (headers.get("Nats-Msg-Id").is_some() - || headers.get("Nats-Expected-Last-Msg-Id").is_some()) - { - return Err(BatchPublishError::new( - BatchPublishErrorKind::BatchPublishUnsupportedHeader, - )); + if self.closed { + return Err(BatchPublishError::new(BatchPublishErrorKind::BatchClosed)); } + Self::reject_protocol_headers(message.headers.as_ref(), self.sequence)?; - self.sequence += 1; - - if self.sequence > MAX_BATCH_SIZE { + if self.sequence >= MAX_BATCH_SIZE { return Err(BatchPublishError::new( BatchPublishErrorKind::MaxMessagesExceeded, )); } + self.sequence += 1; self.add_header(&mut message); // Headers are guaranteed to exist after add_header let headers = message @@ -421,6 +454,37 @@ where self.commit_request(message).await } + /// Reject protocol headers the user must not set. Per ADR-50, + /// `Nats-Expected-Last-Sequence` is allowed only on the first message; + /// `prior_sequence` is the publisher's `self.sequence` *before* incrementing + /// for the message being validated (i.e. 0 for the first message). 
+ fn reject_protocol_headers( + headers: Option<&async_nats::HeaderMap>, + prior_sequence: u64, + ) -> Result<(), BatchPublishError> { + let Some(headers) = headers else { + return Ok(()); + }; + const REJECTED: &[&str] = &[ + "Nats-Msg-Id", + "Nats-Expected-Last-Msg-Id", + "Nats-Batch-Commit", + "Nats-Batch-Id", + "Nats-Batch-Sequence", + ]; + if REJECTED.iter().any(|h| headers.get(*h).is_some()) { + return Err(BatchPublishError::new( + BatchPublishErrorKind::BatchPublishUnsupportedHeader, + )); + } + if prior_sequence >= 1 && headers.get("Nats-Expected-Last-Sequence").is_some() { + return Err(BatchPublishError::new( + BatchPublishErrorKind::BatchPublishUnsupportedHeader, + )); + } + Ok(()) + } + /// Discard the batch without committing. /// /// This consumes the batch without sending a commit message. The server will @@ -481,10 +545,10 @@ where } } - async fn commit_request( + async fn commit_request( &self, message: OutboundMessage, - ) -> Result { + ) -> Result { let request = Request { payload: Some(message.payload), headers: message.headers, @@ -497,7 +561,7 @@ where .await .map_err(|e| BatchPublishError::with_source(BatchPublishErrorKind::Request, e))?; - let resp: Response = serde_json::from_slice(response.payload.as_ref()) + let resp: Response = serde_json::from_slice(response.payload.as_ref()) .map_err(|e| BatchPublishError::with_source(BatchPublishErrorKind::Serialization, e))?; match resp { @@ -505,7 +569,15 @@ where let kind = BatchPublishErrorKind::from_api_error(&error); Err(BatchPublishError::with_source(kind, error)) } - Response::Ok(ack) => Ok(ack), + Response::Ok(ack) => { + if ack.stream.is_empty() + || ack.batch_id != self.batch_id + || ack.batch_size != self.sequence + { + return Err(BatchPublishError::new(BatchPublishErrorKind::InvalidAck)); + } + Ok(ack) + } } } } @@ -530,6 +602,10 @@ pub struct BatchPubAck { /// The number of messages in the committed batch. 
#[serde(rename = "count")] pub batch_size: u64, + /// The counter value, when the batch was published to a counter stream + /// (`AllowMsgCounter` enabled). + #[serde(default, rename = "val")] + pub value: Option, } /// Builder for bulk publishing multiple messages at once @@ -820,6 +896,7 @@ where ack_every: self.ack_every, ack_first: self.ack_first, timeout: self.timeout, + closed: false, }; // Buffer one message to identify the last @@ -843,8 +920,12 @@ where /// Error type for batch publish operations pub type BatchPublishError = async_nats::error::Error; -/// Kinds of errors that can occur during batch publish operations -#[derive(Debug, Clone, Copy, PartialEq)] +/// Kinds of errors that can occur during batch publish operations. +/// +/// Marked `#[non_exhaustive]` — adding a new variant in a future release will +/// not be a breaking change. Match on `_` for forward compatibility. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[non_exhaustive] pub enum BatchPublishErrorKind { /// Failed to send request to the server Request, @@ -852,19 +933,32 @@ pub enum BatchPublishErrorKind { Publish, /// Failed to serialize or deserialize data Serialization, - /// Batch is in an invalid state for the operation - BatchFull, /// Exceeded maximum allowed messages in batch (server limit: 1000) MaxMessagesExceeded, /// Empty batch cannot be committed EmptyBatch, + /// Batch was closed by a prior error and can no longer be used + BatchClosed, + /// Server commit ack failed invariant checks (mismatched batch id, count, or empty stream) + InvalidAck, /// Batch publishing is not enabled on the stream (allow_atomic_publish must be true) BatchPublishNotEnabled, /// Batch publish is incomplete and was abandoned BatchPublishIncomplete, - /// Server has too many inflight batches (server limit: 50) + /// Server has too many inflight batches (50 per stream, 1000 server-wide) BatchPublishTooManyInflight, - /// Batch uses unsupported headers (Nats-Msg-Id or 
Nats-Expected-Last-Msg-Id) + /// Batch sequence header missing or malformed + BatchPublishMissingSeq, + /// Batch ID is invalid (e.g. exceeds 64 characters) + BatchPublishInvalidId, + /// Batch commit marker is invalid + BatchPublishInvalidCommit, + /// Two messages in the batch share the same `Nats-Msg-Id` + BatchPublishDuplicateMsgId, + /// Stream is a mirror; mirrors are incompatible with atomic publish + BatchPublishMirror, + /// Batch uses unsupported headers (e.g. `Nats-Msg-Id`, `Nats-Expected-Last-Msg-Id`, + /// or protocol headers set by the user) BatchPublishUnsupportedHeader, /// Other unspecified error Other, @@ -880,6 +974,13 @@ impl BatchPublishErrorKind { ErrorCode::ATOMIC_PUBLISH_TOO_MANY_INFLIGHT => Self::BatchPublishTooManyInflight, ErrorCode::ATOMIC_PUBLISH_UNSUPPORTED_HEADER => Self::BatchPublishUnsupportedHeader, ErrorCode::ATOMIC_PUBLISH_TOO_LARGE_BATCH => Self::MaxMessagesExceeded, + ErrorCode::ATOMIC_PUBLISH_MISSING_SEQ => Self::BatchPublishMissingSeq, + ErrorCode::ATOMIC_PUBLISH_INVALID_BATCH_ID => Self::BatchPublishInvalidId, + ErrorCode::ATOMIC_PUBLISH_INVALID_BATCH_COMMIT => Self::BatchPublishInvalidCommit, + ErrorCode::ATOMIC_PUBLISH_CONTAINS_DUPLICATE_MESSAGE => { + Self::BatchPublishDuplicateMsgId + } + ErrorCode::MIRROR_WITH_ATOMIC_PUBLISH => Self::BatchPublishMirror, _ => Self::Other, } } @@ -891,7 +992,6 @@ impl Display for BatchPublishErrorKind { Self::Request => write!(f, "request failed"), Self::Publish => write!(f, "publish failed"), Self::Serialization => write!(f, "serialization/deserialization error"), - Self::BatchFull => write!(f, "batch is full"), Self::MaxMessagesExceeded => write!(f, "batch exceeds server limit (1000 messages)"), Self::EmptyBatch => write!(f, "empty batch cannot be committed"), Self::BatchPublishNotEnabled => write!(f, "batch publishing not enabled on stream"), @@ -899,12 +999,33 @@ impl Display for BatchPublishErrorKind { write!(f, "batch publish is incomplete and was abandoned") } 
Self::BatchPublishTooManyInflight => { - write!(f, "server has too many inflight batches (limit: 50)") + write!( + f, + "server has too many inflight batches (50 per stream, 1000 server-wide)" + ) + } + Self::BatchPublishMissingSeq => { + write!(f, "batch sequence header missing or malformed") + } + Self::BatchPublishInvalidId => { + write!(f, "batch id is invalid (e.g. exceeds 64 characters)") } + Self::BatchPublishInvalidCommit => write!(f, "batch commit marker is invalid"), + Self::BatchPublishDuplicateMsgId => { + write!(f, "two messages in the batch share the same Nats-Msg-Id") + } + Self::BatchPublishMirror => write!( + f, + "stream is a mirror; mirrors are incompatible with atomic publish" + ), Self::BatchPublishUnsupportedHeader => write!( f, - "batch uses unsupported headers (Nats-Msg-Id or Nats-Expected-Last-Msg-Id)" + "batch contains an unsupported header (e.g. Nats-Msg-Id, Nats-Expected-Last-Msg-Id, or a protocol header set by the user)" ), + Self::BatchClosed => { + write!(f, "batch was closed by a prior error and cannot be reused") + } + Self::InvalidAck => write!(f, "server commit ack failed invariant checks"), Self::Other => write!(f, "other error"), } } diff --git a/jetstream-extra/tests/batch_publish_errors.rs b/jetstream-extra/tests/batch_publish_errors.rs index f45b560..9a71385 100644 --- a/jetstream-extra/tests/batch_publish_errors.rs +++ b/jetstream-extra/tests/batch_publish_errors.rs @@ -348,4 +348,140 @@ mod batch_publish_error_tests { let info = stream.info().await.unwrap(); assert_eq!(info.state.messages, 501); } + + #[tokio::test] + async fn test_is_closed_after_server_error() { + let server = nats_server::run_server("tests/configs/jetstream.conf"); + let client = async_nats::connect(server.client_url()).await.unwrap(); + let jetstream = async_nats::jetstream::new(client); + + // Stream with atomic publish DISABLED — first add will fail with NotEnabled. 
+ let _ = setup_test_stream(&jetstream, "test_closed", false).await; + + let mut batch = jetstream.batch_publish().build(); + assert!(!batch.is_closed()); + + let err = batch.add("test_closed.1", "data".into()).await.unwrap_err(); + assert_eq!(err.kind(), BatchPublishErrorKind::BatchPublishNotEnabled); + assert!(batch.is_closed(), "batch must be closed after server error"); + + // Subsequent add must return BatchClosed, not retry. + let err = batch.add("test_closed.2", "data".into()).await.unwrap_err(); + assert_eq!(err.kind(), BatchPublishErrorKind::BatchClosed); + + // Commit must also return BatchClosed. + let err = batch + .commit("test_closed.3", "final".into()) + .await + .unwrap_err(); + assert_eq!(err.kind(), BatchPublishErrorKind::BatchClosed); + } + + #[tokio::test] + async fn test_validation_errors_do_not_close() { + let server = nats_server::run_server("tests/configs/jetstream.conf"); + let client = async_nats::connect(server.client_url()).await.unwrap(); + let jetstream = async_nats::jetstream::new(client); + + let _ = setup_test_stream(&jetstream, "test_no_close", true).await; + + let mut batch = jetstream.batch_publish().build(); + + // Bad header — validation error, must not close. + let mut bad_headers = async_nats::HeaderMap::new(); + bad_headers.insert("Nats-Msg-Id", "bad"); + let bad = OutboundMessage { + subject: "test_no_close.1".into(), + payload: "x".into(), + headers: Some(bad_headers), + }; + let err = batch.add_message(bad).await.unwrap_err(); + assert_eq!( + err.kind(), + BatchPublishErrorKind::BatchPublishUnsupportedHeader + ); + assert!(!batch.is_closed(), "validation error must not close batch"); + + // Recovery succeeds. 
+ batch.add("test_no_close.1", "ok".into()).await.unwrap(); + let ack = batch + .commit("test_no_close.2", "done".into()) + .await + .unwrap(); + assert_eq!(ack.batch_size, 2); + } + + #[tokio::test] + async fn test_protocol_headers_rejected() { + let server = nats_server::run_server("tests/configs/jetstream.conf"); + let client = async_nats::connect(server.client_url()).await.unwrap(); + let jetstream = async_nats::jetstream::new(client); + + let _ = setup_test_stream(&jetstream, "test_proto_hdr", true).await; + + for hdr in ["Nats-Batch-Commit", "Nats-Batch-Id", "Nats-Batch-Sequence"] { + let mut batch = jetstream.batch_publish().build(); + let mut headers = async_nats::HeaderMap::new(); + headers.insert(hdr, "anything"); + let msg = OutboundMessage { + subject: "test_proto_hdr.1".into(), + payload: "x".into(), + headers: Some(headers), + }; + let err = batch.add_message(msg).await.unwrap_err(); + assert_eq!( + err.kind(), + BatchPublishErrorKind::BatchPublishUnsupportedHeader, + "header {hdr} must be rejected on add" + ); + } + } + + #[tokio::test] + async fn test_expected_last_sequence_only_on_first() { + let server = nats_server::run_server("tests/configs/jetstream.conf"); + let client = async_nats::connect(server.client_url()).await.unwrap(); + let jetstream = async_nats::jetstream::new(client); + + let _ = setup_test_stream(&jetstream, "test_els", true).await; + + let mut batch = jetstream.batch_publish().build(); + + // First message with Nats-Expected-Last-Sequence:0 is allowed (empty stream). + let first = jetstream::message::PublishMessage::build() + .expected_last_sequence(0) + .outbound_message("test_els.1"); + batch.add_message(first).await.unwrap(); + + // Second message with the same header must be rejected client-side. 
+ let second = jetstream::message::PublishMessage::build() + .expected_last_sequence(0) + .outbound_message("test_els.2"); + let err = batch.add_message(second).await.unwrap_err(); + assert_eq!( + err.kind(), + BatchPublishErrorKind::BatchPublishUnsupportedHeader + ); + assert!(!batch.is_closed()); + + // Plain add still works after the validation error. + batch.add("test_els.2", "ok".into()).await.unwrap(); + let ack = batch.commit("test_els.3", "done".into()).await.unwrap(); + assert_eq!(ack.batch_size, 3); + } + + #[test] + fn test_batch_pub_ack_value_field_deserializes() { + use jetstream_extra::batch_publish::BatchPubAck; + + // Counter stream payload: server populates `val`. + let with_val = r#"{"stream":"S","seq":2,"batch":"abc","count":2,"val":"42"}"#; + let ack: BatchPubAck = serde_json::from_str(with_val).unwrap(); + assert_eq!(ack.value.as_deref(), Some("42")); + + // Non-counter stream payload: `val` absent → field is None, not an error. + let without_val = r#"{"stream":"S","seq":2,"batch":"abc","count":2}"#; + let ack: BatchPubAck = serde_json::from_str(without_val).unwrap(); + assert!(ack.value.is_none()); + } } From 2a4fe60f14813e5cb4439333d8ba71eb438982a9 Mon Sep 17 00:00:00 2001 From: Tomasz Pietrek Date: Fri, 8 May 2026 13:40:51 +0200 Subject: [PATCH 3/9] Add missing fast-ingest source, tests, and example Commit 81c5bd3 declared `pub mod batch_publish_fast;` in lib.rs, but neither it nor the follow-up commit 1571c83 included the module file itself, breaking CI with E0583. 
Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Tomasz Pietrek --- .../examples/batch_fetch_time_based.rs | 185 ++ .../batch_fetch_with_error_handling.rs | 123 ++ .../examples/batch_fetch_with_max_bytes.rs | 122 ++ jetstream-extra/examples/fast_publisher.rs | 102 + jetstream-extra/examples/test_batch_fetch.rs | 27 + jetstream-extra/src/batch_publish_fast.rs | 1703 +++++++++++++++++ jetstream-extra/tests/batch_publish_fast.rs | 508 +++++ 7 files changed, 2770 insertions(+) create mode 100644 jetstream-extra/examples/batch_fetch_time_based.rs create mode 100644 jetstream-extra/examples/batch_fetch_with_error_handling.rs create mode 100644 jetstream-extra/examples/batch_fetch_with_max_bytes.rs create mode 100644 jetstream-extra/examples/fast_publisher.rs create mode 100644 jetstream-extra/examples/test_batch_fetch.rs create mode 100644 jetstream-extra/src/batch_publish_fast.rs create mode 100644 jetstream-extra/tests/batch_publish_fast.rs diff --git a/jetstream-extra/examples/batch_fetch_time_based.rs b/jetstream-extra/examples/batch_fetch_time_based.rs new file mode 100644 index 0000000..067a8d1 --- /dev/null +++ b/jetstream-extra/examples/batch_fetch_time_based.rs @@ -0,0 +1,185 @@ +// Copyright 2025 Synadia Communications Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use async_nats::jetstream::{self, stream}; +use futures::StreamExt; +use jetstream_extra::batch_fetch::BatchFetchExt; +use std::time::{Duration, SystemTime}; +use time::OffsetDateTime; + +#[tokio::main] +async fn main() -> Result<(), Box> { + // Connect to NATS server + let client = async_nats::connect("localhost:4222").await?; + let context = jetstream::new(client); + + // Create a stream + let stream_name = "TIME_BASED_STREAM"; + context + .create_stream(stream::Config { + name: stream_name.to_string(), + subjects: vec!["sensor.*".to_string()], + allow_direct: true, + ..Default::default() + }) + .await?; + + // Publish messages at different times + println!("Publishing messages with time gaps:"); + + // Publish first batch + let _start_time = SystemTime::now(); + for i in 0..5 { + context + .publish("sensor.temperature", format!("temp reading {}", i).into()) + .await? + .await?; + println!(" Published: temp reading {} at start", i); + } + + // Wait 2 seconds + println!("\n Waiting 2 seconds...\n"); + tokio::time::sleep(Duration::from_secs(2)).await; + let mid_time = OffsetDateTime::now_utc(); + + // Publish second batch + for i in 5..10 { + context + .publish("sensor.temperature", format!("temp reading {}", i).into()) + .await? + .await?; + println!(" Published: temp reading {} at +2s", i); + } + + // Wait another 2 seconds + println!("\n Waiting 2 seconds...\n"); + tokio::time::sleep(Duration::from_secs(2)).await; + let late_time = OffsetDateTime::now_utc(); + + // Publish third batch + for i in 10..15 { + context + .publish("sensor.temperature", format!("temp reading {}", i).into()) + .await? 
+ .await?; + println!(" Published: temp reading {} at +4s", i); + } + + // Example 1: Fetch all messages + println!("\n=== Example 1: Fetch all messages ==="); + let mut messages = context.get_batch(stream_name, 20).send().await?; + + let mut all_count = 0; + while let Some(msg) = messages.next().await { + let _msg = msg?; + all_count += 1; + } + println!("Total messages in stream: {}", all_count); + + // Example 2: Fetch messages after mid_time (should get last 10) + println!("\n=== Example 2: Fetch messages after +2s mark ==="); + let mut messages = context + .get_batch(stream_name, 20) + .start_time(mid_time) + .send() + .await?; + + let mut mid_count = 0; + while let Some(msg) = messages.next().await { + let msg = msg?; + let payload = String::from_utf8(msg.payload.to_vec())?; + println!(" {}: {}", msg.sequence, payload); + mid_count += 1; + } + println!("Messages after +2s: {} (expected 10)", mid_count); + + // Example 3: Fetch messages after late_time (should get last 5) + println!("\n=== Example 3: Fetch messages after +4s mark ==="); + let mut messages = context + .get_batch(stream_name, 20) + .start_time(late_time) + .send() + .await?; + + let mut late_count = 0; + while let Some(msg) = messages.next().await { + let msg = msg?; + let payload = String::from_utf8(msg.payload.to_vec())?; + println!(" {}: {}", msg.sequence, payload); + late_count += 1; + } + println!("Messages after +4s: {} (expected 5)", late_count); + + // Example 4: Use sequence starting point instead of time + println!("\n=== Example 4: Starting from sequence 8 ==="); + let mut messages = context + .get_batch(stream_name, 20) + .sequence(8) + .send() + .await?; + + let mut combo_count = 0; + while let Some(msg) = messages.next().await { + let msg = msg?; + let payload = String::from_utf8(msg.payload.to_vec())?; + println!(" Seq {}: {}", msg.sequence, payload); + combo_count += 1; + } + println!("Messages from seq 8: {}", combo_count); + + // Example 5: Get last messages up to a specific 
time + println!("\n=== Example 5: Get last messages for subjects up to +2s mark ==="); + let mut messages = context + .get_last_messages_for(stream_name) + .subjects(vec!["sensor.temperature".to_string()]) + .up_to_time(mid_time) + .send() + .await?; + + while let Some(msg) = messages.next().await { + let msg = msg?; + let payload = String::from_utf8(msg.payload.to_vec())?; + println!( + " Last message up to +2s: {} (seq {})", + payload, msg.sequence + ); + } + + // Example 6: Demonstrate nanosecond precision preservation + println!("\n=== Example 6: Time precision check ==="); + let _precise_time = SystemTime::now(); + + // Publish a message with precise timestamp + context + .publish("sensor.precision", "precision test".into()) + .await? + .await?; + + // Fetch it back + let mut messages = context + .get_batch(stream_name, 1) + .subject("sensor.precision") + .send() + .await?; + + if let Some(msg) = messages.next().await { + let msg = msg?; + println!(" Message timestamp: {:?}", msg.time); + println!( + " Nanosecond precision preserved: {}", + msg.time.nanosecond() + ); + } + + Ok(()) +} diff --git a/jetstream-extra/examples/batch_fetch_with_error_handling.rs b/jetstream-extra/examples/batch_fetch_with_error_handling.rs new file mode 100644 index 0000000..7909817 --- /dev/null +++ b/jetstream-extra/examples/batch_fetch_with_error_handling.rs @@ -0,0 +1,123 @@ +// Copyright 2025 Synadia Communications Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use async_nats::jetstream::{self, stream}; +use futures::StreamExt; +use jetstream_extra::batch_fetch::{BatchFetchErrorKind, BatchFetchExt}; + +#[tokio::main] +async fn main() -> Result<(), Box> { + // Connect to NATS server + let client = async_nats::connect("localhost:4222").await?; + let context = jetstream::new(client); + + // Create a stream with direct access enabled + let stream_name = "EXAMPLE_STREAM"; + context + .create_stream(stream::Config { + name: stream_name.to_string(), + subjects: vec!["events.*".to_string()], + allow_direct: true, // Required for batch fetch + ..Default::default() + }) + .await?; + + // Publish some test messages + for i in 0..10 { + context + .publish( + format!("events.type{}", i % 3), + format!("message {}", i).into(), + ) + .await? + .await?; + } + + // Example 1: Handle batch size limit errors + println!("Example 1: Batch size limit validation"); + match context.get_batch(stream_name, 200).send().await { + Ok(_) => println!("Unexpected success"), + Err(e) if e.kind() == BatchFetchErrorKind::BatchSizeTooLarge => { + println!("✓ Correctly rejected batch size > 1000: {}", e); + } + Err(e) => println!("Unexpected error: {}", e), + } + + // Example 2: Handle empty stream name + println!("\nExample 3: Empty stream name validation"); + match context.get_batch("", 10).send().await { + Ok(_) => println!("Unexpected success"), + Err(e) if e.kind() == BatchFetchErrorKind::InvalidStreamName => { + println!("✓ Correctly rejected empty stream name: {}", e); + } + Err(e) => println!("Unexpected error: {}", e), + } + + // Example 4: Successful batch fetch with error handling + println!("\nExample 4: Successful batch fetch with error handling"); + let mut messages = context + .get_batch(stream_name, 5) + .subject("events.type0") + .send() + .await?; + + let mut count = 0; + while let Some(result) = messages.next().await { + match result { + Ok(msg) => { + println!( + " Message seq {}: subject={}, payload_size={}", + msg.sequence, + 
msg.subject, + msg.payload.len() + ); + count += 1; + } + Err(e) => { + // Handle individual message errors + match e.kind() { + BatchFetchErrorKind::NoMessages => { + println!(" No more messages available"); + break; + } + BatchFetchErrorKind::UnsupportedByServer => { + println!(" Server doesn't support batch fetch"); + break; + } + _ => { + println!(" Error fetching message: {}", e); + } + } + } + } + } + println!(" Successfully fetched {} messages", count); + + // Example 5: Handle too many subjects in multi_last + println!("\nExample 5: Too many subjects validation"); + let many_subjects: Vec = (0..1025).map(|i| format!("events.{}", i)).collect(); + match context + .get_last_messages_for(stream_name) + .subjects(many_subjects) + .send() + .await + { + Ok(_) => println!("Unexpected success"), + Err(e) if e.kind() == BatchFetchErrorKind::TooManySubjects => { + println!("✓ Correctly rejected > 1024 subjects: {}", e); + } + Err(e) => println!("Unexpected error: {}", e), + } + + Ok(()) +} diff --git a/jetstream-extra/examples/batch_fetch_with_max_bytes.rs b/jetstream-extra/examples/batch_fetch_with_max_bytes.rs new file mode 100644 index 0000000..1a001d2 --- /dev/null +++ b/jetstream-extra/examples/batch_fetch_with_max_bytes.rs @@ -0,0 +1,122 @@ +// Copyright 2025 Synadia Communications Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use async_nats::jetstream::{self, stream}; +use futures::StreamExt; +use jetstream_extra::batch_fetch::BatchFetchExt; + +#[tokio::main] +async fn main() -> Result<(), Box> { + // Connect to NATS server + let client = async_nats::connect("localhost:4222").await?; + let context = jetstream::new(client); + + // Create a stream + let stream_name = "BYTES_LIMIT_STREAM"; + context + .create_stream(stream::Config { + name: stream_name.to_string(), + subjects: vec!["data.*".to_string()], + allow_direct: true, + ..Default::default() + }) + .await?; + + // Publish messages of varying sizes + println!("Publishing messages of different sizes:"); + for i in 0..20 { + let size = (i + 1) * 100; // 100, 200, 300... bytes + let payload = "x".repeat(size); + context + .publish(format!("data.msg{}", i), payload.into()) + .await? + .await?; + println!(" Published message {} with {} bytes", i, size); + } + + // Example 1: Fetch messages with max_bytes limit + println!("\nFetching messages with max_bytes=1000:"); + let mut messages = context + .get_batch(stream_name, 20) + .max_bytes(1000) // But limit total bytes to 1000 + .send() + .await?; + + let mut total_bytes = 0; + let mut count = 0; + while let Some(msg) = messages.next().await { + let msg = msg?; + let payload_size = + base64::Engine::decode(&base64::engine::general_purpose::STANDARD, &msg.payload)?.len(); + total_bytes += payload_size; + count += 1; + println!( + " Message {}: {} bytes (total: {} bytes)", + msg.sequence, payload_size, total_bytes + ); + } + + println!( + "\nFetched {} messages totaling {} bytes (limit was 1000)", + count, total_bytes + ); + + // Example 2: Compare with unlimited fetch + println!("\nFetching same batch without byte limit:"); + let mut messages = context.get_batch(stream_name, 20).send().await?; + + let mut total_bytes_unlimited = 0; + let mut count_unlimited = 0; + while let Some(msg) = messages.next().await { + let msg = msg?; + let payload_size = + 
base64::Engine::decode(&base64::engine::general_purpose::STANDARD, &msg.payload)?.len(); + total_bytes_unlimited += payload_size; + count_unlimited += 1; + } + + println!( + "Without limit: {} messages, {} bytes", + count_unlimited, total_bytes_unlimited + ); + + // Example 3: Use max_bytes with subject filter + println!("\nCombining max_bytes with subject filter:"); + let mut messages = context + .get_batch(stream_name, 20) + .subject("data.msg1*") // Only msg10-19 + .max_bytes(2000) + .send() + .await?; + + let mut filtered_bytes = 0; + let mut filtered_count = 0; + while let Some(msg) = messages.next().await { + let msg = msg?; + let payload_size = + base64::Engine::decode(&base64::engine::general_purpose::STANDARD, &msg.payload)?.len(); + filtered_bytes += payload_size; + filtered_count += 1; + println!( + " {} (seq {}): {} bytes", + msg.subject, msg.sequence, payload_size + ); + } + + println!( + "\nFiltered fetch: {} messages, {} bytes", + filtered_count, filtered_bytes + ); + + Ok(()) +} diff --git a/jetstream-extra/examples/fast_publisher.rs b/jetstream-extra/examples/fast_publisher.rs new file mode 100644 index 0000000..3a74879 --- /dev/null +++ b/jetstream-extra/examples/fast_publisher.rs @@ -0,0 +1,102 @@ +// Copyright 2026 Synadia Communications Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Example: high-throughput fast-ingest batch publishing. +//! +//! Requires nats-server 2.14 or later running on `nats://127.0.0.1:4222`. +//! +//! 
```bash +//! # One-time setup — start nats-server with JetStream +//! nats-server -js +//! +//! # Run the example +//! cargo run -p jetstream-extra --example fast_publisher +//! ``` +//! +//! The example: +//! 1. Connects to NATS and creates a stream with `allow_batched: true` +//! (via a raw `$JS.API.STREAM.CREATE` request because +//! `async-nats 0.45.0`'s `StreamConfig` does not yet expose the field). +//! 2. Builds a `FastPublisher` with a low flow ceiling (to exercise the +//! stall gate + auto-ping on a small run). +//! 3. Publishes 500 messages, printing progress every 50. +//! 4. Closes the batch via end-of-batch commit (the EOB message itself is +//! not persisted; only the 500 real messages are). + +use std::time::Duration; + +use jetstream_extra::batch_publish_fast::{FastPublishExt, GapMode}; +use serde_json::json; + +#[tokio::main] +async fn main() -> Result<(), Box> { + let client = async_nats::connect("nats://127.0.0.1:4222").await?; + let js = async_nats::jetstream::new(client); + + // Create the stream via raw JSON (async-nats 0.45.0 doesn't know about + // allow_batched yet). + let _ = js.delete_stream("METRICS").await; + let body = json!({ + "name": "METRICS", + "subjects": ["metrics.>"], + "retention": "limits", + "storage": "file", + "allow_batched": true, + }); + let resp: serde_json::Value = js.request("STREAM.CREATE.METRICS", &body).await?; + if resp.get("error").is_some() { + return Err(format!("STREAM.CREATE failed: {resp}").into()); + } + println!("created stream METRICS with allow_batched=true"); + + // Build the publisher. Low flow + max=2 means acks arrive every 50 + // messages and up to 100 are in flight at once — exercises the stall + // gate and auto-ping path on a realistic load. 
+ let mut batch = js + .fast_publish() + .flow(50) + .max_outstanding_acks(2) + .gap_mode(GapMode::Fail) + .ack_timeout(Duration::from_secs(10)) + .on_error(|err| eprintln!("fast publish event: {err}")) + .build()?; + + println!("publishing 500 messages ..."); + for i in 0..500 { + let ack = batch + .add("metrics.cpu", format!("sample {i}").into()) + .await?; + if i % 50 == 49 { + println!( + " published seq={} (server acked up to {})", + ack.batch_sequence, ack.ack_sequence + ); + } + } + + // End-of-batch commit: the commit message itself is NOT stored, so the + // final stream state has exactly 500 messages. + let pub_ack = batch.close().await?; + println!( + "batch committed: stream={}, batch_size={}, batch_id={}", + pub_ack.stream, pub_ack.batch_size, pub_ack.batch_id + ); + + // Verify what landed on the stream. + let stream = js.get_stream("METRICS").await?; + let info = stream.get_info().await?; + println!("stream state: {} messages", info.state.messages); + assert_eq!(info.state.messages, 500); + + Ok(()) +} diff --git a/jetstream-extra/examples/test_batch_fetch.rs b/jetstream-extra/examples/test_batch_fetch.rs new file mode 100644 index 0000000..2e215d4 --- /dev/null +++ b/jetstream-extra/examples/test_batch_fetch.rs @@ -0,0 +1,27 @@ +use async_nats::jetstream; +use futures::StreamExt; +use jetstream_extra::batch_fetch::BatchFetchExt; + +#[tokio::main] +async fn main() -> Result<(), Box> { + // Connect to NATS + let client = async_nats::connect("localhost:4222").await?; + let context = jetstream::new(client); + + // Try to get a batch (will fail because server doesn't support yet) + match context.get_batch("test_stream", 20).send().await { + Ok(mut stream) => { + while let Some(msg) = stream.next().await { + match msg { + Ok(m) => println!("Got message: seq={}", m.sequence), + Err(e) => println!("Stream error: {:?}", e), + } + } + } + Err(e) => { + println!("Expected error (server doesn't support batch get): {:?}", e); + } + } + + Ok(()) +} diff --git 
a/jetstream-extra/src/batch_publish_fast.rs b/jetstream-extra/src/batch_publish_fast.rs new file mode 100644 index 0000000..2e3d052 --- /dev/null +++ b/jetstream-extra/src/batch_publish_fast.rs @@ -0,0 +1,1703 @@ +// Copyright 2026 Synadia Communications Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Fast-ingest batch publishing for NATS JetStream (ADR-50 fast ingest). +//! +//! Fast ingest is a high-throughput, non-atomic batch publisher. Unlike atomic +//! [`batch_publish`](crate::batch_publish), which stages an entire batch and +//! commits it or drops it, fast ingest persists each message as it arrives and +//! uses a persistent inbox subscription plus server-driven flow control to +//! coordinate throughput across many concurrent publishers. +//! +//! Requires nats-server 2.14 or later and `allow_batched: true` on the stream. +//! +//! # Architecture +//! +//! A [`FastPublisher`] owns its own [`async_nats::Subscriber`] and drives it +//! inline from `add` / `commit` / `close`. There is no background task, no +//! shared state, and no locks. This mirrors the single-task pattern used by +//! `nats-extra/src/request_many.rs`. +//! +//! # Not safe for concurrent use +//! +//! A `FastPublisher` holds per-batch state (sequence counters, cached reply +//! subject prefix, effective flow) and is driven via `&mut self`. Use one +//! publisher per task. Clone the underlying JetStream context if you need +//! independent publishers. 
+ +use std::{ + fmt::{self, Debug, Display}, + task::{Context as TaskContext, Poll}, + time::Duration, +}; + +use async_nats::jetstream::message::OutboundMessage; +use async_nats::subject::ToSubject; +use bytes::Bytes; +use futures::StreamExt; +use futures::task::noop_waker_ref; +use serde::Deserialize; + +use crate::batch_publish::BatchPubAck; + +// --------------------------------------------------------------------------- +// Public enums +// --------------------------------------------------------------------------- + +/// How the server should handle gaps in the batch sequence. +/// +/// A gap means one or more messages in the batch were lost in transit between +/// the client and the stream leader (e.g. due to buffer drops under load). +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub enum GapMode { + /// Allow gaps — the batch continues and the server informs the client via + /// a gap event. Use this when some message loss is acceptable (metrics, + /// telemetry). + Ok, + /// Fail the batch on the first gap. Use this when in-order, gap-free + /// delivery is required (object store chunks, ordered events). Default. + #[default] + Fail, +} + +impl GapMode { + pub(crate) fn as_str(self) -> &'static str { + match self { + Self::Ok => "ok", + Self::Fail => "fail", + } + } +} + +/// Fast-ingest operation codes (match the `$FI` reply-subject tail). +/// +/// Encoded as the second-to-last segment of the reply subject before `$FI`. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[repr(u8)] +pub(crate) enum Operation { + Start = 0, + Append = 1, + Commit = 2, + CommitEob = 3, + Ping = 4, +} + +// --------------------------------------------------------------------------- +// Errors +// --------------------------------------------------------------------------- + +/// Error type for fast-ingest batch publish operations. +pub type FastPublishError = async_nats::error::Error; + +/// Kinds of errors that can occur during fast-ingest batch publishing. 
+/// +/// API error codes are verified against `nats-server` 2.14 `errors.json`. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum FastPublishErrorKind { + /// 10205 — stream does not have `allow_batched: true`. + NotEnabled, + /// 10206 — reply subject pattern rejected by the server. + InvalidPattern, + /// 10207 — batch id exceeds 64 characters or is otherwise invalid. + InvalidBatchId, + /// 10208 — server has forgotten this batch (timed out, leader change in + /// `GapMode::Fail`, etc.). + UnknownBatchId, + /// 10211 — too many in-flight fast batches on the server. + TooManyInflight, + /// A gap was detected while running in [`GapMode::Fail`]. The final ack + /// will indicate which messages were persisted. + GapDetected, + /// Any other server-side error reported via a `BatchFlowErr` message. + FlowError, + /// `close()` was called on a publisher that has not received any `add`s. + EmptyBatch, + /// `build()` rejected the inbox because it does not have exactly two + /// tokens. The reply-subject parser requires `.` shape. + InvalidInboxShape, + /// Called a method on a publisher that has already committed, closed, or + /// failed fatally. + Closed, + /// Timed out waiting for a flow ack or final pub ack. + Timeout, + /// Failed to subscribe to the batch inbox. + Subscribe, + /// Failed to publish a batch message. + Publish, + /// Failed to parse a server response. + Serialization, + /// `ping()` or another operation was called in a state that does not + /// support it (e.g. before the first `add`). + InvalidState, + /// Catch-all. + Other, +} + +impl FastPublishErrorKind { + /// Map a JetStream API error code to the matching fast-ingest error kind. + /// + /// Codes verified against `nats-server/server/errors.json` and + /// `server/jetstream_errors_generated.go`. 
+ pub(crate) fn from_api_error(error: &async_nats::jetstream::Error) -> Self { + match error.error_code().0 { + 10205 => Self::NotEnabled, + 10206 => Self::InvalidPattern, + 10207 => Self::InvalidBatchId, + 10208 => Self::UnknownBatchId, + 10211 => Self::TooManyInflight, + _ => Self::FlowError, + } + } +} + +impl Display for FastPublishErrorKind { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::NotEnabled => write!(f, "fast batch publish not enabled on stream"), + Self::InvalidPattern => write!(f, "fast batch publish invalid reply subject pattern"), + Self::InvalidBatchId => write!( + f, + "fast batch publish id is invalid (exceeds 64 characters)" + ), + Self::UnknownBatchId => write!(f, "fast batch publish id is unknown to the server"), + Self::TooManyInflight => write!(f, "too many in-flight fast batches on the server"), + Self::GapDetected => write!(f, "gap detected in fast batch (gap_mode=fail)"), + Self::FlowError => write!(f, "fast batch flow error"), + Self::EmptyBatch => write!(f, "cannot close an empty batch"), + Self::InvalidInboxShape => { + write!(f, "inbox must have exactly two tokens (e.g. _INBOX.)") + } + Self::Closed => write!(f, "fast publisher is closed"), + Self::Timeout => write!(f, "timeout waiting for fast batch ack"), + Self::Subscribe => write!(f, "failed to subscribe to fast batch inbox"), + Self::Publish => write!(f, "failed to publish fast batch message"), + Self::Serialization => write!(f, "failed to (de)serialize fast batch message"), + Self::InvalidState => { + write!(f, "operation not allowed in current fast publisher state") + } + Self::Other => write!(f, "other fast batch publish error"), + } + } +} + +// --------------------------------------------------------------------------- +// Wire protocol structs (server → client) +// --------------------------------------------------------------------------- + +/// Flow-control message sent by the server when a batch of messages has been +/// persisted. 
/// Wire tag: `"type":"ack"`.
#[derive(Debug, Clone, Copy, Deserialize, PartialEq, Eq)]
pub(crate) struct BatchFlowAck {
    /// Highest batch sequence covered by this ack.
    ///
    /// In `GapMode::Fail` this means all messages up to and including this
    /// sequence were persisted. In `GapMode::Ok` some may have been lost.
    #[serde(rename = "seq")]
    pub sequence: u64,
    /// How often the server will send subsequent flow acks (every N messages).
    #[serde(rename = "msgs")]
    pub messages: u16,
}

/// Gap notification sent when the server detects missing messages in a batch.
/// Wire tag: `"type":"gap"`.
#[derive(Debug, Clone, Copy, Deserialize, PartialEq, Eq)]
pub(crate) struct BatchFlowGap {
    /// The last sequence the server expected to receive before the gap.
    ///
    /// Messages with sequences `[expected_last_sequence+1, current_sequence)`
    /// were lost.
    #[serde(rename = "last_seq")]
    pub expected_last_sequence: u64,
    /// The sequence of the message that triggered gap detection.
    #[serde(rename = "seq")]
    pub current_sequence: u64,
}

/// Per-message error sent when a batch message fails a server-side check
/// (e.g. expected-last-seq mismatch). Wire tag: `"type":"err"`.
#[derive(Debug, Clone, Deserialize)]
pub(crate) struct BatchFlowErr {
    /// The batch sequence of the message that triggered the error.
    #[serde(rename = "seq")]
    pub sequence: u64,
    /// The full API error as returned by the server.
    pub error: async_nats::jetstream::Error,
}

/// Result of classifying a message received on the batch inbox.
///
/// The classifier dispatches on the `type` field (a serde-tagged enum) and
/// falls back to a full `Response<BatchPubAck>` parse for messages without a
/// `type` discriminator (the terminal pub-ack or an init-time error).
#[derive(Debug)]
pub(crate) enum Classified {
    /// Flow-control ack (wire tag `ack`).
    FlowAck(BatchFlowAck),
    /// Gap notification (wire tag `gap`).
    FlowGap(BatchFlowGap),
    /// Per-message error (wire tag `err`).
    FlowErr(BatchFlowErr),
    /// Terminal publish acknowledgment — delivered on `commit`/`close` or on
    /// the single-message immediate-commit fast path.
    PubAck(BatchPubAck),
    /// Init-time API error — returned in response to the `0` (Start) or
    /// `2`/`3` (Commit/CommitEob) operation when the server rejects the batch
    /// before it even begins.
    InitError(async_nats::jetstream::Error),
}

/// Tagged enum for `type`-discriminated messages.
///
/// The `type` field selects the variant; `rename_all = "lowercase"` maps the
/// variant names to the wire tags `ack`, `gap` and `err`.
#[derive(Debug, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
enum TaggedFlow {
    Ack(BatchFlowAck),
    Gap(BatchFlowGap),
    Err(BatchFlowErr),
}

/// Classify a payload received on the batch inbox into one of the five
/// possible shapes.
///
/// Implementation strategy: try the tagged-enum deserializer first; if that
/// fails (no `type` field), fall back to parsing the payload as a terminal
/// `Response<BatchPubAck>`. This matches the Go byte-search optimization in
/// spirit while remaining fully serde-driven.
+pub(crate) fn classify(payload: &[u8]) -> Result { + if let Ok(tagged) = serde_json::from_slice::(payload) { + return Ok(match tagged { + TaggedFlow::Ack(a) => Classified::FlowAck(a), + TaggedFlow::Gap(g) => Classified::FlowGap(g), + TaggedFlow::Err(e) => Classified::FlowErr(e), + }); + } + + let resp: async_nats::jetstream::response::Response = + serde_json::from_slice(payload) + .map_err(|e| FastPublishError::with_source(FastPublishErrorKind::Serialization, e))?; + + Ok(match resp { + async_nats::jetstream::response::Response::Ok(pa) => Classified::PubAck(pa), + async_nats::jetstream::response::Response::Err { error } => Classified::InitError(error), + }) +} + +// --------------------------------------------------------------------------- +// Reply subject construction +// --------------------------------------------------------------------------- + +/// Build the stable prefix of the reply subject: +/// `...` +/// +/// The caller appends `..$FI` per message via [`build_reply`]. Cached +/// on the publisher and rebuilt only when the server dictates a new flow via +/// a [`BatchFlowAck`]. +pub(crate) fn build_reply_prefix(inbox: &str, flow: u16, gap: GapMode) -> String { + format!("{inbox}.{flow}.{}.", gap.as_str()) +} + +/// Build a full per-message reply subject: `..$FI`. +/// +/// `$FI` marks the subject as a fast-ingest reply to the server's parser +/// (`server/stream.go:getFastBatch`). +pub(crate) fn build_reply(prefix: &str, seq: u64, op: Operation) -> String { + format!("{prefix}{seq}.{}.$FI", op as u8) +} + +/// Validate that an inbox has the shape the fast-ingest reply subject parser +/// requires: exactly two tokens separated by a single dot (e.g. `_INBOX.`). +/// +/// The server (`server/stream.go:5174`) parses the reply subject from the +/// right, expecting `......$FI`. Our scheme +/// uses the inbox as the `.` portion — which only aligns if the +/// inbox itself is exactly two tokens. A custom multi-token inbox prefix +/// (e.g. 
`_INBOX.myapp.xyz`) would misalign the parser and cause cryptic +/// `InvalidPattern` errors. +pub(crate) fn validate_inbox_shape(inbox: &str) -> Result<(), FastPublishError> { + if inbox.matches('.').count() != 1 { + return Err(FastPublishError::new( + FastPublishErrorKind::InvalidInboxShape, + )); + } + if inbox.is_empty() { + return Err(FastPublishError::new( + FastPublishErrorKind::InvalidInboxShape, + )); + } + let (a, b) = inbox.split_once('.').unwrap(); + if a.is_empty() || b.is_empty() { + return Err(FastPublishError::new( + FastPublishErrorKind::InvalidInboxShape, + )); + } + Ok(()) +} + +// --------------------------------------------------------------------------- +// Stall formula +// --------------------------------------------------------------------------- + +/// Decide whether the publisher should stall before sending the next message. +/// +/// Matches the canonical ADR-50 / orbit.go form: wait iff +/// +/// ```text +/// last_ack_sequence + effective_flow * max_outstanding_acks <= next_sequence +/// ``` +/// +/// Equivalently, no wait iff `window > next_sequence`. This uses inclusive +/// comparison (`<=`) so that at the exact boundary +/// `(ack + flow * max) == next_seq` the publisher stalls. That matches the +/// Go reference's `< next_seq` formulation because Go uses `wait_for_ack = +/// ack + flow*max < next_seq` which is `true` at `next_seq > ack + flow*max`. +/// +/// Note on formulations: +/// - ADR-50 pseudocode: `waitForAck := lastAck.Sequence + lastAck.Messages*maxOutstandingAcks <= f.batchSeq` +/// - This Rust impl uses the ADR-50 inclusive `<=` form verbatim. +/// +/// `effective_flow` and `max_outstanding_acks` are widened to `u64` before +/// multiplication to avoid overflow on pathological inputs. 
+#[inline] +pub(crate) fn should_stall( + last_ack_sequence: u64, + effective_flow: u16, + max_outstanding_acks: u16, + next_sequence: u64, +) -> bool { + let window = last_ack_sequence + .saturating_add((effective_flow as u64).saturating_mul(max_outstanding_acks as u64)); + window <= next_sequence +} + +// --------------------------------------------------------------------------- +// Builder knobs +// --------------------------------------------------------------------------- + +/// Default initial flow (ack-every-N) requested from the server. +pub(crate) const DEFAULT_FLOW: u16 = 100; + +/// Default max outstanding-acks window size (per ADR-50: "generally optimal"). +pub(crate) const DEFAULT_MAX_OUTSTANDING_ACKS: u16 = 2; + +/// Minimum allowed value for `max_outstanding_acks`. +pub(crate) const MIN_MAX_OUTSTANDING_ACKS: u16 = 1; + +/// Maximum allowed value for `max_outstanding_acks` (ADR-50 recommends 1..=3). +pub(crate) const MAX_MAX_OUTSTANDING_ACKS: u16 = 3; + +// --------------------------------------------------------------------------- +// Extension trait +// --------------------------------------------------------------------------- + +/// Extension trait adding [`fast_publish`](FastPublishExt::fast_publish) to any +/// JetStream-context-like type. +/// +/// Implemented automatically for [`async_nats::jetstream::Context`] and any +/// other type that can provide an [`async_nats::Client`] and a default timeout. 
+/// +/// # Example +/// +/// ```no_run +/// # async fn example(js: async_nats::jetstream::Context) -> Result<(), Box> { +/// use jetstream_extra::batch_publish_fast::FastPublishExt; +/// +/// let mut batch = js.fast_publish().build()?; +/// # let _ = batch; +/// # Ok(()) +/// # } +/// ``` +pub trait FastPublishExt: + async_nats::jetstream::context::traits::ClientProvider + + async_nats::jetstream::context::traits::TimeoutProvider +{ + /// Start building a [`FastPublisher`] bound to the underlying connection + /// and default timeout of this context. + fn fast_publish(&self) -> FastPublisherBuilder { + FastPublisherBuilder::new(self.client(), self.timeout()) + } +} + +impl FastPublishExt for T where + T: async_nats::jetstream::context::traits::ClientProvider + + async_nats::jetstream::context::traits::TimeoutProvider +{ +} + +// --------------------------------------------------------------------------- +// Builder +// --------------------------------------------------------------------------- + +/// Callback type for asynchronous fast-publish errors (gaps, flow errors, +/// per-message server errors). Invoked synchronously on the publisher's task +/// whenever such an event is drained from the inbox. Keep the callback fast +/// and non-blocking. +pub type FastPublishErrorHandler = Box; + +/// Builder for a [`FastPublisher`]. +/// +/// Obtained via [`FastPublishExt::fast_publish`]. Call [`build`](Self::build) +/// to validate configuration and produce a ready-to-use publisher. +/// +/// All setters are optional; defaults match ADR-50 recommendations: +/// - `flow = 100` (ack every 100 messages ceiling) +/// - `max_outstanding_acks = 2` +/// - `gap_mode = Fail` +/// - `ack_timeout = ` +pub struct FastPublisherBuilder { + client: async_nats::Client, + flow: u16, + max_outstanding_acks: u16, + ack_timeout: Duration, + gap_mode: GapMode, + on_error: Option, +} + +impl FastPublisherBuilder { + /// Create a new builder with default settings. 
+ /// + /// Prefer [`FastPublishExt::fast_publish`] for public use. + pub(crate) fn new(client: async_nats::Client, ack_timeout: Duration) -> Self { + Self { + client, + flow: DEFAULT_FLOW, + max_outstanding_acks: DEFAULT_MAX_OUTSTANDING_ACKS, + ack_timeout, + gap_mode: GapMode::default(), + on_error: None, + } + } + + /// Set the client-requested maximum flow — the upper bound on how often + /// the server will send flow acks. The server may choose a lower effective + /// flow. + /// + /// Must be at least 1. Values of 0 are clamped to 1. + pub fn flow(mut self, flow: u16) -> Self { + self.flow = flow.max(1); + self + } + + /// Set the number of flow-ack-batches that can be in flight before the + /// publisher stalls and waits for an ack. Valid range is `1..=3`. + /// + /// - `1` behaves like synchronous async publish throttled to flow N + /// - `2` is the ADR-50 recommended default (optimal for most cases) + /// - `3` is useful on higher-RTT links + /// + /// Values outside the range are returned as an error from [`build`](Self::build). + pub fn max_outstanding_acks(mut self, n: u16) -> Self { + self.max_outstanding_acks = n; + self + } + + /// Set the timeout for waiting on flow acks and the final commit ack. + /// + /// When the publisher is stalled waiting for an ack, it will auto-send + /// pings every `ack_timeout / 3` to recover from lost acks, giving up + /// after the full `ack_timeout` elapses. + pub fn ack_timeout(mut self, timeout: Duration) -> Self { + self.ack_timeout = timeout; + self + } + + /// Set the gap handling mode. Default: [`GapMode::Fail`]. + pub fn gap_mode(mut self, mode: GapMode) -> Self { + self.gap_mode = mode; + self + } + + /// Register a callback invoked for asynchronous events: gap detections, + /// per-message flow errors, and server-side fast-batch errors. + /// + /// The callback is called on the publisher's task synchronously from the + /// middle of `add` / `commit` / `close`, so it must be fast and + /// non-blocking. 
+ pub fn on_error(mut self, handler: F) -> Self + where + F: FnMut(FastPublishError) + Send + 'static, + { + self.on_error = Some(Box::new(handler)); + self + } + + /// Validate configuration and produce a [`FastPublisher`]. + /// + /// The subscription to the batch inbox is NOT created here — it is lazily + /// opened on the first `add` / `commit` / `close`. This matches the Go + /// reference implementation and avoids wasted subscriptions when a + /// publisher is built and then dropped unused. + /// + /// # Errors + /// + /// - [`FastPublishErrorKind::InvalidState`] if `max_outstanding_acks` is + /// outside `1..=3`. + /// - [`FastPublishErrorKind::InvalidInboxShape`] if the client's + /// `new_inbox()` does not produce a two-token inbox (required by the + /// server's reply-subject parser). + pub fn build(self) -> Result { + if !(MIN_MAX_OUTSTANDING_ACKS..=MAX_MAX_OUTSTANDING_ACKS) + .contains(&self.max_outstanding_acks) + { + return Err(FastPublishError::new(FastPublishErrorKind::InvalidState)); + } + + let inbox = self.client.new_inbox(); + validate_inbox_shape(&inbox)?; + + let reply_prefix = build_reply_prefix(&inbox, self.flow, self.gap_mode); + + Ok(FastPublisher { + client: self.client, + inbox, + flow: self.flow, + gap_mode: self.gap_mode, + max_outstanding_acks: self.max_outstanding_acks, + ack_timeout: self.ack_timeout, + reply_prefix, + subscriber: None, + sequence: 0, + effective_flow: self.flow, + last_ack_sequence: 0, + initial_ack_received: false, + pending_pub_ack: None, + first_subject: None, + closed: false, + fatal: None, + on_error: self.on_error, + }) + } +} + +// --------------------------------------------------------------------------- +// FastPublisher +// --------------------------------------------------------------------------- + +/// A non-atomic, high-throughput JetStream batch publisher using the +/// fast-ingest protocol (ADR-50, nats-server 2.14+). 
+/// +/// Obtain via [`FastPublishExt::fast_publish`] → [`FastPublisherBuilder::build`]. +/// +/// A `FastPublisher` is `Send` but NOT `Sync`: methods require `&mut self` and +/// the publisher must be driven from a single task. Dropping the publisher +/// mid-batch is safe — the underlying [`async_nats::Subscriber`] drops with +/// it, the server-side interest is torn down, and the server will time out +/// the abandoned batch after 10 seconds. +pub struct FastPublisher { + // --- configuration (immutable after build) --- + client: async_nats::Client, + inbox: String, + flow: u16, // client-requested ceiling + gap_mode: GapMode, + max_outstanding_acks: u16, + ack_timeout: Duration, + + // --- cached reply subject prefix --- + // "..." — rebuilt when effective_flow changes. + reply_prefix: String, + + // --- lazily-created inbox subscription --- + subscriber: Option, + + // --- per-batch state --- + sequence: u64, + effective_flow: u16, // dictated by server; equals `flow` until first ack + last_ack_sequence: u64, + /// Set to true as soon as the server sends the initial `BatchFlowAck` in + /// response to the first `Start` message. This is tracked separately + /// from `last_ack_sequence` because the initial ack has `seq:0` (no + /// messages persisted yet — just confirming the batch is alive). + initial_ack_received: bool, + /// Terminal `PubAck` stashed if seen out of band (e.g. single-msg + /// immediate-commit fast path during `await_first_reply`). + pending_pub_ack: Option, + /// Subject of the first published message, used by [`close`](Self::close) + /// to construct the EOB commit message and by `ping` as the publish + /// target. + first_subject: Option, + closed: bool, + fatal: Option, + + // --- async error callback --- + on_error: Option, +} + +impl FastPublisher { + /// Returns the number of messages added to (and published in) this batch + /// so far, excluding any pending commit message. 
+ pub fn size(&self) -> u64 { + self.sequence + } + + /// Returns `true` if the batch has been committed, closed, or failed + /// fatally. + pub fn is_closed(&self) -> bool { + self.closed + } + + /// Returns the batch's inbox, which is also the batch identifier as seen + /// by the server. + pub fn batch_id(&self) -> &str { + &self.inbox + } + + /// Returns the currently-configured gap mode. + pub fn gap_mode(&self) -> GapMode { + self.gap_mode + } + + /// Returns the highest batch sequence acknowledged by the server so far. + /// `0` before the first flow ack arrives. + pub fn last_ack_sequence(&self) -> u64 { + self.last_ack_sequence + } +} + +impl Debug for FastPublisher { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("FastPublisher") + .field("inbox", &self.inbox) + .field("flow", &self.flow) + .field("effective_flow", &self.effective_flow) + .field("gap_mode", &self.gap_mode) + .field("max_outstanding_acks", &self.max_outstanding_acks) + .field("sequence", &self.sequence) + .field("last_ack_sequence", &self.last_ack_sequence) + .field("closed", &self.closed) + .field("fatal", &self.fatal) + .finish() + } +} + +// Compile-time check: `FastPublisher` must be `Send` so users can spawn it +// onto tokio tasks. It is intentionally NOT `Sync`: per-batch state is driven +// through `&mut self` and the publisher is single-consumer. +const _: fn() = || { + fn assert_send() {} + assert_send::(); +}; + +// --------------------------------------------------------------------------- +// Public return type +// --------------------------------------------------------------------------- + +/// Result of a successful [`FastPublisher::add`] / [`FastPublisher::add_message`] +/// call. +/// +/// The `ack_sequence` is the highest batch sequence acknowledged by the +/// server so far. In [`GapMode::Fail`] this means all messages up to and +/// including `ack_sequence` were persisted. 
In [`GapMode::Ok`] there may +/// have been gaps; `ack_sequence` is only a hint about how far the server +/// has progressed. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct FastPubAck { + /// Batch sequence of the message that was just added. + pub batch_sequence: u64, + /// Highest batch sequence acknowledged by the server so far. + pub ack_sequence: u64, +} + +// --------------------------------------------------------------------------- +// State machine: add / commit / close +// --------------------------------------------------------------------------- + +/// Convenience discriminator for the `commit_message_inner` call site. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum CommitKind { + /// Commit with a final stored message (operation code 2). + Final, + /// Commit via end-of-batch — do not store the final message (operation + /// code 3). Used by [`FastPublisher::close`]. + Eob, +} + +impl FastPublisher { + /// Add a message to the batch with the given subject and payload. + /// + /// Convenience wrapper around [`add_message`](Self::add_message) for + /// callers that don't need custom headers. + pub async fn add( + &mut self, + subject: S, + payload: Bytes, + ) -> Result { + self.add_message(OutboundMessage { + subject: subject.to_subject(), + payload, + headers: None, + }) + .await + } + + /// Add a pre-constructed message to the batch. + /// + /// The message's `subject`, `payload`, and `headers` fields are forwarded + /// to the server; the reply subject is always set by the publisher. + /// + /// On the first call, a subscription to the batch inbox is created and + /// the publisher waits for the initial flow ack from the server to + /// confirm the batch has been accepted. + /// + /// # Errors + /// + /// - [`FastPublishErrorKind::Closed`] if the publisher has already been + /// committed, closed, or failed fatally. + /// - [`FastPublishErrorKind::Subscribe`] if the initial subscription + /// fails. 
+ /// - [`FastPublishErrorKind::Publish`] if publishing the message fails. + /// - [`FastPublishErrorKind::Timeout`] if the initial ack does not arrive + /// within `ack_timeout`. + /// - Any mapped API error from the server's init response + /// (`NotEnabled`, `InvalidPattern`, `InvalidBatchId`, `UnknownBatchId`, + /// `TooManyInflight`). + pub async fn add_message( + &mut self, + msg: OutboundMessage, + ) -> Result { + if self.closed { + return Err(FastPublishError::new(FastPublishErrorKind::Closed)); + } + if let Some(kind) = self.fatal { + return Err(FastPublishError::new(kind)); + } + + // Lazy bootstrap on the very first publish. + self.ensure_subscribed().await?; + + // Drain any events that arrived while the caller was computing the + // next payload. Cost in the common case: one `Pending` poll. + self.drain_nonblocking()?; + if let Some(kind) = self.fatal { + return Err(FastPublishError::new(kind)); + } + + // Stall gate: if the outstanding-ack window is saturated, wait for a + // flow ack before emitting the next message. Check BEFORE incrementing + // sequence so the next-message sequence is what we gate on. + let next_sequence = self.sequence + 1; + if should_stall( + self.last_ack_sequence, + self.effective_flow, + self.max_outstanding_acks, + next_sequence, + ) { + self.wait_for_flow_event_with_pings().await?; + if let Some(kind) = self.fatal { + return Err(FastPublishError::new(kind)); + } + } + + self.sequence += 1; + let op = if self.sequence == 1 { + Operation::Start + } else { + Operation::Append + }; + + if self.first_subject.is_none() { + self.first_subject = Some(msg.subject.clone()); + } + + let reply = build_reply(&self.reply_prefix, self.sequence, op); + self.publish_raw(msg, reply).await?; + + if self.sequence == 1 { + self.await_first_reply().await?; + } + + // Drain any acks that landed while we were awaiting the first reply + // or just now publishing. 
+ self.drain_nonblocking()?; + if let Some(kind) = self.fatal { + return Err(FastPublishError::new(kind)); + } + + Ok(FastPubAck { + batch_sequence: self.sequence, + ack_sequence: self.last_ack_sequence, + }) + } + + /// Commit the batch by publishing a final stored message. + /// + /// After this returns, the publisher is closed and no further messages + /// can be added. The returned [`BatchPubAck`] includes the batch id (the + /// publisher's inbox) and the total number of messages in the batch. + pub async fn commit( + self, + subject: S, + payload: Bytes, + ) -> Result { + self.commit_message(OutboundMessage { + subject: subject.to_subject(), + payload, + headers: None, + }) + .await + } + + /// Commit the batch with a pre-constructed final message. + pub async fn commit_message( + mut self, + msg: OutboundMessage, + ) -> Result { + self.commit_message_inner(msg, CommitKind::Final).await + } + + /// End the batch without storing a final message (end-of-batch commit). + /// + /// Uses the first message's subject as the publish target; the server + /// does not persist the commit message itself. Returns the same + /// [`BatchPubAck`] shape as [`commit`](Self::commit). + /// + /// Returns [`FastPublishErrorKind::EmptyBatch`] if no messages have been + /// added yet. 
+ pub async fn close(mut self) -> Result { + if self.closed { + return Err(FastPublishError::new(FastPublishErrorKind::Closed)); + } + if self.sequence == 0 { + return Err(FastPublishError::new(FastPublishErrorKind::EmptyBatch)); + } + let subject = self + .first_subject + .clone() + .expect("first_subject set once sequence > 0"); + let msg = OutboundMessage { + subject, + payload: Bytes::new(), + headers: None, + }; + self.commit_message_inner(msg, CommitKind::Eob).await + } + + // ---- internal helpers -------------------------------------------------- + + async fn commit_message_inner( + &mut self, + msg: OutboundMessage, + kind: CommitKind, + ) -> Result { + if self.closed { + return Err(FastPublishError::new(FastPublishErrorKind::Closed)); + } + + // Lazy bootstrap for first-op-is-commit (single-message immediate + // commit) — the commit path uses the same subscribe-on-first-publish + // as add_message. + self.ensure_subscribed().await?; + + // Drain any pending events first. + self.drain_nonblocking()?; + + // If a fatal flow error was observed (e.g. FlowErr or GapDetected in + // Fail mode), the batch is already terminal server-side. Don't publish + // the commit, but DO drain the inbox to pick up the terminal PubAck + // the server will send — it tells the user which messages were + // persisted. If the PubAck never arrives, we time out. + if let Some(fatal_kind) = self.fatal { + self.closed = true; + // Try to get the PubAck for diagnostics; ignore drain errors. + let _pub_ack = self.drain_until_pub_ack().await.ok(); + return Err(FastPublishError::new(fatal_kind)); + } + + // Stall gate: also apply to commits. A commit is just another + // message from the server's perspective and counts against the + // outstanding-ack window. 
+ let next_sequence = self.sequence + 1; + if should_stall( + self.last_ack_sequence, + self.effective_flow, + self.max_outstanding_acks, + next_sequence, + ) { + self.wait_for_flow_event_with_pings().await?; + if let Some(fatal_kind) = self.fatal { + self.closed = true; + let _pub_ack = self.drain_until_pub_ack().await.ok(); + return Err(FastPublishError::new(fatal_kind)); + } + } + + self.sequence += 1; + let op = match kind { + CommitKind::Final => Operation::Commit, + CommitKind::Eob => Operation::CommitEob, + }; + + if self.first_subject.is_none() { + self.first_subject = Some(msg.subject.clone()); + } + + let reply = build_reply(&self.reply_prefix, self.sequence, op); + self.publish_raw(msg, reply).await?; + self.closed = true; + + self.drain_until_pub_ack().await + } + + /// Create the inbox subscription if it does not exist yet. + async fn ensure_subscribed(&mut self) -> Result<(), FastPublishError> { + if self.subscriber.is_some() { + return Ok(()); + } + let wildcard = format!("{}.>", self.inbox); + let sub = self + .client + .subscribe(wildcard) + .await + .map_err(|e| FastPublishError::with_source(FastPublishErrorKind::Subscribe, e))?; + self.subscriber = Some(sub); + Ok(()) + } + + /// Publish a message with a reply subject, fire-and-forget. + /// + /// Dispatches to `publish_with_reply_and_headers` when headers are + /// present, otherwise the simpler `publish_with_reply`. + /// + /// Takes `&mut self` to avoid `&FastPublisher` crossing `.await`, which + /// would require `FastPublisher: Sync` (not satisfied due to the boxed + /// `FnMut` error handler field). 
+ async fn publish_raw( + &mut self, + msg: OutboundMessage, + reply: String, + ) -> Result<(), FastPublishError> { + let OutboundMessage { + subject, + payload, + headers, + } = msg; + let res = match headers { + Some(h) => { + self.client + .publish_with_reply_and_headers(subject, reply, h, payload) + .await + } + None => { + self.client + .publish_with_reply(subject, reply, payload) + .await + } + }; + res.map_err(|e| FastPublishError::with_source(FastPublishErrorKind::Publish, e)) + } + + /// Non-blocking drain: consume all messages currently buffered on the + /// subscription and apply them to the publisher state. + /// + /// The common case (no pending events) costs one `poll_next` returning + /// `Pending`, which is a few nanoseconds. + fn drain_nonblocking(&mut self) -> Result<(), FastPublishError> { + let Some(sub) = self.subscriber.as_mut() else { + return Ok(()); + }; + let waker = noop_waker_ref(); + let mut cx = TaskContext::from_waker(waker); + loop { + match sub.poll_next_unpin(&mut cx) { + Poll::Ready(Some(msg)) => { + let shared = SharedHandlerState { + gap_mode: self.gap_mode, + effective_flow: &mut self.effective_flow, + last_ack_sequence: &mut self.last_ack_sequence, + initial_ack_received: &mut self.initial_ack_received, + pending_pub_ack: &mut self.pending_pub_ack, + fatal: &mut self.fatal, + on_error: self.on_error.as_mut(), + }; + handle_inbox_message(shared, msg)?; + } + Poll::Ready(None) => { + // Subscription ended unexpectedly. + self.closed = true; + return Err(FastPublishError::new(FastPublishErrorKind::Closed)); + } + Poll::Pending => return Ok(()), + } + } + } + + /// After the first `Start` message is sent, wait for the server to + /// either (a) send a `BatchFlowAck` confirming the batch, (b) return an + /// init-time API error, or (c) send a terminal `PubAck` directly on the + /// single-message immediate-commit fast path (stashed for later pickup). 
+ async fn await_first_reply(&mut self) -> Result<(), FastPublishError> { + let deadline = tokio::time::Instant::now() + self.ack_timeout; + loop { + let now = tokio::time::Instant::now(); + if now >= deadline { + return Err(FastPublishError::new(FastPublishErrorKind::Timeout)); + } + let remaining = deadline - now; + let sub = self + .subscriber + .as_mut() + .expect("subscriber installed by ensure_subscribed"); + let msg = match tokio::time::timeout(remaining, sub.next()).await { + Ok(Some(m)) => m, + Ok(None) => { + self.closed = true; + return Err(FastPublishError::new(FastPublishErrorKind::Closed)); + } + Err(_) => return Err(FastPublishError::new(FastPublishErrorKind::Timeout)), + }; + let shared = SharedHandlerState { + gap_mode: self.gap_mode, + effective_flow: &mut self.effective_flow, + last_ack_sequence: &mut self.last_ack_sequence, + initial_ack_received: &mut self.initial_ack_received, + pending_pub_ack: &mut self.pending_pub_ack, + fatal: &mut self.fatal, + on_error: self.on_error.as_mut(), + }; + handle_inbox_message(shared, msg)?; + + if let Some(kind) = self.fatal { + return Err(FastPublishError::new(kind)); + } + if self.pending_pub_ack.is_some() { + // Single-message immediate-commit fast path — the commit + // path (drain_until_pub_ack) will pick this up. + return Ok(()); + } + if self.initial_ack_received { + // First BatchFlowAck arrived (even with seq:0) — batch is + // confirmed and we can continue publishing. + return Ok(()); + } + // Otherwise: gap/err/etc. — keep waiting for the terminal signal. + } + } + + /// Drain the subscription until a terminal `PubAck` is received (or a + /// fatal error surfaces). + async fn drain_until_pub_ack(&mut self) -> Result { + // Already stashed by a prior drain or first-reply? 
+ if let Some(pa) = self.pending_pub_ack.take() { + self.unsubscribe_best_effort().await; + return Ok(pa); + } + + let deadline = tokio::time::Instant::now() + self.ack_timeout; + loop { + let now = tokio::time::Instant::now(); + if now >= deadline { + return Err(FastPublishError::new(FastPublishErrorKind::Timeout)); + } + let remaining = deadline - now; + let sub = self + .subscriber + .as_mut() + .expect("subscriber installed by ensure_subscribed"); + let msg = match tokio::time::timeout(remaining, sub.next()).await { + Ok(Some(m)) => m, + Ok(None) => { + self.closed = true; + return Err(FastPublishError::new(FastPublishErrorKind::Closed)); + } + Err(_) => return Err(FastPublishError::new(FastPublishErrorKind::Timeout)), + }; + + let shared = SharedHandlerState { + gap_mode: self.gap_mode, + effective_flow: &mut self.effective_flow, + last_ack_sequence: &mut self.last_ack_sequence, + initial_ack_received: &mut self.initial_ack_received, + pending_pub_ack: &mut self.pending_pub_ack, + fatal: &mut self.fatal, + on_error: self.on_error.as_mut(), + }; + handle_inbox_message(shared, msg)?; + + if let Some(pa) = self.pending_pub_ack.take() { + self.unsubscribe_best_effort().await; + return Ok(pa); + } + if let Some(kind) = self.fatal.take() { + self.unsubscribe_best_effort().await; + return Err(FastPublishError::new(kind)); + } + } + } + + async fn unsubscribe_best_effort(&mut self) { + if let Some(mut sub) = self.subscriber.take() { + let _ = sub.unsubscribe().await; + } + } + + /// Block until the outstanding-ack window has room for another publish, + /// sending periodic pings to the server to recover from lost acks. + /// + /// Matches Go orbit.go PR #32 `waitForStall`: split the ack_timeout into + /// three intervals so up to two pings fit before the deadline. + async fn wait_for_flow_event_with_pings(&mut self) -> Result<(), FastPublishError> { + // Split timeout into 3 intervals for up to 2 pings before giving up. 
+ // Clamp floor to 100ms so tiny ack_timeouts don't spin. + let ping_interval = (self.ack_timeout / 3).max(Duration::from_millis(100)); + let deadline = tokio::time::Instant::now() + self.ack_timeout; + let mut ping_at = tokio::time::Instant::now() + ping_interval; + + loop { + if let Some(kind) = self.fatal { + return Err(FastPublishError::new(kind)); + } + + let now = tokio::time::Instant::now(); + if now >= deadline { + return Err(FastPublishError::new(FastPublishErrorKind::Timeout)); + } + + // Compute the next wake time: either the ping instant or the + // overall deadline, whichever comes first. + let next_wake = ping_at.min(deadline); + let wait = next_wake.saturating_duration_since(now); + + let sub = self + .subscriber + .as_mut() + .expect("subscriber installed before stall"); + + match tokio::time::timeout(wait, sub.next()).await { + Ok(Some(msg)) => { + let shared = SharedHandlerState { + gap_mode: self.gap_mode, + effective_flow: &mut self.effective_flow, + last_ack_sequence: &mut self.last_ack_sequence, + initial_ack_received: &mut self.initial_ack_received, + pending_pub_ack: &mut self.pending_pub_ack, + fatal: &mut self.fatal, + on_error: self.on_error.as_mut(), + }; + handle_inbox_message(shared, msg)?; + + let next_sequence = self.sequence + 1; + if !should_stall( + self.last_ack_sequence, + self.effective_flow, + self.max_outstanding_acks, + next_sequence, + ) { + // Window has room; we can publish again. + return Ok(()); + } + // Still stalled: keep waiting for the next event. + } + Ok(None) => { + self.closed = true; + return Err(FastPublishError::new(FastPublishErrorKind::Closed)); + } + Err(_) => { + // Timed out on this interval. Either send a ping (if the + // ping interval expired) or give up (if the overall + // deadline expired). 
+ let now = tokio::time::Instant::now(); + if now >= deadline { + return Err(FastPublishError::new(FastPublishErrorKind::Timeout)); + } + // Must have hit the ping interval — send a ping and + // schedule the next one. + self.send_ping().await?; + ping_at = tokio::time::Instant::now() + ping_interval; + } + } + } + } + + /// Send a ping message (op=4) to recover from a possibly-lost ack. + /// + /// The ping does NOT increment the batch sequence. It is published to the + /// first message's subject (required so the server routes it to the same + /// stream), and triggers the server to resend the latest flow ack. + /// + /// Takes `&mut self` — not because the call mutates state, but because + /// holding `&self` across `.await` would require `FastPublisher: Sync`, + /// which the boxed `FnMut` error handler field prevents. Callers already + /// hold `&mut self` so this is cheap. + async fn send_ping(&mut self) -> Result<(), FastPublishError> { + let Some(subject) = self.first_subject.clone() else { + // Cannot ping before the first add — this should never happen + // because the stall gate only fires after at least one publish. + return Err(FastPublishError::new(FastPublishErrorKind::InvalidState)); + }; + let reply = build_reply(&self.reply_prefix, self.sequence, Operation::Ping); + self.client + .publish_with_reply(subject, reply, Bytes::new()) + .await + .map_err(|e| FastPublishError::with_source(FastPublishErrorKind::Publish, e))?; + Ok(()) + } +} + +// --------------------------------------------------------------------------- +// Handler for inbox messages +// --------------------------------------------------------------------------- + +/// Mutable reference bundle passed to [`handle_inbox_message`] so the +/// function can update all the relevant fields without conflicting borrows +/// on `&mut FastPublisher` (which would prevent us from also holding a +/// mutable borrow on `self.subscriber` while processing drained messages). 
+struct SharedHandlerState<'a> { + gap_mode: GapMode, + effective_flow: &'a mut u16, + last_ack_sequence: &'a mut u64, + initial_ack_received: &'a mut bool, + pending_pub_ack: &'a mut Option, + fatal: &'a mut Option, + on_error: Option<&'a mut FastPublishErrorHandler>, +} + +fn handle_inbox_message( + mut state: SharedHandlerState<'_>, + msg: async_nats::Message, +) -> Result<(), FastPublishError> { + match classify(&msg.payload)? { + Classified::FlowAck(ack) => { + // The initial BatchFlowAck for a fresh batch has `seq:0` (no + // messages persisted yet — it just confirms the batch was + // accepted). We must track it separately from `last_ack_sequence` + // because `seq:0` is the same as the default `last_ack_sequence`. + *state.initial_ack_received = true; + // Lost-ack handling: any newer seq implicitly acks all below. + if ack.sequence > *state.last_ack_sequence { + *state.last_ack_sequence = ack.sequence; + } + // Server-dictated flow override. Update the effective flow used by + // the stall gate. The reply prefix always uses the CLIENT's initial + // flow ceiling (not the server-dictated value) because ADR-50 says + // "We add the initial flow [...] for replica followers who might + // have missed the first message due to limits." The server's + // getFastBatch parser reads the flow from the reply subject to set + // maxAckMessages on followers — using the server-dictated (lower) + // value would cap follower ramp-up incorrectly in clustered setups. + let new_flow = ack.messages.max(1); + if new_flow != *state.effective_flow { + *state.effective_flow = new_flow; + // NOTE: reply_prefix is NOT rebuilt here — it always contains + // the initial flow ceiling. Only effective_flow (used by the + // stall gate) is updated. 
+ } + } + Classified::FlowGap(gap) => { + tracing::debug!( + expected_last = gap.expected_last_sequence, + current = gap.current_sequence, + "fast batch gap detected" + ); + if let Some(h) = state.on_error.as_deref_mut() { + h(FastPublishError::new(FastPublishErrorKind::GapDetected)); + } + // In GapMode::Fail, stop publishing immediately. The server has + // already abandoned the batch; further publishes would be silently + // dropped. The terminal PubAck (containing persisted-message + // count) will arrive on the next drain/commit. + if state.gap_mode == GapMode::Fail { + *state.fatal = Some(FastPublishErrorKind::GapDetected); + } + } + Classified::FlowErr(ferr) => { + tracing::debug!( + batch_sequence = ferr.sequence, + err_code = ferr.error.error_code().0, + "fast batch flow error" + ); + let kind = FastPublishErrorKind::from_api_error(&ferr.error); + if let Some(h) = state.on_error.as_deref_mut() { + h(FastPublishError::new(kind)); + } + if state.gap_mode == GapMode::Fail { + *state.fatal = Some(kind); + } + } + Classified::PubAck(pa) => { + *state.pending_pub_ack = Some(pa); + } + Classified::InitError(err) => { + *state.fatal = Some(FastPublishErrorKind::from_api_error(&err)); + } + } + Ok(()) +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + + // -- reply subject format ------------------------------------------------ + + #[test] + fn reply_prefix_format_ok_mode() { + let p = build_reply_prefix("_INBOX.abc123", 100, GapMode::Ok); + assert_eq!(p, "_INBOX.abc123.100.ok."); + } + + #[test] + fn reply_prefix_format_fail_mode() { + let p = build_reply_prefix("_INBOX.abc123", 50, GapMode::Fail); + assert_eq!(p, "_INBOX.abc123.50.fail."); + } + + #[test] + fn reply_full_all_operations() { + let prefix = build_reply_prefix("_INBOX.x", 10, GapMode::Fail); + for (op, code) in [ + 
(Operation::Start, 0_u8), + (Operation::Append, 1), + (Operation::Commit, 2), + (Operation::CommitEob, 3), + (Operation::Ping, 4), + ] { + let r = build_reply(&prefix, 42, op); + assert_eq!(r, format!("_INBOX.x.10.fail.42.{code}.$FI")); + } + } + + #[test] + fn reply_full_both_gap_modes() { + for (mode, tag) in [(GapMode::Ok, "ok"), (GapMode::Fail, "fail")] { + let prefix = build_reply_prefix("_INBOX.abc", 25, mode); + let r = build_reply(&prefix, 1, Operation::Start); + assert_eq!(r, format!("_INBOX.abc.25.{tag}.1.0.$FI")); + } + } + + // -- inbox shape validation ---------------------------------------------- + + #[test] + fn inbox_shape_accepts_two_tokens() { + assert!(validate_inbox_shape("_INBOX.abc123").is_ok()); + assert!(validate_inbox_shape("X.Y").is_ok()); + } + + #[test] + fn inbox_shape_rejects_zero_dots() { + assert!(matches!( + validate_inbox_shape("INBOX").unwrap_err().kind(), + FastPublishErrorKind::InvalidInboxShape + )); + } + + #[test] + fn inbox_shape_rejects_three_or_more_tokens() { + assert!(matches!( + validate_inbox_shape("_INBOX.myapp.abc123") + .unwrap_err() + .kind(), + FastPublishErrorKind::InvalidInboxShape + )); + assert!(matches!( + validate_inbox_shape("a.b.c.d").unwrap_err().kind(), + FastPublishErrorKind::InvalidInboxShape + )); + } + + #[test] + fn inbox_shape_rejects_empty_tokens() { + assert!(validate_inbox_shape(".abc").is_err()); + assert!(validate_inbox_shape("abc.").is_err()); + assert!(validate_inbox_shape("").is_err()); + } + + // -- JSON parsing -------------------------------------------------------- + + #[test] + fn parse_batch_flow_ack() { + let payload = br#"{"type":"ack","seq":10,"msgs":15}"#; + match classify(payload).unwrap() { + Classified::FlowAck(a) => { + assert_eq!(a.sequence, 10); + assert_eq!(a.messages, 15); + } + other => panic!("expected FlowAck, got {other:?}"), + } + } + + #[test] + fn parse_batch_flow_gap() { + let payload = br#"{"type":"gap","last_seq":10,"seq":15}"#; + match 
classify(payload).unwrap() { + Classified::FlowGap(g) => { + assert_eq!(g.expected_last_sequence, 10); + assert_eq!(g.current_sequence, 15); + } + other => panic!("expected FlowGap, got {other:?}"), + } + } + + #[test] + fn parse_batch_flow_err() { + let payload = br#"{"type":"err","seq":7,"error":{"code":400,"err_code":10071,"description":"wrong last sequence: 1"}}"#; + match classify(payload).unwrap() { + Classified::FlowErr(e) => { + assert_eq!(e.sequence, 7); + assert_eq!(e.error.error_code().0, 10071); + } + other => panic!("expected FlowErr, got {other:?}"), + } + } + + #[test] + fn parse_terminal_pub_ack() { + let payload = br#"{"stream":"TEST","seq":42,"batch":"inbox-id","count":10}"#; + match classify(payload).unwrap() { + Classified::PubAck(pa) => { + assert_eq!(pa.stream, "TEST"); + assert_eq!(pa.sequence, 42); + assert_eq!(pa.batch_id, "inbox-id"); + assert_eq!(pa.batch_size, 10); + } + other => panic!("expected PubAck, got {other:?}"), + } + } + + #[test] + fn parse_init_error_response() { + // Server sends init errors as plain JSPubAckResponse with no `type` + // field — just `{"error":{...}}`. The classifier falls through the + // tagged-enum parse and hits `Response::Err`. 
+ let payload = + br#"{"error":{"code":400,"err_code":10205,"description":"fast batch publish not enabled"}}"#; + match classify(payload).unwrap() { + Classified::InitError(err) => { + assert_eq!(err.error_code().0, 10205); + assert_eq!( + FastPublishErrorKind::from_api_error(&err), + FastPublishErrorKind::NotEnabled + ); + } + other => panic!("expected InitError, got {other:?}"), + } + } + + #[test] + fn classify_malformed_json_returns_error() { + let payload = b"not json at all"; + let err = classify(payload).unwrap_err(); + assert!(matches!(err.kind(), FastPublishErrorKind::Serialization)); + } + + // -- stall formula ------------------------------------------------------- + + #[test] + fn stall_no_wait_when_window_is_strictly_greater() { + // seq=19, ack=0, flow=10, max=2 → window=20, 20 > 19 → no wait + assert!(!should_stall(0, 10, 2, 19)); + } + + #[test] + fn stall_waits_at_exact_boundary() { + // seq=20, ack=0, flow=10, max=2 → window=20, 20 <= 20 → wait + // This matches ADR-50's `<=` formulation. + assert!(should_stall(0, 10, 2, 20)); + } + + #[test] + fn stall_waits_past_boundary() { + // seq=21 → window=20 <= 21 → wait + assert!(should_stall(0, 10, 2, 21)); + } + + #[test] + fn stall_honors_last_ack() { + // ack=10, flow=10, max=2, seq=30 → window=30, 30 <= 30 → wait + assert!(should_stall(10, 10, 2, 30)); + // ack=10, flow=10, max=2, seq=29 → window=30, 30 > 29 → no wait + assert!(!should_stall(10, 10, 2, 29)); + } + + #[test] + fn stall_with_single_outstanding_ack() { + // max=1 matches "ack every N" throttling + assert!(!should_stall(0, 10, 1, 9)); + assert!(should_stall(0, 10, 1, 10)); + assert!(should_stall(0, 10, 1, 11)); + } + + #[test] + fn stall_with_max_outstanding_three() { + // max=3 matches higher-RTT throughput mode + assert!(!should_stall(0, 10, 3, 29)); + assert!(should_stall(0, 10, 3, 30)); + } + + #[test] + fn stall_saturates_on_pathological_inputs() { + // Ensure u64 overflow is saturated — should never panic. 
+ let waited = should_stall(u64::MAX - 5, u16::MAX, u16::MAX, u64::MAX); + // Window saturates to u64::MAX; `window > seq` (u64::MAX > u64::MAX) is false, so it stalls. + assert!(waited); + } + + // -- error code mapping -------------------------------------------------- + + fn api_err(code: u64) -> async_nats::jetstream::Error { + // Build a minimal jetstream::Error via JSON round-trip so we don't + // depend on any constructor that may not exist in the public API. + let json = format!(r#"{{"code":400,"err_code":{code},"description":"test"}}"#); + serde_json::from_str(&json).expect("synthetic api error parses") + } + + #[test] + fn error_code_mapping_verified_against_server() { + assert_eq!( + FastPublishErrorKind::from_api_error(&api_err(10205)), + FastPublishErrorKind::NotEnabled + ); + assert_eq!( + FastPublishErrorKind::from_api_error(&api_err(10206)), + FastPublishErrorKind::InvalidPattern + ); + assert_eq!( + FastPublishErrorKind::from_api_error(&api_err(10207)), + FastPublishErrorKind::InvalidBatchId + ); + assert_eq!( + FastPublishErrorKind::from_api_error(&api_err(10208)), + FastPublishErrorKind::UnknownBatchId + ); + assert_eq!( + FastPublishErrorKind::from_api_error(&api_err(10211)), + FastPublishErrorKind::TooManyInflight + ); + } + + #[test] + fn error_code_mapping_unknown_is_flow_error() { + assert_eq!( + FastPublishErrorKind::from_api_error(&api_err(10071)), + FastPublishErrorKind::FlowError + ); + assert_eq!( + FastPublishErrorKind::from_api_error(&api_err(99999)), + FastPublishErrorKind::FlowError + ); + } + + // -- display impl -------------------------------------------------------- + + #[test] + fn error_kind_display_non_empty() { + for kind in [ + FastPublishErrorKind::NotEnabled, + FastPublishErrorKind::InvalidPattern, + FastPublishErrorKind::InvalidBatchId, + FastPublishErrorKind::UnknownBatchId, + FastPublishErrorKind::TooManyInflight, + FastPublishErrorKind::GapDetected, + FastPublishErrorKind::FlowError, + FastPublishErrorKind::EmptyBatch, 
+ FastPublishErrorKind::InvalidInboxShape, + FastPublishErrorKind::Closed, + FastPublishErrorKind::Timeout, + FastPublishErrorKind::Subscribe, + FastPublishErrorKind::Publish, + FastPublishErrorKind::Serialization, + FastPublishErrorKind::InvalidState, + FastPublishErrorKind::Other, + ] { + let s = format!("{kind}"); + assert!(!s.is_empty(), "empty Display for {kind:?}"); + } + } + + // -- default impls ------------------------------------------------------- + + #[test] + fn gap_mode_default_is_fail() { + assert_eq!(GapMode::default(), GapMode::Fail); + } + + // -- builder validation -------------------------------------------------- + // + // These tests only exercise `FastPublisherBuilder::build()` validation. + // The client comes from `dummy_builder`, which starts an embedded + // `nats_server` and connects to it, so a real local connection exists; + // `build()` itself just validates the configuration and derives the + // inbox via `new_inbox()` — the subscription is created lazily. + // + // Since we cannot synthesize an `async_nats::Client` without a runtime, + // each test that needs one does so inside a tokio runtime. 
+ + async fn dummy_builder() -> (nats_server::Server, FastPublisherBuilder) { + let server = nats_server::run_server("tests/configs/jetstream.conf"); + let client = async_nats::connect(server.client_url()).await.unwrap(); + ( + server, + FastPublisherBuilder::new(client, Duration::from_secs(5)), + ) + } + + #[tokio::test] + async fn builder_rejects_max_outstanding_zero() { + let (_s, b) = dummy_builder().await; + let err = b.max_outstanding_acks(0).build().unwrap_err(); + assert!(matches!(err.kind(), FastPublishErrorKind::InvalidState)); + } + + #[tokio::test] + async fn builder_rejects_max_outstanding_four() { + let (_s, b) = dummy_builder().await; + let err = b.max_outstanding_acks(4).build().unwrap_err(); + assert!(matches!(err.kind(), FastPublishErrorKind::InvalidState)); + } + + #[tokio::test] + async fn builder_accepts_all_valid_max_outstanding() { + for n in 1..=3 { + let (_s, b) = dummy_builder().await; + let fp = b.max_outstanding_acks(n).build().expect("valid config"); + assert_eq!(fp.max_outstanding_acks, n); + } + } + + #[tokio::test] + async fn builder_clamps_flow_zero_to_one() { + let (_s, b) = dummy_builder().await; + let fp = b.flow(0).build().expect("flow clamped to 1"); + assert_eq!(fp.flow, 1); + } + + #[tokio::test] + async fn builder_default_values() { + let (_s, b) = dummy_builder().await; + let fp = b.build().expect("defaults build ok"); + assert_eq!(fp.flow, DEFAULT_FLOW); + assert_eq!(fp.effective_flow, DEFAULT_FLOW); + assert_eq!(fp.max_outstanding_acks, DEFAULT_MAX_OUTSTANDING_ACKS); + assert_eq!(fp.gap_mode, GapMode::Fail); + assert_eq!(fp.sequence, 0); + assert_eq!(fp.last_ack_sequence, 0); + assert!(fp.subscriber.is_none()); + assert!(!fp.is_closed()); + assert_eq!(fp.size(), 0); + } + + #[tokio::test] + async fn builder_produces_cached_reply_prefix() { + let (_s, b) = dummy_builder().await; + let fp = b.flow(42).gap_mode(GapMode::Ok).build().unwrap(); + assert!(fp.reply_prefix.starts_with(&fp.inbox)); + 
assert!(fp.reply_prefix.ends_with(".42.ok.")); + } + + #[tokio::test] + async fn builder_batch_id_is_the_inbox() { + let (_s, b) = dummy_builder().await; + let fp = b.build().unwrap(); + assert_eq!(fp.batch_id(), fp.inbox); + assert_eq!(fp.inbox.matches('.').count(), 1); + } +} diff --git a/jetstream-extra/tests/batch_publish_fast.rs b/jetstream-extra/tests/batch_publish_fast.rs new file mode 100644 index 0000000..bf96e55 --- /dev/null +++ b/jetstream-extra/tests/batch_publish_fast.rs @@ -0,0 +1,508 @@ +// Copyright 2026 Synadia Communications Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Integration tests for fast-ingest batch publishing. +//! +//! Requires nats-server 2.14 or later (fast ingest was added in 2.14 / API +//! level 4). Each test spins up an embedded server via the `nats_server` +//! dev-dependency and creates a stream with `allow_batched: true` via a raw +//! `$JS.API.STREAM.CREATE` request since `async-nats 0.45.0`'s `StreamConfig` +//! does not yet expose that field. + +#[cfg(test)] +mod fast_publish_tests { + use std::sync::Arc; + use std::time::Duration; + + use async_nats::jetstream::message::OutboundMessage; + use jetstream_extra::batch_publish_fast::{FastPublishErrorKind, FastPublishExt, GapMode}; + use serde_json::json; + + /// Create a stream with fast-ingest batching enabled. 
+ /// + /// Uses a raw `STREAM.CREATE` JetStream API request because async-nats + /// 0.45.0 does not expose `allow_batched` on its `stream::Config`. + async fn setup_fast_stream( + ctx: &async_nats::jetstream::Context, + name: &str, + subjects_wildcard: &str, + ) { + let _ = ctx.delete_stream(name).await; + + let body = json!({ + "name": name, + "subjects": [subjects_wildcard], + "retention": "limits", + "storage": "file", + "allow_batched": true, + "allow_atomic": true, + }); + + let resp: serde_json::Value = ctx + .request(format!("STREAM.CREATE.{name}"), &body) + .await + .expect("STREAM.CREATE succeeds"); + + // Surface a server-side error explicitly instead of silently + // proceeding with a broken stream. + if let Some(err) = resp.get("error") { + panic!("STREAM.CREATE returned error: {err}"); + } + + // Sanity-check: the created stream must have allow_batched set. + // If the server version doesn't understand the field, the stream is + // still created but without fast-ingest enabled — which would cause + // mysterious timeouts on the first add rather than a clear error. + let cfg = resp + .get("config") + .expect("STREAM.CREATE response has config"); + let allow_batched = cfg + .get("allow_batched") + .and_then(|v| v.as_bool()) + .unwrap_or(false); + assert!( + allow_batched, + "server did not honor allow_batched — response: {resp}" + ); + } + + async fn connect() -> (nats_server::Server, async_nats::jetstream::Context) { + let server = nats_server::run_server("tests/configs/jetstream.conf"); + let client = async_nats::connect(server.client_url()).await.unwrap(); + let ctx = async_nats::jetstream::new(client); + (server, ctx) + } + + // -- basic happy path ---------------------------------------------------- + // + // Mirrors orbit.go PR #32 `TestFastPublisher/basic`. 
+ + #[tokio::test] + async fn basic() { + let (_server, js) = connect().await; + setup_fast_stream(&js, "TEST", "test.>").await; + + let mut batch = js.fast_publish().build().expect("build"); + + // Add message 1 — this triggers lazy subscribe + initial flow ack. + let ack1 = batch + .add("test.1", "message 1".into()) + .await + .expect("add 1"); + assert_eq!(ack1.batch_sequence, 1); + + // Add message 2 — append path. + let ack2 = batch + .add("test.2", "message 2".into()) + .await + .expect("add 2"); + assert_eq!(ack2.batch_sequence, 2); + + // Commit with the third message. + let pub_ack = batch + .commit("test.3", "message 3".into()) + .await + .expect("commit"); + assert_eq!(pub_ack.batch_size, 3); + assert_eq!(pub_ack.stream, "TEST"); + assert!(!pub_ack.batch_id.is_empty()); + + // Stream should now have exactly 3 messages. + let stream = js.get_stream("TEST").await.unwrap(); + let info = stream.get_info().await.unwrap(); + assert_eq!(info.state.messages, 3); + } + + // -- close (EOB) --------------------------------------------------------- + // + // Mirrors orbit.go PR #32 `TestFastPublisher/close`. + + #[tokio::test] + async fn close_eob() { + let (_server, js) = connect().await; + setup_fast_stream(&js, "TEST", "test.>").await; + + let mut batch = js.fast_publish().build().expect("build"); + batch.add("test.1", "message 1".into()).await.unwrap(); + batch.add("test.2", "message 2".into()).await.unwrap(); + + let pub_ack = batch.close().await.expect("close"); + assert_eq!(pub_ack.batch_size, 2); + + // Close is EOB: the commit message itself is not stored. 
+ let stream = js.get_stream("TEST").await.unwrap(); + let info = stream.get_info().await.unwrap(); + assert_eq!(info.state.messages, 2); + } + + // -- close on empty batch errors ----------------------------------------- + + #[tokio::test] + async fn close_empty_batch_errors() { + let (_server, js) = connect().await; + setup_fast_stream(&js, "TEST", "test.>").await; + + let batch = js.fast_publish().build().expect("build"); + let err = batch.close().await.unwrap_err(); + assert!(matches!(err.kind(), FastPublishErrorKind::EmptyBatch)); + } + + // -- flow control -------------------------------------------------------- + // + // Mirrors orbit.go PR #32 `TestFastPublisher/with_flow_control`. + + #[tokio::test] + async fn with_flow_control() { + let (_server, js) = connect().await; + setup_fast_stream(&js, "TEST", "test.>").await; + + let mut batch = js + .fast_publish() + .flow(50) + .max_outstanding_acks(3) + .ack_timeout(Duration::from_secs(5)) + .build() + .expect("build"); + + for i in 0..200 { + batch + .add("test.msg", format!("data {i}").into()) + .await + .unwrap_or_else(|e| panic!("add {i} failed: {e:?}")); + } + + let pub_ack = batch + .commit("test.final", "final".into()) + .await + .expect("commit"); + assert_eq!(pub_ack.batch_size, 201); + } + + // -- continue on gap ----------------------------------------------------- + // + // Mirrors orbit.go PR #32 `TestFastPublisher/with_continue_on_gap`. + // + // This test doesn't actually force a gap (gaps are hard to induce in a + // deterministic test); it just verifies that GapMode::Ok is accepted and + // a batch completes normally under it. 
+ + #[tokio::test] + async fn with_continue_on_gap() { + let (_server, js) = connect().await; + setup_fast_stream(&js, "TEST", "test.>").await; + + let mut batch = js + .fast_publish() + .gap_mode(GapMode::Ok) + .build() + .expect("build"); + + for i in 0..5 { + batch + .add("test.msg", format!("data {i}").into()) + .await + .unwrap(); + } + + let pub_ack = batch + .commit("test.final", "final".into()) + .await + .expect("commit"); + assert_eq!(pub_ack.batch_size, 6); + } + + // -- large batch --------------------------------------------------------- + // + // Mirrors orbit.go PR #32 `TestFastPublisher_LargeBatch`. This exercises + // the stall gate and flow-ack handling over a sustained publish. + + #[tokio::test] + async fn large_batch() { + let (_server, js) = connect().await; + setup_fast_stream(&js, "TEST", "test.>").await; + + let mut batch = js + .fast_publish() + .flow(100) + .max_outstanding_acks(2) + .ack_timeout(Duration::from_secs(10)) + .build() + .expect("build"); + + const N: usize = 10_000; + for i in 0..N { + batch + .add("test.msg", format!("data {i}").into()) + .await + .unwrap_or_else(|e| panic!("add {i} failed: {e:?}")); + } + + let pub_ack = batch.close().await.expect("close"); + assert_eq!(pub_ack.batch_size as usize, N); + + let stream = js.get_stream("TEST").await.unwrap(); + let info = stream.get_info().await.unwrap(); + assert_eq!(info.state.messages as usize, N); + } + + // -- single-msg immediate commit ---------------------------------------- + // + // The very first operation is `commit`. Server replies with a plain + // PubAck and no intervening BatchFlowAck — verified against + // nats-server/server/jetstream_batching_test.go:3010-3019. 
+ + #[tokio::test] + async fn single_msg_immediate_commit() { + let (_server, js) = connect().await; + setup_fast_stream(&js, "TEST", "test.>").await; + + let batch = js.fast_publish().build().expect("build"); + let pub_ack = batch + .commit("test.only", "single".into()) + .await + .expect("commit"); + assert_eq!(pub_ack.batch_size, 1); + + let stream = js.get_stream("TEST").await.unwrap(); + let info = stream.get_info().await.unwrap(); + assert_eq!(info.state.messages, 1); + } + + // -- not enabled on stream ----------------------------------------------- + + #[tokio::test] + async fn not_enabled_on_stream() { + let (_server, js) = connect().await; + // Create a stream WITHOUT allow_batched. + let _ = js.delete_stream("TEST").await; + js.create_stream(async_nats::jetstream::stream::Config { + name: "TEST".into(), + subjects: vec!["test.>".into()], + ..Default::default() + }) + .await + .unwrap(); + + let mut batch = js + .fast_publish() + .ack_timeout(Duration::from_secs(3)) + .build() + .expect("build"); + + let err = batch.add("test.1", "data".into()).await.unwrap_err(); + assert!( + matches!(err.kind(), FastPublishErrorKind::NotEnabled), + "expected NotEnabled, got {err:?}" + ); + } + + // -- stall gate forced --------------------------------------------------- + // + // Force the stall gate to actually fire by asking for a very low flow + // ceiling. The ack_timeout must be generous enough for the server to + // process the publish + send the flow ack. + + #[tokio::test] + async fn stall_gate_fires_with_low_flow() { + let (_server, js) = connect().await; + setup_fast_stream(&js, "TEST", "test.>").await; + + let mut batch = js + .fast_publish() + .flow(5) + .max_outstanding_acks(2) + .ack_timeout(Duration::from_secs(10)) + .build() + .expect("build"); + + // flow=5, max=2 → window=10. Publishing 50 messages will exercise + // the stall gate multiple times. 
+ for i in 0..50 { + batch + .add("test.msg", format!("data {i}").into()) + .await + .unwrap_or_else(|e| panic!("add {i} failed: {e:?}")); + } + + let pub_ack = batch + .commit("test.done", "done".into()) + .await + .expect("commit"); + assert_eq!(pub_ack.batch_size, 51); + } + + // -- concurrent publishers ----------------------------------------------- + // + // Several publishers writing to the same stream concurrently. All should + // complete without stepping on each other. + + #[tokio::test] + async fn concurrent_publishers() { + let (_server, js) = connect().await; + setup_fast_stream(&js, "TEST", "test.>").await; + + let mut handles = Vec::new(); + for worker in 0..4 { + let js_clone = js.clone(); + handles.push(tokio::spawn(async move { + let mut batch = js_clone + .fast_publish() + .flow(50) + .max_outstanding_acks(2) + .ack_timeout(Duration::from_secs(10)) + .build() + .expect("build"); + for i in 0..100 { + batch + .add("test.msg", format!("w{worker}-{i}").into()) + .await + .expect("add"); + } + batch + .commit("test.done", format!("w{worker}-done").into()) + .await + .expect("commit") + })); + } + + for h in handles { + let ack = h.await.unwrap(); + assert_eq!(ack.batch_size, 101); + } + + // 4 publishers × 101 messages each = 404 total. + let stream = js.get_stream("TEST").await.unwrap(); + let info = stream.get_info().await.unwrap(); + assert_eq!(info.state.messages, 404); + } + + // -- drop mid-batch ------------------------------------------------------ + // + // Build, add, drop. Must not panic; the server will time out the + // abandoned batch after 10s. We don't wait for the server cleanup — we + // just verify the client side drops cleanly. + + #[tokio::test] + async fn drop_mid_batch() { + let (_server, js) = connect().await; + setup_fast_stream(&js, "TEST", "test.>").await; + + { + let mut batch = js.fast_publish().build().expect("build"); + batch.add("test.1", "data".into()).await.unwrap(); + // Drop at end of scope — no commit/close. 
+ } + + // Follow up with a fresh publisher on the same stream to prove the + // client state is healthy (no stuck subscription, no leaked task). + let mut batch = js.fast_publish().build().expect("build"); + batch.add("test.a", "a".into()).await.unwrap(); + let pub_ack = batch + .commit("test.b", "b".into()) + .await + .expect("commit after drop"); + assert_eq!(pub_ack.batch_size, 2); + } + + // -- BatchFlowErr from header expectation mismatch (Fail mode) ----------- + // + // First message sets `Nats-Expected-Last-Sequence:99` on an empty stream. + // The server publishes the message but reports a `BatchFlowErr` on the + // batch inbox; in `GapMode::Fail` this terminates the batch with + // `FlowError`. ADR-50 §"Server Errors". + + #[tokio::test] + async fn flow_err_on_expected_last_sequence_mismatch_fail() { + let (_server, js) = connect().await; + setup_fast_stream(&js, "TEST", "test.>").await; + + let mut batch = js + .fast_publish() + .ack_timeout(Duration::from_secs(5)) + .build() + .expect("build"); + + let mut headers = async_nats::HeaderMap::new(); + headers.insert("Nats-Expected-Last-Sequence", "99"); + let msg = OutboundMessage { + subject: "test.bad".into(), + payload: "x".into(), + headers: Some(headers), + }; + + // First add publishes; server emits a BatchFlowErr asynchronously on + // the inbox. Give the server a moment to deliver it before commit so + // the publisher classifies the err and sets a fatal kind, rather than + // commit racing with a not-yet-arrived err and timing out. 
+ let _ = batch.add_message(msg).await; + tokio::time::sleep(Duration::from_millis(200)).await; + + let err = batch + .commit("test.done", "done".into()) + .await + .expect_err("commit must fail with FlowError"); + assert_eq!( + err.kind(), + FastPublishErrorKind::FlowError, + "expected FlowError, got {err:?}" + ); + } + + // -- BatchFlowErr in GapMode::Ok — callback fires, batch continues ------- + + #[tokio::test] + async fn flow_err_callback_fires_in_ok_mode() { + let (_server, js) = connect().await; + setup_fast_stream(&js, "TEST", "test.>").await; + + // Capture the actual error kind seen by the callback so we can assert + // it was a FlowError, not a stray Timeout/Gap. + let observed_kinds = Arc::new(std::sync::Mutex::new(Vec::<FastPublishErrorKind>::new())); + let observed_cb = observed_kinds.clone(); + + let mut batch = js + .fast_publish() + .gap_mode(GapMode::Ok) + .ack_timeout(Duration::from_secs(5)) + .on_error(move |err| { + observed_cb.lock().unwrap().push(err.kind()); + }) + .build() + .expect("build"); + + let mut headers = async_nats::HeaderMap::new(); + headers.insert("Nats-Expected-Last-Sequence", "99"); + let msg = OutboundMessage { + subject: "test.bad".into(), + payload: "x".into(), + headers: Some(headers), + }; + let _ = batch.add_message(msg).await; + + // Wait for the BatchFlowErr to land on the inbox. + tokio::time::sleep(Duration::from_millis(200)).await; + + // Push more good messages — Ok mode should not abort. 
+ for i in 0..5 { + let _ = batch.add("test.ok", format!("data {i}").into()).await; + } + let _ = batch.commit("test.done", "done".into()).await; + tokio::time::sleep(Duration::from_millis(100)).await; + + let kinds = observed_kinds.lock().unwrap(); + assert!( + kinds.contains(&FastPublishErrorKind::FlowError), + "expected callback to observe FlowError at least once; saw {kinds:?}" + ); + } +} From df47b6e576ef09c01dfe26bbe9b1afcfada4dcef Mon Sep 17 00:00:00 2001 From: Tomasz Pietrek Date: Fri, 8 May 2026 14:17:30 +0200 Subject: [PATCH 4/9] Update dic Signed-off-by: Tomasz Pietrek --- .config/orbit.dic | 53 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/.config/orbit.dic b/.config/orbit.dic index 669a619..fabbac3 100644 --- a/.config/orbit.dic +++ b/.config/orbit.dic @@ -1,4 +1,4 @@ -130 +300 NATS JetStream jetstream @@ -185,3 +185,54 @@ decrement AllowMsgCounter AllowDirect PubAck +batch_publish_fast +BatchPubAck +BatchPublishErrorKind +fast-ingest +fast_publish +FastPublish +FastPublisher +FastPublisherBuilder +FastPublishExt +FastPublishErrorKind +GapMode +TTL +EOB +init +lazily +mid-batch +waitForStall +JetStream's +nats-server +unsubscribe +allow_atomic +allow_batched +2.14+ +— +→ +inflight +CommitEob +deserializer +misalign +iff +pseudocode +detections +impl +ADR-50 +ADR +backtick +unsubscribe +preallocated +incrementing +JetStream's +nats.io +unsubscription +subscriber +multi-line +multi-byte +runtime's +outbound +add_message +commit_message +BatchClosed +ack_timeout From 5c948f63ac0ded4f1789381dee523bf22ee9f6cd Mon Sep 17 00:00:00 2001 From: Tomasz Pietrek Date: Fri, 8 May 2026 15:09:00 +0200 Subject: [PATCH 5/9] Address comments Signed-off-by: Tomasz Pietrek --- jetstream-extra/src/batch_publish_fast.rs | 73 +++++++++++---------- jetstream-extra/tests/batch_publish_fast.rs | 56 +++++++++------- 2 files changed, 73 insertions(+), 56 deletions(-) diff --git 
a/jetstream-extra/src/batch_publish_fast.rs b/jetstream-extra/src/batch_publish_fast.rs index 2e3d052..fc56a7f 100644 --- a/jetstream-extra/src/batch_publish_fast.rs +++ b/jetstream-extra/src/batch_publish_fast.rs @@ -59,6 +59,7 @@ use crate::batch_publish::BatchPubAck; /// A gap means one or more messages in the batch were lost in transit between /// the client and the stream leader (e.g. due to buffer drops under load). #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +#[non_exhaustive] pub enum GapMode { /// Allow gaps — the batch continues and the server informs the client via /// a gap event. Use this when some message loss is acceptable (metrics, @@ -102,18 +103,24 @@ pub type FastPublishError = async_nats::error::Error; /// Kinds of errors that can occur during fast-ingest batch publishing. /// /// API error codes are verified against `nats-server` 2.14 `errors.json`. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +/// +/// Marked `#[non_exhaustive]` — adding a new variant in a future release will +/// not be a breaking change. Match on `_` for forward compatibility. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[non_exhaustive] pub enum FastPublishErrorKind { - /// 10205 — stream does not have `allow_batched: true`. + /// Stream does not have `allow_batched: true`. (`BATCH_PUBLISH_DISABLED`, 10205) NotEnabled, - /// 10206 — reply subject pattern rejected by the server. + /// Reply subject pattern rejected by the server. (`BATCH_PUBLISH_INVALID_PATTERN`, 10206) InvalidPattern, - /// 10207 — batch id exceeds 64 characters or is otherwise invalid. + /// Batch id exceeds 64 characters or is otherwise invalid. + /// (`BATCH_PUBLISH_INVALID_BATCH_ID`, 10207) InvalidBatchId, - /// 10208 — server has forgotten this batch (timed out, leader change in - /// `GapMode::Fail`, etc.). + /// Server has forgotten this batch (timed out, leader change in + /// `GapMode::Fail`, etc.). 
(`BATCH_PUBLISH_UNKNOWN_BATCH_ID`, 10208) UnknownBatchId, - /// 10211 — too many in-flight fast batches on the server. + /// Too many in-flight fast batches on the server. + /// (`BATCH_PUBLISH_TOO_MANY_INFLIGHT`, 10211) TooManyInflight, /// A gap was detected while running in [`GapMode::Fail`]. The final ack /// will indicate which messages were persisted. @@ -125,6 +132,9 @@ pub enum FastPublishErrorKind { /// `build()` rejected the inbox because it does not have exactly two /// tokens. The reply-subject parser requires `.` shape. InvalidInboxShape, + /// `build()` rejected a configuration value (e.g. `max_outstanding_acks` + /// outside `1..=3`). + InvalidConfig, /// Called a method on a publisher that has already committed, closed, or /// failed fatally. Closed, @@ -136,8 +146,10 @@ pub enum FastPublishErrorKind { Publish, /// Failed to parse a server response. Serialization, - /// `ping()` or another operation was called in a state that does not - /// support it (e.g. before the first `add`). + /// An internal operation that depends on a runtime invariant was called + /// before that invariant held — currently only emitted when the publisher + /// would need to send a ping but no message has been published yet, so + /// there is no first-subject to address the ping to. InvalidState, /// Catch-all. Other, @@ -145,16 +157,14 @@ pub enum FastPublishErrorKind { impl FastPublishErrorKind { /// Map a JetStream API error code to the matching fast-ingest error kind. - /// - /// Codes verified against `nats-server/server/errors.json` and - /// `server/jetstream_errors_generated.go`. 
pub(crate) fn from_api_error(error: &async_nats::jetstream::Error) -> Self { - match error.error_code().0 { - 10205 => Self::NotEnabled, - 10206 => Self::InvalidPattern, - 10207 => Self::InvalidBatchId, - 10208 => Self::UnknownBatchId, - 10211 => Self::TooManyInflight, + use async_nats::jetstream::ErrorCode; + match error.error_code() { + ErrorCode::BATCH_PUBLISH_DISABLED => Self::NotEnabled, + ErrorCode::BATCH_PUBLISH_INVALID_PATTERN => Self::InvalidPattern, + ErrorCode::BATCH_PUBLISH_INVALID_BATCH_ID => Self::InvalidBatchId, + ErrorCode::BATCH_PUBLISH_UNKNOWN_BATCH_ID => Self::UnknownBatchId, + ErrorCode::BATCH_PUBLISH_TOO_MANY_INFLIGHT => Self::TooManyInflight, _ => Self::FlowError, } } @@ -177,6 +187,7 @@ impl Display for FastPublishErrorKind { Self::InvalidInboxShape => { write!(f, "inbox must have exactly two tokens (e.g. _INBOX.)") } + Self::InvalidConfig => write!(f, "invalid fast publisher configuration"), Self::Closed => write!(f, "fast publisher is closed"), Self::Timeout => write!(f, "timeout waiting for fast batch ack"), Self::Subscribe => write!(f, "failed to subscribe to fast batch inbox"), @@ -321,18 +332,11 @@ pub(crate) fn build_reply(prefix: &str, seq: u64, op: Operation) -> String { /// (e.g. `_INBOX.myapp.xyz`) would misalign the parser and cause cryptic /// `InvalidPattern` errors. 
pub(crate) fn validate_inbox_shape(inbox: &str) -> Result<(), FastPublishError> { - if inbox.matches('.').count() != 1 { - return Err(FastPublishError::new( - FastPublishErrorKind::InvalidInboxShape, - )); - } - if inbox.is_empty() { - return Err(FastPublishError::new( - FastPublishErrorKind::InvalidInboxShape, - )); - } - let (a, b) = inbox.split_once('.').unwrap(); - if a.is_empty() || b.is_empty() { + let mut parts = inbox.splitn(3, '.'); + let first = parts.next().unwrap_or(""); + let second = parts.next().unwrap_or(""); + let no_third = parts.next().is_none(); + if first.is_empty() || second.is_empty() || !no_third { return Err(FastPublishError::new( FastPublishErrorKind::InvalidInboxShape, )); @@ -536,7 +540,7 @@ impl FastPublisherBuilder { /// /// # Errors /// - /// - [`FastPublishErrorKind::InvalidState`] if `max_outstanding_acks` is + /// - [`FastPublishErrorKind::InvalidConfig`] if `max_outstanding_acks` is /// outside `1..=3`. /// - [`FastPublishErrorKind::InvalidInboxShape`] if the client's /// `new_inbox()` does not produce a two-token inbox (required by the @@ -545,7 +549,7 @@ impl FastPublisherBuilder { if !(MIN_MAX_OUTSTANDING_ACKS..=MAX_MAX_OUTSTANDING_ACKS) .contains(&self.max_outstanding_acks) { - return Err(FastPublishError::new(FastPublishErrorKind::InvalidState)); + return Err(FastPublishError::new(FastPublishErrorKind::InvalidConfig)); } let inbox = self.client.new_inbox(); @@ -1600,6 +1604,7 @@ mod tests { FastPublishErrorKind::FlowError, FastPublishErrorKind::EmptyBatch, FastPublishErrorKind::InvalidInboxShape, + FastPublishErrorKind::InvalidConfig, FastPublishErrorKind::Closed, FastPublishErrorKind::Timeout, FastPublishErrorKind::Subscribe, @@ -1644,14 +1649,14 @@ mod tests { async fn builder_rejects_max_outstanding_zero() { let (_s, b) = dummy_builder().await; let err = b.max_outstanding_acks(0).build().unwrap_err(); - assert!(matches!(err.kind(), FastPublishErrorKind::InvalidState)); + assert!(matches!(err.kind(), 
FastPublishErrorKind::InvalidConfig)); } #[tokio::test] async fn builder_rejects_max_outstanding_four() { let (_s, b) = dummy_builder().await; let err = b.max_outstanding_acks(4).build().unwrap_err(); - assert!(matches!(err.kind(), FastPublishErrorKind::InvalidState)); + assert!(matches!(err.kind(), FastPublishErrorKind::InvalidConfig)); } #[tokio::test] diff --git a/jetstream-extra/tests/batch_publish_fast.rs b/jetstream-extra/tests/batch_publish_fast.rs index bf96e55..84eb85c 100644 --- a/jetstream-extra/tests/batch_publish_fast.rs +++ b/jetstream-extra/tests/batch_publish_fast.rs @@ -426,31 +426,33 @@ mod fast_publish_tests { let (_server, js) = connect().await; setup_fast_stream(&js, "TEST", "test.>").await; + // `flow(2).max_outstanding_acks(1)` caps the outstanding-ack window at + // two messages (flow × max), so the second `add()` stalls in + // `wait_for_flow_event_with_pings` until the server emits *some* event. + // For a batch that just received a malformed first message, that event + // is the BatchFlowErr — which the publisher classifies, sets `fatal`, + // and surfaces as `FlowError` on the second `add()`. No sleep, no poll. let mut batch = js .fast_publish() + .flow(2) + .max_outstanding_acks(1) .ack_timeout(Duration::from_secs(5)) .build() .expect("build"); let mut headers = async_nats::HeaderMap::new(); headers.insert("Nats-Expected-Last-Sequence", "99"); - let msg = OutboundMessage { + let bad = OutboundMessage { subject: "test.bad".into(), payload: "x".into(), headers: Some(headers), }; - - // First add publishes; server emits a BatchFlowErr asynchronously on - // the inbox. Give the server a moment to deliver it before commit so - // the publisher classifies the err and sets a fatal kind, rather than - // commit racing with a not-yet-arrived err and timing out. 
- let _ = batch.add_message(msg).await; - tokio::time::sleep(Duration::from_millis(200)).await; + let _ = batch.add_message(bad).await; let err = batch - .commit("test.done", "done".into()) + .add("test.next", "n".into()) .await - .expect_err("commit must fail with FlowError"); + .expect_err("second add must surface FlowError via stall gate"); assert_eq!( err.kind(), FastPublishErrorKind::FlowError, @@ -465,11 +467,14 @@ mod fast_publish_tests { let (_server, js) = connect().await; setup_fast_stream(&js, "TEST", "test.>").await; - // Capture the actual error kind seen by the callback so we can assert - // it was a FlowError, not a stray Timeout/Gap. let observed_kinds = Arc::new(std::sync::Mutex::new(Vec::<FastPublishErrorKind>::new())); let observed_cb = observed_kinds.clone(); + // Default flow/max — no stall gate. In `GapMode::Ok` the publisher + // does not mark fatal on FlowErr, so the second `add()` runs the + // initial `drain_nonblocking` which classifies any pending FlowErr, + // fires the callback, and returns Ok. Poll a few times to absorb + // server-delivery latency without depending on a fixed sleep. let mut batch = js .fast_publish() .gap_mode(GapMode::Ok) @@ -482,22 +487,29 @@ mod fast_publish_tests { let mut headers = async_nats::HeaderMap::new(); headers.insert("Nats-Expected-Last-Sequence", "99"); - let msg = OutboundMessage { + let bad = OutboundMessage { subject: "test.bad".into(), payload: "x".into(), headers: Some(headers), }; - let _ = batch.add_message(msg).await; - - // Wait for the BatchFlowErr to land on the inbox. - tokio::time::sleep(Duration::from_millis(200)).await; - - // Push more good messages — Ok mode should not abort. - for i in 0..5 { - let _ = batch.add("test.ok", format!("data {i}").into()).await; + let _ = batch.add_message(bad).await; + + // Issue a small bounded number of follow-up adds; each one drains the + // inbox and gives the BatchFlowErr a chance to be classified. 
We cap + // iterations rather than spinning until a deadline so a stuck server + // fails fast. + for _ in 0..20 { + if observed_kinds + .lock() + .unwrap() + .contains(&FastPublishErrorKind::FlowError) + { + break; + } + let _ = batch.add("test.poll", "p".into()).await; + tokio::task::yield_now().await; } let _ = batch.commit("test.done", "done".into()).await; - tokio::time::sleep(Duration::from_millis(100)).await; let kinds = observed_kinds.lock().unwrap(); assert!( From f9335760922a272b87f4a801d30e21642ddf3835 Mon Sep 17 00:00:00 2001 From: Tomasz Pietrek Date: Fri, 8 May 2026 15:57:59 +0200 Subject: [PATCH 6/9] Update dic Signed-off-by: Tomasz Pietrek --- .config/orbit.dic | 1 + 1 file changed, 1 insertion(+) diff --git a/.config/orbit.dic b/.config/orbit.dic index fabbac3..4b899a4 100644 --- a/.config/orbit.dic +++ b/.config/orbit.dic @@ -236,3 +236,4 @@ add_message commit_message BatchClosed ack_timeout +runtime From bb2ef5af32540a7110ea689e16323e01915f6b69 Mon Sep 17 00:00:00 2001 From: Tomasz Pietrek Date: Fri, 8 May 2026 16:50:40 +0200 Subject: [PATCH 7/9] Add publish-mode benchmark + reduce alloc in fast publish reply - New example `bench_normal_vs_fast` compares sync, async (bounded inflight), core+js, atomic batch, and fast batch publishing on an R1 stream (100k messages each). - `build_reply` now returns `Subject` instead of `String`; the publisher stamps the reply via `From<String> for Subject` which is zero-copy, saving one `Bytes::copy_from_slice` per fast-publish message. 
Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Tomasz Pietrek --- .../examples/bench_normal_vs_fast.rs | 251 ++++++++++++++++++ jetstream-extra/src/batch_publish_fast.rs | 20 +- 2 files changed, 265 insertions(+), 6 deletions(-) create mode 100644 jetstream-extra/examples/bench_normal_vs_fast.rs diff --git a/jetstream-extra/examples/bench_normal_vs_fast.rs b/jetstream-extra/examples/bench_normal_vs_fast.rs new file mode 100644 index 0000000..a90c245 --- /dev/null +++ b/jetstream-extra/examples/bench_normal_vs_fast.rs @@ -0,0 +1,251 @@ +// Copyright 2026 Synadia Communications Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Benchmark: 100k messages via four JetStream publish modes, plus a core-NATS publish variant (`bench_core_then_js`). +//! +//! 1. sync — `js.publish().await + ack.await` per message +//! 2. async — bounded-inflight `FuturesUnordered` over PubAck futures +//! 3. atomic batch — `client.batch_publish()` with optional flow control +//! 4. fast — `client.fast_publish()` (server 2.14+, `allow_batched`) +//! +//! All five target an R1 stream, payload ~63B per message. Subjects and +//! payloads are static to avoid per-iteration allocation skewing the loop. +//! +//! ```bash +//! nats-server -js +//! cargo run -p jetstream-extra --example bench_normal_vs_fast --release +//! 
``` + +use std::future::IntoFuture; +use std::time::{Duration, Instant}; + +use async_nats::jetstream::{self, stream}; +use bytes::Bytes; +use futures::stream::{FuturesUnordered, StreamExt}; +use jetstream_extra::batch_publish::BatchPublishExt; +use jetstream_extra::batch_publish_fast::{FastPublishExt, GapMode}; +use serde_json::json; + +const TOTAL: usize = 100_000; +const PAYLOAD: &[u8] = b"benchmark payload (~64B) ......................................"; +const SUBJECT_SYNC: &str = "bench.sync"; +const SUBJECT_ASYNC: &str = "bench.async"; +const SUBJECT_ATOMIC: &str = "bench.atomic"; +const SUBJECT_FAST: &str = "bench.fast"; + +#[tokio::main(flavor = "current_thread")] +async fn main() -> Result<(), Box> { + // The default client capacity (128) and subscription capacity (4096) are + // tuned for typical workloads. For 100k-message benches with thousands of + // concurrent PubAck futures we want a bigger write channel. + let client = async_nats::ConnectOptions::new() + .client_capacity(8192) + .subscription_capacity(8192) + .connect("nats://127.0.0.1:4222") + .await?; + let js = jetstream::new(client); + + println!("== payload {}B, {} messages each ==", PAYLOAD.len(), TOTAL); + + let sync_ = bench_sync(&js).await?; + let async_ = bench_async(&js).await?; + let core_trick = bench_core_then_js(&js).await?; + let atomic = bench_atomic(&js).await?; + let fast = bench_fast(&js).await?; + + println!(); + println!(" elapsed msg/s MB/s"); + print_row("sync js.publish", sync_); + print_row("async js.publish", async_); + print_row("core+js last-only ", core_trick); + print_row("atomic batch ", atomic); + print_row("fast batch ", fast); + Ok(()) +} + +#[derive(Copy, Clone)] +struct Stats { + elapsed: Duration, +} + +fn print_row(label: &str, s: Stats) { + let secs = s.elapsed.as_secs_f64(); + let msgs_per_sec = TOTAL as f64 / secs; + let mb_per_sec = (TOTAL as f64 * PAYLOAD.len() as f64) / secs / 1_048_576.0; + println!("{label} {:>8.3}s {:>8.0} {:>6.2}", secs, 
msgs_per_sec, mb_per_sec); +} + +fn payload() -> Bytes { + Bytes::from_static(PAYLOAD) +} + +async fn create_stream( + js: &jetstream::Context, + name: &str, + subjects: &str, +) -> Result<(), Box> { + let _ = js.delete_stream(name).await; + js.create_stream(stream::Config { + name: name.into(), + subjects: vec![subjects.into()], + num_replicas: 1, + ..Default::default() + }) + .await?; + Ok(()) +} + +async fn bench_sync(js: &jetstream::Context) -> Result> { + println!("\n[sync] R1 stream, await each PubAck"); + create_stream(js, "BENCH_SYNC", "bench.sync").await?; + + let started = Instant::now(); + for _ in 0..TOTAL { + let ack_fut = js.publish(SUBJECT_SYNC, payload()).await?; + ack_fut.await?; + } + let elapsed = started.elapsed(); + + let info = js.get_stream("BENCH_SYNC").await?.get_info().await?; + assert_eq!(info.state.messages as usize, TOTAL); + println!("[sync] done in {:.3}s", elapsed.as_secs_f64()); + Ok(Stats { elapsed }) +} + +async fn bench_async(js: &jetstream::Context) -> Result> { + const MAX_INFLIGHT: usize = 1024; + println!("\n[async] R1 stream, max {MAX_INFLIGHT} inflight PubAck futures"); + create_stream(js, "BENCH_ASYNC", "bench.async").await?; + + let started = Instant::now(); + let mut inflight: FuturesUnordered<_> = FuturesUnordered::new(); + for _ in 0..TOTAL { + if inflight.len() >= MAX_INFLIGHT + && let Some(res) = inflight.next().await + { + res?; + } + let ack_fut = js.publish(SUBJECT_ASYNC, payload()).await?; + inflight.push(ack_fut.into_future()); + } + while let Some(res) = inflight.next().await { + res?; + } + let elapsed = started.elapsed(); + + let info = js.get_stream("BENCH_ASYNC").await?.get_info().await?; + assert_eq!(info.state.messages as usize, TOTAL); + println!("[async] done in {:.3}s", elapsed.as_secs_f64()); + Ok(Stats { elapsed }) +} + +/// Fire core-NATS publish for the first N-1 messages (no JS ack handshake); +/// for the last, use `js.publish` so the returned PubAck barriers the whole +/// run. 
The stream still captures every message because subjects match — JS +/// captures whatever lands on its subjects regardless of who published it. +/// orbit.go reports this as the fastest path; useful as an upper bound. +async fn bench_core_then_js( + js: &jetstream::Context, +) -> Result> { + println!("\n[core+js] R1 stream, core publish first N-1 + js.publish last"); + create_stream(js, "BENCH_CORE", "bench.core").await?; + let nc = js.clone(); + + let started = Instant::now(); + for _ in 0..(TOTAL - 1) { + nc.client().publish("bench.core", payload()).await?; + } + let ack_fut = js.publish("bench.core", payload()).await?; + ack_fut.await?; + let elapsed = started.elapsed(); + + let info = js.get_stream("BENCH_CORE").await?.get_info().await?; + assert_eq!(info.state.messages as usize, TOTAL); + println!("[core+js] done in {:.3}s", elapsed.as_secs_f64()); + Ok(Stats { elapsed }) +} + +async fn bench_atomic(js: &jetstream::Context) -> Result> { + println!("\n[atomic] R1 stream, allow_atomic_publish"); + let _ = js.delete_stream("BENCH_ATOMIC").await; + js.create_stream(stream::Config { + name: "BENCH_ATOMIC".into(), + subjects: vec!["bench.atomic".into()], + num_replicas: 1, + allow_atomic_publish: true, + ..Default::default() + }) + .await?; + + // Atomic batches cap at 1000 messages server-side, so chunk into + // ceil(TOTAL/1000) batches. Each batch is one round-trip: ack_first on + // open, fire-and-forget for middles, request on commit. Flow-control via + // ack_every is left default (no flow ack between first and commit), which + // is the orbit.go default. 
+ const BATCH: usize = 1000; + let mut sent = 0; + let started = Instant::now(); + while sent < TOTAL { + let n = (TOTAL - sent).min(BATCH); + let mut batch = js.batch_publish().build(); + for _ in 0..(n - 1) { + batch.add(SUBJECT_ATOMIC, payload()).await?; + } + batch.commit(SUBJECT_ATOMIC, payload()).await?; + sent += n; + } + let elapsed = started.elapsed(); + + let info = js.get_stream("BENCH_ATOMIC").await?.get_info().await?; + assert_eq!(info.state.messages as usize, TOTAL); + println!("[atomic] done in {:.3}s ({} batches of {BATCH})", elapsed.as_secs_f64(), TOTAL / BATCH); + Ok(Stats { elapsed }) +} + +async fn bench_fast(js: &jetstream::Context) -> Result> { + println!("\n[fast] R1 stream, allow_batched"); + let _ = js.delete_stream("BENCH_FAST").await; + let body = json!({ + "name": "BENCH_FAST", + "subjects": ["bench.fast"], + "num_replicas": 1, + "retention": "limits", + "storage": "file", + "allow_batched": true, + }); + let resp: serde_json::Value = js.request("STREAM.CREATE.BENCH_FAST", &body).await?; + if let Some(err) = resp.get("error") { + return Err(format!("STREAM.CREATE failed: {err}").into()); + } + + let mut batch = js + .fast_publish() + .flow(1000) + .max_outstanding_acks(2) + .gap_mode(GapMode::Fail) + .ack_timeout(Duration::from_secs(10)) + .build()?; + + let started = Instant::now(); + for _ in 0..(TOTAL - 1) { + batch.add(SUBJECT_FAST, payload()).await?; + } + let pub_ack = batch.commit(SUBJECT_FAST, payload()).await?; + let elapsed = started.elapsed(); + + assert_eq!(pub_ack.batch_size as usize, TOTAL); + let info = js.get_stream("BENCH_FAST").await?.get_info().await?; + assert_eq!(info.state.messages as usize, TOTAL); + println!("[fast] done in {:.3}s (batch_id={})", elapsed.as_secs_f64(), pub_ack.batch_id); + Ok(Stats { elapsed }) +} diff --git a/jetstream-extra/src/batch_publish_fast.rs b/jetstream-extra/src/batch_publish_fast.rs index fc56a7f..44167d4 100644 --- a/jetstream-extra/src/batch_publish_fast.rs +++ 
b/jetstream-extra/src/batch_publish_fast.rs @@ -42,6 +42,7 @@ use std::{ }; use async_nats::jetstream::message::OutboundMessage; +use async_nats::Subject; use async_nats::subject::ToSubject; use bytes::Bytes; use futures::StreamExt; @@ -317,9 +318,16 @@ pub(crate) fn build_reply_prefix(inbox: &str, flow: u16, gap: GapMode) -> String /// Build a full per-message reply subject: `<prefix><seq>.<op>.$FI`. /// /// `$FI` marks the subject as a fast-ingest reply to the server's parser -/// (`server/stream.go:getFastBatch`). -pub(crate) fn build_reply(prefix: &str, seq: u64, op: Operation) -> String { - format!("{prefix}{seq}.{}.$FI", op as u8) +/// (`server/stream.go:getFastBatch`). Returns a [`Subject`] so the caller can +/// pass it to `publish_with_reply` without an extra `String → Bytes` copy. +pub(crate) fn build_reply(prefix: &str, seq: u64, op: Operation) -> Subject { + use std::fmt::Write as _; + // Hot path: pre-size for prefix + up to 20 digits of u64 + ".N.$FI" (6). + let mut s = String::with_capacity(prefix.len() + 26); + s.push_str(prefix); + write!(s, "{seq}.{}.$FI", op as u8).expect("String write is infallible"); + // `From<String> for Subject` is zero-copy — moves the buffer into Bytes. 
+ Subject::from(s) } /// Validate that an inbox has the shape the fast-ingest reply subject parser @@ -973,7 +981,7 @@ impl FastPublisher { async fn publish_raw( &mut self, msg: OutboundMessage, - reply: String, + reply: Subject, ) -> Result<(), FastPublishError> { let OutboundMessage { subject, @@ -1365,7 +1373,7 @@ mod tests { (Operation::Ping, 4), ] { let r = build_reply(&prefix, 42, op); - assert_eq!(r, format!("_INBOX.x.10.fail.42.{code}.$FI")); + assert_eq!(r.as_str(), format!("_INBOX.x.10.fail.42.{code}.$FI")); } } @@ -1374,7 +1382,7 @@ mod tests { for (mode, tag) in [(GapMode::Ok, "ok"), (GapMode::Fail, "fail")] { let prefix = build_reply_prefix("_INBOX.abc", 25, mode); let r = build_reply(&prefix, 1, Operation::Start); - assert_eq!(r, format!("_INBOX.abc.25.{tag}.1.0.$FI")); + assert_eq!(r.as_str(), format!("_INBOX.abc.25.{tag}.1.0.$FI")); } } From cc93e62191978b46d5b871f93f1ee41f7ce053a0 Mon Sep 17 00:00:00 2001 From: Tomasz Pietrek Date: Fri, 8 May 2026 16:57:38 +0200 Subject: [PATCH 8/9] Add fast-ingest config sweep example MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sweeps `flow` × `max_outstanding_acks` × payload-size for fast batch publishing on a local R1 stream. On this machine `max_outstanding_acks=3` beats `=2` by 4-7%; flow size has minimal effect within a max_acks group. Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Tomasz Pietrek --- jetstream-extra/examples/bench_fast_sweep.rs | 177 +++++++++++++++++++ 1 file changed, 177 insertions(+) create mode 100644 jetstream-extra/examples/bench_fast_sweep.rs diff --git a/jetstream-extra/examples/bench_fast_sweep.rs b/jetstream-extra/examples/bench_fast_sweep.rs new file mode 100644 index 0000000..36b296e --- /dev/null +++ b/jetstream-extra/examples/bench_fast_sweep.rs @@ -0,0 +1,177 @@ +// Copyright 2026 Synadia Communications Inc. 
+// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Sweep `flow` × `max_outstanding_acks` for fast-ingest batch publishing. +//! +//! Identifies the configuration that maximizes throughput on a local R1 +//! stream. Each cell runs `RUNS` independent batches and reports the median. +//! +//! ```bash +//! nats-server -js +//! cargo run -p jetstream-extra --example bench_fast_sweep --release +//! ``` + +use std::time::{Duration, Instant}; + +use async_nats::jetstream::{self}; +use bytes::Bytes; +use futures::StreamExt; +use jetstream_extra::batch_publish_fast::{FastPublishExt, GapMode}; +use serde_json::json; + +const TOTAL: usize = 100_000; +const PAYLOAD_64: &[u8] = b"benchmark payload (~64B) ......................................"; +const SUBJECT: &str = "bench.fastsweep"; +const RUNS: usize = 3; + +const FLOWS: &[u16] = &[500, 1000, 2000, 5000, 10000]; +const MAX_ACKS: &[u16] = &[2, 3]; +const PAYLOADS: &[(&str, &[u8])] = &[("0B", b""), ("64B", PAYLOAD_64)]; + +#[tokio::main(flavor = "current_thread")] +async fn main() -> Result<(), Box> { + let client = async_nats::ConnectOptions::new() + .client_capacity(8192) + .subscription_capacity(8192) + .connect("nats://127.0.0.1:4222") + .await?; + let js = jetstream::new(client); + + // Wipe any leftover sweep streams from prior aborted runs. 
+ let mut names = js.stream_names(); + while let Some(name) = names.next().await { + if let Ok(name) = name + && name.starts_with("BENCH_FAST_SWEEP") + { + let _ = js.delete_stream(&name).await; + } + } + drop(names); + + println!("== fast-ingest sweep: {TOTAL} messages × {RUNS} runs each =="); + + for &(plabel, payload_bytes) in PAYLOADS { + println!(); + println!("=== payload {plabel} ({} bytes) ===", payload_bytes.len()); + println!(" flow max_acks median_msg/s p_min p_max"); + + let mut best: Option<(u16, u16, f64)> = None; + for &flow in FLOWS { + for &max_acks in MAX_ACKS { + let runs = run_n_times(&js, flow, max_acks, payload_bytes, RUNS).await?; + let median = median(&runs); + let p_min = runs.iter().cloned().fold(f64::INFINITY, f64::min); + let p_max = runs.iter().cloned().fold(0.0_f64, f64::max); + + println!( + " {flow:>5} {max_acks:>8} {:>12.0} {:>8.0} {:>8.0}", + median, p_min, p_max + ); + + if best.map_or(true, |(_, _, m)| median > m) { + best = Some((flow, max_acks, median)); + } + } + } + + if let Some((f, m, t)) = best { + println!( + " ==> best for {plabel}: flow={f}, max_outstanding_acks={m} → {:.0} msg/s", + t + ); + } + } + Ok(()) +} + +async fn run_n_times( + js: &jetstream::Context, + flow: u16, + max_acks: u16, + payload: &'static [u8], + n: usize, +) -> Result, Box> { + let mut out = Vec::with_capacity(n); + for _ in 0..n { + out.push(run_once(js, flow, max_acks, payload).await?); + } + Ok(out) +} + +async fn run_once( + js: &jetstream::Context, + flow: u16, + max_acks: u16, + payload_bytes: &'static [u8], +) -> Result> { + // Unique stream name per run avoids any stale fast-batch tracker state + // from a previous iteration's just-deleted stream. 
+ let stream_name = format!( + "BENCH_FAST_SWEEP_{flow}_{max_acks}_{}", + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_nanos() + ); + let subject = format!("{SUBJECT}.{flow}_{max_acks}"); + let body = json!({ + "name": stream_name, + "subjects": [subject.clone()], + "num_replicas": 1, + "retention": "limits", + "storage": "file", + "allow_batched": true, + }); + let resp: serde_json::Value = js + .request(format!("STREAM.CREATE.{stream_name}"), &body) + .await?; + if let Some(err) = resp.get("error") { + return Err(format!("STREAM.CREATE failed: {err}").into()); + } + + let mut batch = js + .fast_publish() + .flow(flow) + .max_outstanding_acks(max_acks) + .gap_mode(GapMode::Fail) + .ack_timeout(Duration::from_secs(30)) + .build()?; + + let payload = || Bytes::from_static(payload_bytes); + let started = Instant::now(); + for i in 0..(TOTAL - 1) { + batch.add(subject.clone(), payload()).await.map_err(|e| { + format!("add {i} failed (flow={flow}, max={max_acks}): {e:?}") + })?; + } + let pub_ack = batch + .commit(subject.clone(), payload()) + .await + .map_err(|e| format!("commit failed (flow={flow}, max={max_acks}): {e:?}"))?; + let elapsed = started.elapsed(); + assert_eq!(pub_ack.batch_size as usize, TOTAL); + + let _ = js.delete_stream(&stream_name).await; + Ok(TOTAL as f64 / elapsed.as_secs_f64()) +} + +fn median(xs: &[f64]) -> f64 { + let mut v = xs.to_vec(); + v.sort_by(|a, b| a.partial_cmp(b).unwrap()); + let n = v.len(); + if n.is_multiple_of(2) { + (v[n / 2 - 1] + v[n / 2]) / 2.0 + } else { + v[n / 2] + } +} From 12df419aa43d8e0d7393fbbceee9644b11f09248 Mon Sep 17 00:00:00 2001 From: Tomasz Pietrek Date: Sun, 10 May 2026 07:57:25 +0200 Subject: [PATCH 9/9] Remove some examples Signed-off-by: Tomasz Pietrek --- .../examples/batch_fetch_time_based.rs | 185 ------------------ .../batch_fetch_with_error_handling.rs | 123 ------------ .../examples/batch_fetch_with_max_bytes.rs | 122 ------------ 
jetstream-extra/examples/bench_fast_sweep.rs | 9 +- .../examples/bench_normal_vs_fast.rs | 21 +- jetstream-extra/examples/fast_publisher.rs | 102 ---------- jetstream-extra/examples/test_batch_fetch.rs | 27 --- jetstream-extra/src/batch_publish_fast.rs | 38 +--- 8 files changed, 21 insertions(+), 606 deletions(-) delete mode 100644 jetstream-extra/examples/batch_fetch_time_based.rs delete mode 100644 jetstream-extra/examples/batch_fetch_with_error_handling.rs delete mode 100644 jetstream-extra/examples/batch_fetch_with_max_bytes.rs delete mode 100644 jetstream-extra/examples/fast_publisher.rs delete mode 100644 jetstream-extra/examples/test_batch_fetch.rs diff --git a/jetstream-extra/examples/batch_fetch_time_based.rs b/jetstream-extra/examples/batch_fetch_time_based.rs deleted file mode 100644 index 067a8d1..0000000 --- a/jetstream-extra/examples/batch_fetch_time_based.rs +++ /dev/null @@ -1,185 +0,0 @@ -// Copyright 2025 Synadia Communications Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -use async_nats::jetstream::{self, stream}; -use futures::StreamExt; -use jetstream_extra::batch_fetch::BatchFetchExt; -use std::time::{Duration, SystemTime}; -use time::OffsetDateTime; - -#[tokio::main] -async fn main() -> Result<(), Box> { - // Connect to NATS server - let client = async_nats::connect("localhost:4222").await?; - let context = jetstream::new(client); - - // Create a stream - let stream_name = "TIME_BASED_STREAM"; - context - .create_stream(stream::Config { - name: stream_name.to_string(), - subjects: vec!["sensor.*".to_string()], - allow_direct: true, - ..Default::default() - }) - .await?; - - // Publish messages at different times - println!("Publishing messages with time gaps:"); - - // Publish first batch - let _start_time = SystemTime::now(); - for i in 0..5 { - context - .publish("sensor.temperature", format!("temp reading {}", i).into()) - .await? - .await?; - println!(" Published: temp reading {} at start", i); - } - - // Wait 2 seconds - println!("\n Waiting 2 seconds...\n"); - tokio::time::sleep(Duration::from_secs(2)).await; - let mid_time = OffsetDateTime::now_utc(); - - // Publish second batch - for i in 5..10 { - context - .publish("sensor.temperature", format!("temp reading {}", i).into()) - .await? - .await?; - println!(" Published: temp reading {} at +2s", i); - } - - // Wait another 2 seconds - println!("\n Waiting 2 seconds...\n"); - tokio::time::sleep(Duration::from_secs(2)).await; - let late_time = OffsetDateTime::now_utc(); - - // Publish third batch - for i in 10..15 { - context - .publish("sensor.temperature", format!("temp reading {}", i).into()) - .await? 
- .await?; - println!(" Published: temp reading {} at +4s", i); - } - - // Example 1: Fetch all messages - println!("\n=== Example 1: Fetch all messages ==="); - let mut messages = context.get_batch(stream_name, 20).send().await?; - - let mut all_count = 0; - while let Some(msg) = messages.next().await { - let _msg = msg?; - all_count += 1; - } - println!("Total messages in stream: {}", all_count); - - // Example 2: Fetch messages after mid_time (should get last 10) - println!("\n=== Example 2: Fetch messages after +2s mark ==="); - let mut messages = context - .get_batch(stream_name, 20) - .start_time(mid_time) - .send() - .await?; - - let mut mid_count = 0; - while let Some(msg) = messages.next().await { - let msg = msg?; - let payload = String::from_utf8(msg.payload.to_vec())?; - println!(" {}: {}", msg.sequence, payload); - mid_count += 1; - } - println!("Messages after +2s: {} (expected 10)", mid_count); - - // Example 3: Fetch messages after late_time (should get last 5) - println!("\n=== Example 3: Fetch messages after +4s mark ==="); - let mut messages = context - .get_batch(stream_name, 20) - .start_time(late_time) - .send() - .await?; - - let mut late_count = 0; - while let Some(msg) = messages.next().await { - let msg = msg?; - let payload = String::from_utf8(msg.payload.to_vec())?; - println!(" {}: {}", msg.sequence, payload); - late_count += 1; - } - println!("Messages after +4s: {} (expected 5)", late_count); - - // Example 4: Use sequence starting point instead of time - println!("\n=== Example 4: Starting from sequence 8 ==="); - let mut messages = context - .get_batch(stream_name, 20) - .sequence(8) - .send() - .await?; - - let mut combo_count = 0; - while let Some(msg) = messages.next().await { - let msg = msg?; - let payload = String::from_utf8(msg.payload.to_vec())?; - println!(" Seq {}: {}", msg.sequence, payload); - combo_count += 1; - } - println!("Messages from seq 8: {}", combo_count); - - // Example 5: Get last messages up to a specific 
time - println!("\n=== Example 5: Get last messages for subjects up to +2s mark ==="); - let mut messages = context - .get_last_messages_for(stream_name) - .subjects(vec!["sensor.temperature".to_string()]) - .up_to_time(mid_time) - .send() - .await?; - - while let Some(msg) = messages.next().await { - let msg = msg?; - let payload = String::from_utf8(msg.payload.to_vec())?; - println!( - " Last message up to +2s: {} (seq {})", - payload, msg.sequence - ); - } - - // Example 6: Demonstrate nanosecond precision preservation - println!("\n=== Example 6: Time precision check ==="); - let _precise_time = SystemTime::now(); - - // Publish a message with precise timestamp - context - .publish("sensor.precision", "precision test".into()) - .await? - .await?; - - // Fetch it back - let mut messages = context - .get_batch(stream_name, 1) - .subject("sensor.precision") - .send() - .await?; - - if let Some(msg) = messages.next().await { - let msg = msg?; - println!(" Message timestamp: {:?}", msg.time); - println!( - " Nanosecond precision preserved: {}", - msg.time.nanosecond() - ); - } - - Ok(()) -} diff --git a/jetstream-extra/examples/batch_fetch_with_error_handling.rs b/jetstream-extra/examples/batch_fetch_with_error_handling.rs deleted file mode 100644 index 7909817..0000000 --- a/jetstream-extra/examples/batch_fetch_with_error_handling.rs +++ /dev/null @@ -1,123 +0,0 @@ -// Copyright 2025 Synadia Communications Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -use async_nats::jetstream::{self, stream}; -use futures::StreamExt; -use jetstream_extra::batch_fetch::{BatchFetchErrorKind, BatchFetchExt}; - -#[tokio::main] -async fn main() -> Result<(), Box> { - // Connect to NATS server - let client = async_nats::connect("localhost:4222").await?; - let context = jetstream::new(client); - - // Create a stream with direct access enabled - let stream_name = "EXAMPLE_STREAM"; - context - .create_stream(stream::Config { - name: stream_name.to_string(), - subjects: vec!["events.*".to_string()], - allow_direct: true, // Required for batch fetch - ..Default::default() - }) - .await?; - - // Publish some test messages - for i in 0..10 { - context - .publish( - format!("events.type{}", i % 3), - format!("message {}", i).into(), - ) - .await? - .await?; - } - - // Example 1: Handle batch size limit errors - println!("Example 1: Batch size limit validation"); - match context.get_batch(stream_name, 200).send().await { - Ok(_) => println!("Unexpected success"), - Err(e) if e.kind() == BatchFetchErrorKind::BatchSizeTooLarge => { - println!("✓ Correctly rejected batch size > 1000: {}", e); - } - Err(e) => println!("Unexpected error: {}", e), - } - - // Example 2: Handle empty stream name - println!("\nExample 3: Empty stream name validation"); - match context.get_batch("", 10).send().await { - Ok(_) => println!("Unexpected success"), - Err(e) if e.kind() == BatchFetchErrorKind::InvalidStreamName => { - println!("✓ Correctly rejected empty stream name: {}", e); - } - Err(e) => println!("Unexpected error: {}", e), - } - - // Example 4: Successful batch fetch with error handling - println!("\nExample 4: Successful batch fetch with error handling"); - let mut messages = context - .get_batch(stream_name, 5) - .subject("events.type0") - .send() - .await?; - - let mut count = 0; - while let Some(result) = messages.next().await { - match result { - Ok(msg) => { - println!( - " Message seq {}: subject={}, payload_size={}", - msg.sequence, - 
msg.subject, - msg.payload.len() - ); - count += 1; - } - Err(e) => { - // Handle individual message errors - match e.kind() { - BatchFetchErrorKind::NoMessages => { - println!(" No more messages available"); - break; - } - BatchFetchErrorKind::UnsupportedByServer => { - println!(" Server doesn't support batch fetch"); - break; - } - _ => { - println!(" Error fetching message: {}", e); - } - } - } - } - } - println!(" Successfully fetched {} messages", count); - - // Example 5: Handle too many subjects in multi_last - println!("\nExample 5: Too many subjects validation"); - let many_subjects: Vec = (0..1025).map(|i| format!("events.{}", i)).collect(); - match context - .get_last_messages_for(stream_name) - .subjects(many_subjects) - .send() - .await - { - Ok(_) => println!("Unexpected success"), - Err(e) if e.kind() == BatchFetchErrorKind::TooManySubjects => { - println!("✓ Correctly rejected > 1024 subjects: {}", e); - } - Err(e) => println!("Unexpected error: {}", e), - } - - Ok(()) -} diff --git a/jetstream-extra/examples/batch_fetch_with_max_bytes.rs b/jetstream-extra/examples/batch_fetch_with_max_bytes.rs deleted file mode 100644 index 1a001d2..0000000 --- a/jetstream-extra/examples/batch_fetch_with_max_bytes.rs +++ /dev/null @@ -1,122 +0,0 @@ -// Copyright 2025 Synadia Communications Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -use async_nats::jetstream::{self, stream}; -use futures::StreamExt; -use jetstream_extra::batch_fetch::BatchFetchExt; - -#[tokio::main] -async fn main() -> Result<(), Box> { - // Connect to NATS server - let client = async_nats::connect("localhost:4222").await?; - let context = jetstream::new(client); - - // Create a stream - let stream_name = "BYTES_LIMIT_STREAM"; - context - .create_stream(stream::Config { - name: stream_name.to_string(), - subjects: vec!["data.*".to_string()], - allow_direct: true, - ..Default::default() - }) - .await?; - - // Publish messages of varying sizes - println!("Publishing messages of different sizes:"); - for i in 0..20 { - let size = (i + 1) * 100; // 100, 200, 300... bytes - let payload = "x".repeat(size); - context - .publish(format!("data.msg{}", i), payload.into()) - .await? - .await?; - println!(" Published message {} with {} bytes", i, size); - } - - // Example 1: Fetch messages with max_bytes limit - println!("\nFetching messages with max_bytes=1000:"); - let mut messages = context - .get_batch(stream_name, 20) - .max_bytes(1000) // But limit total bytes to 1000 - .send() - .await?; - - let mut total_bytes = 0; - let mut count = 0; - while let Some(msg) = messages.next().await { - let msg = msg?; - let payload_size = - base64::Engine::decode(&base64::engine::general_purpose::STANDARD, &msg.payload)?.len(); - total_bytes += payload_size; - count += 1; - println!( - " Message {}: {} bytes (total: {} bytes)", - msg.sequence, payload_size, total_bytes - ); - } - - println!( - "\nFetched {} messages totaling {} bytes (limit was 1000)", - count, total_bytes - ); - - // Example 2: Compare with unlimited fetch - println!("\nFetching same batch without byte limit:"); - let mut messages = context.get_batch(stream_name, 20).send().await?; - - let mut total_bytes_unlimited = 0; - let mut count_unlimited = 0; - while let Some(msg) = messages.next().await { - let msg = msg?; - let payload_size = - 
base64::Engine::decode(&base64::engine::general_purpose::STANDARD, &msg.payload)?.len(); - total_bytes_unlimited += payload_size; - count_unlimited += 1; - } - - println!( - "Without limit: {} messages, {} bytes", - count_unlimited, total_bytes_unlimited - ); - - // Example 3: Use max_bytes with subject filter - println!("\nCombining max_bytes with subject filter:"); - let mut messages = context - .get_batch(stream_name, 20) - .subject("data.msg1*") // Only msg10-19 - .max_bytes(2000) - .send() - .await?; - - let mut filtered_bytes = 0; - let mut filtered_count = 0; - while let Some(msg) = messages.next().await { - let msg = msg?; - let payload_size = - base64::Engine::decode(&base64::engine::general_purpose::STANDARD, &msg.payload)?.len(); - filtered_bytes += payload_size; - filtered_count += 1; - println!( - " {} (seq {}): {} bytes", - msg.subject, msg.sequence, payload_size - ); - } - - println!( - "\nFiltered fetch: {} messages, {} bytes", - filtered_count, filtered_bytes - ); - - Ok(()) -} diff --git a/jetstream-extra/examples/bench_fast_sweep.rs b/jetstream-extra/examples/bench_fast_sweep.rs index 36b296e..ecf5d18 100644 --- a/jetstream-extra/examples/bench_fast_sweep.rs +++ b/jetstream-extra/examples/bench_fast_sweep.rs @@ -78,7 +78,7 @@ async fn main() -> Result<(), Box> { median, p_min, p_max ); - if best.map_or(true, |(_, _, m)| median > m) { + if best.is_none_or(|(_, _, m)| median > m) { best = Some((flow, max_acks, median)); } } @@ -150,9 +150,10 @@ async fn run_once( let payload = || Bytes::from_static(payload_bytes); let started = Instant::now(); for i in 0..(TOTAL - 1) { - batch.add(subject.clone(), payload()).await.map_err(|e| { - format!("add {i} failed (flow={flow}, max={max_acks}): {e:?}") - })?; + batch + .add(subject.clone(), payload()) + .await + .map_err(|e| format!("add {i} failed (flow={flow}, max={max_acks}): {e:?}"))?; } let pub_ack = batch .commit(subject.clone(), payload()) diff --git a/jetstream-extra/examples/bench_normal_vs_fast.rs 
b/jetstream-extra/examples/bench_normal_vs_fast.rs index a90c245..13712b1 100644 --- a/jetstream-extra/examples/bench_normal_vs_fast.rs +++ b/jetstream-extra/examples/bench_normal_vs_fast.rs @@ -82,7 +82,10 @@ fn print_row(label: &str, s: Stats) { let secs = s.elapsed.as_secs_f64(); let msgs_per_sec = TOTAL as f64 / secs; let mb_per_sec = (TOTAL as f64 * PAYLOAD.len() as f64) / secs / 1_048_576.0; - println!("{label} {:>8.3}s {:>8.0} {:>6.2}", secs, msgs_per_sec, mb_per_sec); + println!( + "{label} {:>8.3}s {:>8.0} {:>6.2}", + secs, msgs_per_sec, mb_per_sec + ); } fn payload() -> Bytes { @@ -154,9 +157,7 @@ async fn bench_async(js: &jetstream::Context) -> Result Result> { +async fn bench_core_then_js(js: &jetstream::Context) -> Result> { println!("\n[core+js] R1 stream, core publish first N-1 + js.publish last"); create_stream(js, "BENCH_CORE", "bench.core").await?; let nc = js.clone(); @@ -208,7 +209,11 @@ async fn bench_atomic(js: &jetstream::Context) -> Result Result Result<(), Box> { - let client = async_nats::connect("nats://127.0.0.1:4222").await?; - let js = async_nats::jetstream::new(client); - - // Create the stream via raw JSON (async-nats 0.45.0 doesn't know about - // allow_batched yet). - let _ = js.delete_stream("METRICS").await; - let body = json!({ - "name": "METRICS", - "subjects": ["metrics.>"], - "retention": "limits", - "storage": "file", - "allow_batched": true, - }); - let resp: serde_json::Value = js.request("STREAM.CREATE.METRICS", &body).await?; - if resp.get("error").is_some() { - return Err(format!("STREAM.CREATE failed: {resp}").into()); - } - println!("created stream METRICS with allow_batched=true"); - - // Build the publisher. Low flow + max=2 means acks arrive every 50 - // messages and up to 100 are in flight at once — exercises the stall - // gate and auto-ping path on a realistic load. 
- let mut batch = js - .fast_publish() - .flow(50) - .max_outstanding_acks(2) - .gap_mode(GapMode::Fail) - .ack_timeout(Duration::from_secs(10)) - .on_error(|err| eprintln!("fast publish event: {err}")) - .build()?; - - println!("publishing 500 messages ..."); - for i in 0..500 { - let ack = batch - .add("metrics.cpu", format!("sample {i}").into()) - .await?; - if i % 50 == 49 { - println!( - " published seq={} (server acked up to {})", - ack.batch_sequence, ack.ack_sequence - ); - } - } - - // End-of-batch commit: the commit message itself is NOT stored, so the - // final stream state has exactly 500 messages. - let pub_ack = batch.close().await?; - println!( - "batch committed: stream={}, batch_size={}, batch_id={}", - pub_ack.stream, pub_ack.batch_size, pub_ack.batch_id - ); - - // Verify what landed on the stream. - let stream = js.get_stream("METRICS").await?; - let info = stream.get_info().await?; - println!("stream state: {} messages", info.state.messages); - assert_eq!(info.state.messages, 500); - - Ok(()) -} diff --git a/jetstream-extra/examples/test_batch_fetch.rs b/jetstream-extra/examples/test_batch_fetch.rs deleted file mode 100644 index 2e215d4..0000000 --- a/jetstream-extra/examples/test_batch_fetch.rs +++ /dev/null @@ -1,27 +0,0 @@ -use async_nats::jetstream; -use futures::StreamExt; -use jetstream_extra::batch_fetch::BatchFetchExt; - -#[tokio::main] -async fn main() -> Result<(), Box> { - // Connect to NATS - let client = async_nats::connect("localhost:4222").await?; - let context = jetstream::new(client); - - // Try to get a batch (will fail because server doesn't support yet) - match context.get_batch("test_stream", 20).send().await { - Ok(mut stream) => { - while let Some(msg) = stream.next().await { - match msg { - Ok(m) => println!("Got message: seq={}", m.sequence), - Err(e) => println!("Stream error: {:?}", e), - } - } - } - Err(e) => { - println!("Expected error (server doesn't support batch get): {:?}", e); - } - } - - Ok(()) -} diff --git 
a/jetstream-extra/src/batch_publish_fast.rs b/jetstream-extra/src/batch_publish_fast.rs index 44167d4..446e75f 100644 --- a/jetstream-extra/src/batch_publish_fast.rs +++ b/jetstream-extra/src/batch_publish_fast.rs @@ -41,8 +41,8 @@ use std::{ time::Duration, }; -use async_nats::jetstream::message::OutboundMessage; use async_nats::Subject; +use async_nats::jetstream::message::OutboundMessage; use async_nats::subject::ToSubject; use bytes::Bytes; use futures::StreamExt; @@ -51,10 +51,6 @@ use serde::Deserialize; use crate::batch_publish::BatchPubAck; -// --------------------------------------------------------------------------- -// Public enums -// --------------------------------------------------------------------------- - /// How the server should handle gaps in the batch sequence. /// /// A gap means one or more messages in the batch were lost in transit between @@ -94,10 +90,6 @@ pub(crate) enum Operation { Ping = 4, } -// --------------------------------------------------------------------------- -// Errors -// --------------------------------------------------------------------------- - /// Error type for fast-ingest batch publish operations. pub type FastPublishError = async_nats::error::Error; @@ -202,10 +194,6 @@ impl Display for FastPublishErrorKind { } } -// --------------------------------------------------------------------------- -// Wire protocol structs (server → client) -// --------------------------------------------------------------------------- - /// Flow-control message sent by the server when a batch of messages has been /// persisted. Wire tag: `"type":"ack"`. 
#[derive(Debug, Clone, Copy, Deserialize, PartialEq, Eq)] @@ -301,10 +289,6 @@ pub(crate) fn classify(payload: &[u8]) -> Result { }) } -// --------------------------------------------------------------------------- -// Reply subject construction -// --------------------------------------------------------------------------- - /// Build the stable prefix of the reply subject: /// `...` /// @@ -352,10 +336,6 @@ pub(crate) fn validate_inbox_shape(inbox: &str) -> Result<(), FastPublishError> Ok(()) } -// --------------------------------------------------------------------------- -// Stall formula -// --------------------------------------------------------------------------- - /// Decide whether the publisher should stall before sending the next message. /// /// Matches the canonical ADR-50 / orbit.go form: wait iff @@ -388,10 +368,6 @@ pub(crate) fn should_stall( window <= next_sequence } -// --------------------------------------------------------------------------- -// Builder knobs -// --------------------------------------------------------------------------- - /// Default initial flow (ack-every-N) requested from the server. pub(crate) const DEFAULT_FLOW: u16 = 100; @@ -404,10 +380,6 @@ pub(crate) const MIN_MAX_OUTSTANDING_ACKS: u16 = 1; /// Maximum allowed value for `max_outstanding_acks` (ADR-50 recommends 1..=3). pub(crate) const MAX_MAX_OUTSTANDING_ACKS: u16 = 3; -// --------------------------------------------------------------------------- -// Extension trait -// --------------------------------------------------------------------------- - /// Extension trait adding [`fast_publish`](FastPublishExt::fast_publish) to any /// JetStream-context-like type. 
/// @@ -442,10 +414,6 @@ impl FastPublishExt for T where { } -// --------------------------------------------------------------------------- -// Builder -// --------------------------------------------------------------------------- - /// Callback type for asynchronous fast-publish errors (gaps, flow errors, /// per-message server errors). Invoked synchronously on the publisher's task /// whenever such an event is drained from the inbox. Keep the callback fast @@ -587,10 +555,6 @@ impl FastPublisherBuilder { } } -// --------------------------------------------------------------------------- -// FastPublisher -// --------------------------------------------------------------------------- - /// A non-atomic, high-throughput JetStream batch publisher using the /// fast-ingest protocol (ADR-50, nats-server 2.14+). ///