From 5e33a10df1611df8ce33812f1243c0457878dca4 Mon Sep 17 00:00:00 2001 From: Iain McGinniss <309153+iainmcgin@users.noreply.github.com> Date: Tue, 26 May 2026 19:31:00 +0000 Subject: [PATCH] codegen: propagate bytes_fields to map values (#76) (#147) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary - Under `use_bytes_type()` (or `use_bytes_type_in(...)`), `map` values now become `bytes::Bytes` instead of `Vec` — the lone hold-out among bytes contexts. This lets map values participate in the `to_owned_from_source` zero-copy `slice_ref` path that singular / optional / repeated / oneof bytes_fields already use. - One intentional carve-out: `map` (only reachable via `strict_utf8_mapping(true)`) keeps `Vec` values because the existing `bytes_key_bytes_val_map` JSON helper is concrete `HashMap, Vec>`. Documented on both knobs. - Generated `Arbitrary` impls for the affected map fields go through a new `__private::arbitrary_bytes_map` shim (`K: Arbitrary + Eq + Hash` — every proto map-key type qualifies). No turbofish needed at the call site. - **Breaking** for code already using `use_bytes_type()` on a proto containing `map`: rewrite value construction from `Vec` to `bytes::Bytes` (`b"v".to_vec()` → `bytes::Bytes::from_static(b"v")`). Carve-out logic lives in `classify_field::map_value_use_bytes` (the source of truth) and is mirrored in `impl_message::map_merge_arm`, `impl_text::map_merge_arm`, and `view::map_to_owned_expr` with parallel "see classify_field" comments. Closes #76 (item a). Item c (`checked_add` in `bytes_from_source`) was already done in #84; item d (`string_fields` shared-buffer strings) remains for a future PR. Net diff ≈ 120 lines excluding tests. ## Test plan - [x] Updated `test_bytes_type_map_value_stays_vec` → `test_bytes_type_map_value_uses_bytes` (positive type assertion plus wire / view→owned / JSON / text agreement). - [x] New `test_bytes_type_map_value_to_owned_from_source_zero_copy` asserts `slice_ref` aliasing into the source `Bytes` buf for map values — same property the singular/repeated/oneof tests pin. - [x] Updated `test_bytes_type_json_all_contexts_roundtrip` + `test_bytes_type_view_encode_roundtrip` to construct map values as `Bytes`. - [x] `cargo test --workspace` — 28 test suites pass, no failures. - [x] `cargo clippy --workspace --all-targets -- -D warnings` — clean. - [x] `cargo fmt --all --check` — clean. - [x] `cargo check -p buffa --no-default-features` (host no_std) — clean. - [x] `task gen-wkt-types` and `gen-bootstrap-types` produce no diff (neither set has `map` fields). - [x] Pre-commit `rust-code-reviewer` + `rust-api-ergonomics-reviewer` agents per CLAUDE.md — no Critical findings; High (CHANGELOG entry) and Medium (docstring updates) addressed in this PR. Co-authored-by: senthil sivasubramanian <2329608+senthil1216@users.noreply.github.com> --- CHANGELOG.md | 24 ++++++ buffa-build/src/lib.rs | 34 ++++++++- buffa-codegen/src/impl_message.rs | 50 ++++++++++++- buffa-codegen/src/impl_text.rs | 15 +++- buffa-codegen/src/message.rs | 62 +++++++++++++--- buffa-codegen/src/view.rs | 31 ++++++-- buffa-test/build.rs | 21 ++++++ buffa-test/protos/basic.proto | 7 +- buffa-test/protos/utf8_validation.proto | 5 ++ buffa-test/src/lib.rs | 17 +++++ buffa-test/src/tests/arbitrary_bytes.rs | 5 ++ buffa-test/src/tests/bytes_type.rs | 99 ++++++++++++++++++++++--- buffa/src/lib.rs | 20 +++++ 13 files changed, 348 insertions(+), 42 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2d847fc..ffba896 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,30 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), ## [Unreleased] +### Changed + +- **`use_bytes_type()` / `use_bytes_type_in(...)` now applies to `map` + values (#76).** Previously map values were always `Vec` regardless of + config — the only `bytes`-context not covered. They now match the type used + for singular / optional / repeated / oneof bytes fields under the same rule + (`bytes::Bytes` when configured), so `view → owned` conversion of map values + participates in the `to_owned_from_source` zero-copy `slice_ref` path just + like the other shapes. **Breaking** for code that already enabled + `use_bytes_type()` on a proto containing `map`: at construction + sites, rewrite map-value construction from `Vec` to `bytes::Bytes` + (`b"v".to_vec()` → `bytes::Bytes::from_static(b"v")` for literals, + `bytes::Bytes::from(v)` for an owned `Vec`, or + `bytes::Bytes::copy_from_slice(s)` for a non-`'static` borrow). At read sites, + `bytes::Bytes` has no inherent `as_slice`, so any `as_slice()` on the value + needs replacing — e.g. `map.get(k).map(Vec::as_slice)` becomes + `map.get(k).map(|b| &b[..])`. One carve-out: an effective `map` + keeps `Vec` values; this requires `strict_utf8_mapping(true)` *and* a + `map` whose key carries `[features.utf8_validation = NONE]` + (`strict_utf8_mapping` alone keeps a plain `map` value as + `Bytes`). See the `use_bytes_type_in` docs. Under `generate_arbitrary`, + affected map fields use the new `__private::arbitrary_bytes_map` shim + (`K: Arbitrary + Eq + Hash` — every proto map-key type satisfies this). + ### Fixed - **Module redefinition error when a message and a sub-package share a name diff --git a/buffa-build/src/lib.rs b/buffa-build/src/lib.rs index af7b29e..2cea96e 100644 --- a/buffa-build/src/lib.rs +++ b/buffa-build/src/lib.rs @@ -408,6 +408,15 @@ impl Config { /// **JSON note**: fields normalized to bytes serialize as base64 in JSON /// (the proto3 JSON encoding for `bytes`). Keep strict mapping disabled /// for fields that need JSON string interop with other implementations. + /// + /// **Interaction with [`use_bytes_type`]**: when both are enabled, + /// `map` values stay `Vec` (the bytes-keyed JSON helper + /// is concrete `HashMap, Vec>`). All other `bytes` shapes — + /// singular / optional / repeated / oneof / `map` — + /// still become `bytes::Bytes`. The asymmetry is documented; if you hit + /// it, see issue #76. + /// + /// [`use_bytes_type`]: Self::use_bytes_type #[must_use] pub fn strict_utf8_mapping(mut self, enabled: bool) -> Self { self.codegen_config.strict_utf8_mapping = enabled; @@ -471,11 +480,24 @@ impl Config { /// to all bytes fields, or specify individual field paths like /// `".my.pkg.MyMessage.data"`. /// + /// Applies uniformly to singular, optional, repeated, oneof, **and + /// `map`** values — the map case lets `view → owned` + /// conversion participate in the `to_owned_from_source` zero-copy + /// `slice_ref` path. One carve-out: an effective `map` keeps + /// `Vec` values (the JSON helper for that combination is concrete + /// `HashMap, Vec>`); every other shape becomes `Bytes`. A + /// `bytes` map key is only reachable when [`strict_utf8_mapping`] is enabled + /// *and* the `map` field carries + /// `[features.utf8_validation = NONE]` on its key, which normalizes the + /// string key to `bytes` — `strict_utf8_mapping` alone does not trigger it. + /// + /// [`strict_utf8_mapping`]: Self::strict_utf8_mapping + /// /// # Example /// /// ```rust,ignore /// buffa_build::Config::new() - /// .bytes(&["."]) // all bytes fields use Bytes + /// .use_bytes_type_in(&["."]) // all bytes fields use Bytes /// .files(&["proto/my_service.proto"]) /// .includes(&["proto/"]) /// .compile() @@ -491,8 +513,14 @@ impl Config { /// Use `bytes::Bytes` for all `bytes` fields in all messages. /// - /// This is a convenience for `.use_bytes_type_in(&["."])`. Use `.use_bytes_type_in(&[...])` with - /// specific proto paths if you only want `Bytes` for certain fields. + /// This is a convenience for `.use_bytes_type_in(&["."])`. Use + /// [`use_bytes_type_in`] with specific proto paths if you only want `Bytes` + /// for certain fields. See that method for the path-matching semantics, the + /// `map` rule, and the `map` carve-out under + /// [`strict_utf8_mapping`]. + /// + /// [`use_bytes_type_in`]: Self::use_bytes_type_in + /// [`strict_utf8_mapping`]: Self::strict_utf8_mapping #[must_use] pub fn use_bytes_type(mut self) -> Self { self.codegen_config.bytes_fields.push(".".to_string()); diff --git a/buffa-codegen/src/impl_message.rs b/buffa-codegen/src/impl_message.rs index 98c3144..89e7d70 100644 --- a/buffa-codegen/src/impl_message.rs +++ b/buffa-codegen/src/impl_message.rs @@ -438,7 +438,7 @@ pub fn generate_message_impl( FieldKind::Map(f) => { compute_stmts.push(map_compute_size_stmt(ctx, msg, f, features)?); write_stmts.push(map_write_to_stmt(ctx, msg, f, features)?); - merge_arms.push(map_merge_arm(ctx, msg, f, features)?); + merge_arms.push(map_merge_arm(ctx, msg, f, proto_fqn, features)?); clear_stmts.push(vec_field_clear_stmt(f)?); } FieldKind::Oneof { @@ -849,6 +849,36 @@ pub(crate) fn field_uses_bytes(ctx: &CodeGenContext, proto_fqn: &str, field_name ctx.use_bytes_type(&field_fqn) } +/// Whether a `map` value field should use `bytes::Bytes` instead of +/// `Vec`. +/// +/// Single source of truth for the `bytes_fields` → map-value rule, shared by +/// `classify_field` (owned struct type, serde, and `arbitrary`), the binary and +/// text `map_merge_arm` decoders, and `view::map_to_owned_expr`. Centralizing it +/// keeps every site in agreement — a split decision would emit a `Bytes` value +/// on one side and `Vec` on another, surfacing only as a compile error in +/// the consuming crate. +/// +/// `key_ty` / `val_ty` are the **effective** map-entry types (see +/// [`effective_type_in_map_entry`]); `None` means the entry lacks that field and +/// is treated as non-`bytes`, so a non-map caller naturally yields `false`. The +/// value becomes `Bytes` only when the value is proto `bytes`, the outer map +/// field matches `bytes_fields`, and the key is *not* effective-`bytes`. The key +/// carve-out tracks the one JSON helper (`bytes_key_bytes_val_map`) whose +/// signature is the concrete `HashMap, Vec>`; when the key is +/// effective-`bytes` the value must stay `Vec` to match it. +pub(crate) fn map_value_use_bytes( + ctx: &CodeGenContext, + key_ty: Option, + val_ty: Option, + proto_fqn: &str, + field_name: &str, +) -> bool { + val_ty == Some(Type::TYPE_BYTES) + && key_ty != Some(Type::TYPE_BYTES) + && field_uses_bytes(ctx, proto_fqn, field_name) +} + /// Resolve the [`StringRepr`](crate::StringRepr) for a `string`-typed field. /// /// `proto_fqn` is the fully-qualified message name (no leading dot). Matched @@ -2616,11 +2646,17 @@ fn map_element_encode_stmt(ty: Type, tag_num: u32, var: &Ident) -> TokenStream { /// `buf_expr` is the token stream for the buffer expression — typically /// `quote! { buf }` when the buffer is the outer `merge_to_limit` parameter /// (already `&mut impl Buf`). +/// +/// `use_bytes` switches a `bytes`-typed element from `decode_bytes` (→ `Vec`) +/// to `decode_bytes_to_bytes` (→ `bytes::Bytes`, zero-copy when `buf` is +/// `Bytes`-backed). Only meaningful for value reads on `map` fields +/// when the outer field matches `bytes_fields`; keys are never bytes-typed. fn map_element_decode_stmt( ty: Type, var: &Ident, buf_expr: &TokenStream, features: &ResolvedFeatures, + use_bytes: bool, ) -> TokenStream { let wire_type = wire_type_token(ty); let wire_byte = wire_type_byte(ty); @@ -2636,6 +2672,9 @@ fn map_element_decode_stmt( let closed = is_closed_enum(features); let assign = match ty { Type::TYPE_STRING => quote! { #var = ::buffa::types::decode_string(#buf_expr)?; }, + Type::TYPE_BYTES if use_bytes => { + quote! { #var = ::buffa::types::decode_bytes_to_bytes(#buf_expr)?; } + } Type::TYPE_BYTES => quote! { #var = ::buffa::types::decode_bytes(#buf_expr)?; }, Type::TYPE_ENUM => { if closed { @@ -2772,6 +2811,7 @@ fn map_merge_arm( ctx: &CodeGenContext, msg: &DescriptorProto, field: &FieldDescriptorProto, + proto_fqn: &str, features: &ResolvedFeatures, ) -> Result { let field_name = field @@ -2790,8 +2830,12 @@ fn map_merge_arm( // referenced enum's declaration (not the parent message's). let key_features = crate::features::resolve_field(ctx, key_fd, features); let val_features = crate::features::resolve_field(ctx, val_fd, features); - let decode_key = map_element_decode_stmt(key_ty, &k, &buf_expr, &key_features); - let decode_val = map_element_decode_stmt(val_ty, &v, &buf_expr, &val_features); + // `bytes_fields` on `map` → value decodes into `Bytes` (shared + // carve-out in `map_value_use_bytes`; bytes-key + bytes-value stays `Vec`). + let value_use_bytes = + map_value_use_bytes(ctx, Some(key_ty), Some(val_ty), proto_fqn, field_name); + let decode_key = map_element_decode_stmt(key_ty, &k, &buf_expr, &key_features, false); + let decode_val = map_element_decode_stmt(val_ty, &v, &buf_expr, &val_features, value_use_bytes); let wire_check = wire_type_check( field_number, "e! { ::buffa::encoding::WireType::LengthDelimited }, diff --git a/buffa-codegen/src/impl_text.rs b/buffa-codegen/src/impl_text.rs index f3cdc38..da98753 100644 --- a/buffa-codegen/src/impl_text.rs +++ b/buffa-codegen/src/impl_text.rs @@ -27,7 +27,7 @@ use crate::idents::rust_path_to_tokens; use crate::impl_message::{ effective_type, effective_type_in_map_entry, field_string_repr, field_uses_bytes, find_map_entry_fields, is_explicit_presence_scalar, is_non_default_expr, is_real_oneof_member, - is_required_field, is_supported_field_type, + is_required_field, is_supported_field_type, map_value_use_bytes, }; use crate::message::{is_closed_enum, is_map_field, make_field_ident}; use crate::oneof::is_boxed_variant; @@ -215,7 +215,7 @@ pub(crate) fn generate_text_impl( } let map_merge: Vec<_> = map_fields .iter() - .map(|f| map_merge_arm(ctx, msg, f, current_package, features, nesting)) + .map(|f| map_merge_arm(ctx, msg, f, current_package, proto_fqn, features, nesting)) .collect::>()?; // Extension bracket syntax `[pkg.ext] { ... }` — encode side writes @@ -978,6 +978,7 @@ fn map_merge_arm( msg: &DescriptorProto, field: &FieldDescriptorProto, current_package: &str, + proto_fqn: &str, features: &ResolvedFeatures, nesting: usize, ) -> Result { @@ -1007,8 +1008,11 @@ fn map_merge_arm( } }; - // Value read. Map values are never `bytes::Bytes` (see basic.proto - // comment: map_rust_type_from_entry unconditionally uses Vec). + // `bytes_fields` on `map` → value type is `Bytes`; convert + // `read_bytes()`'s `Vec` via `Bytes::from`. The bytes-key carve-out + // lives in the shared `map_value_use_bytes` predicate. + let value_use_bytes = + map_value_use_bytes(ctx, Some(key_ty), Some(val_ty), proto_fqn, proto_name); let val_read = match val_ty { Type::TYPE_MESSAGE => quote! { { @@ -1024,6 +1028,9 @@ fn map_merge_arm( quote! { #read? } } Type::TYPE_STRING => quote! { __d.read_string()?.into_owned() }, + Type::TYPE_BYTES if value_use_bytes => { + quote! { ::buffa::bytes::Bytes::from(__d.read_bytes()?) } + } Type::TYPE_BYTES => quote! { __d.read_bytes()? }, Type::TYPE_INT32 | Type::TYPE_SINT32 | Type::TYPE_SFIXED32 => quote! { __d.read_i32()? }, Type::TYPE_INT64 | Type::TYPE_SINT64 | Type::TYPE_SFIXED64 => quote! { __d.read_i64()? }, diff --git a/buffa-codegen/src/message.rs b/buffa-codegen/src/message.rs index d6a6ca7..68539a1 100644 --- a/buffa-codegen/src/message.rs +++ b/buffa-codegen/src/message.rs @@ -1325,6 +1325,11 @@ struct FieldInfo { /// True when the field is proto type `bytes` AND matches the `bytes_fields` /// config — i.e. the struct field type is `bytes::Bytes` not `Vec`. use_bytes: bool, + /// True when the field is `map` AND the outer map field matches + /// the `bytes_fields` config — i.e. the map value type is `bytes::Bytes` + /// not `Vec`. Mutually exclusive with `use_bytes` (a map field's outer + /// type is `MESSAGE`, not `BYTES`). + map_value_use_bytes: bool, /// The owned Rust type used for this field when it is proto type `string` /// (singular, optional, or repeated; map keys/values are unaffected). /// [`StringRepr::String`] for non-string fields and for string fields with @@ -1383,6 +1388,21 @@ fn classify_field( let field_fqn = format!(".{}.{}", proto_fqn, field_name); let use_bytes = field_type == Type::TYPE_BYTES && ctx.use_bytes_type(&field_fqn); + // For `map`, the outer field type is MESSAGE (synthetic entry), + // so `use_bytes` is false; the value type is decided by the shared + // `map_value_use_bytes` predicate (also used by the binary/text decoders and + // the view→owned conversion, so all sites stay in agreement). The bytes-key + // carve-out is documented there. + let map_value_use_bytes = map_entry.is_some_and(|e| { + crate::impl_message::map_value_use_bytes( + ctx, + map_entry_key_type(ctx, e, features), + map_entry_value_type(ctx, e, features), + proto_fqn, + field_name, + ) + }); + let bytes_type = if use_bytes { quote! { ::buffa::bytes::Bytes } } else { @@ -1391,8 +1411,9 @@ fn classify_field( }; // Configurable owned representation for `string` fields (default `String`). - // Map keys/values are unaffected — they always use `String`, mirroring the - // bytes path where map values always stay `Vec`. + // Map keys/values are unaffected — they always use `String`. (The bytes + // path now propagates `bytes_fields` to map values via + // `map_value_use_bytes`; the string path has no equivalent yet.) let string_repr = if field_type == Type::TYPE_STRING { ctx.string_repr(&field_fqn) } else { @@ -1402,7 +1423,7 @@ fn classify_field( let mut inner_opt_type: Option = None; let rust_type = if let Some(entry) = map_entry { - map_rust_type_from_entry(scope, entry, resolver)? + map_rust_type_from_entry(scope, entry, map_value_use_bytes, resolver)? } else if is_repeated { let elem = if field_type == Type::TYPE_BYTES { bytes_type @@ -1493,6 +1514,7 @@ fn classify_field( is_optional, is_required, use_bytes, + map_value_use_bytes, string_repr, map_key_type, map_value_type, @@ -1570,7 +1592,6 @@ fn generate_field( }; let custom_field_attrs = CodeGenContext::matching_attributes(&ctx.config.field_attributes, &field_fqn)?; - // bytes_fields map values are Vec, not Bytes — no shim needed there. let arbitrary_field_attr = if ctx.config.generate_arbitrary && info.use_bytes && !info.is_map { let helper = if info.is_optional { quote! { ::buffa::__private::arbitrary_bytes_opt } @@ -1580,6 +1601,11 @@ fn generate_field( quote! { ::buffa::__private::arbitrary_bytes } }; quote! { #[cfg_attr(feature = "arbitrary", arbitrary(with = #helper))] } + } else if ctx.config.generate_arbitrary && info.map_value_use_bytes { + // `HashMap` has no native `Arbitrary` impl (`Bytes` lacks + // one); generate a per-key-type shim that builds `HashMap>` + // first and maps values to `Bytes`. Wire-equivalent. + quote! { #[cfg_attr(feature = "arbitrary", arbitrary(with = ::buffa::__private::arbitrary_bytes_map))] } } else if ctx.config.generate_arbitrary && info.string_repr == crate::StringRepr::EcoString && !info.is_map @@ -1700,9 +1726,14 @@ fn map_entry_value_type( } /// Build the `HashMap` Rust type from an already-resolved map entry descriptor. +/// +/// `value_use_bytes` overrides the default `Vec` for a `bytes`-valued +/// map when the outer map field matches `bytes_fields`. Computed by the +/// caller (`classify_field`) so map and non-map paths share the same check. fn map_rust_type_from_entry( scope: MessageScope<'_>, entry: &DescriptorProto, + value_use_bytes: bool, resolver: &crate::imports::ImportResolver, ) -> Result { let MessageScope { @@ -1731,14 +1762,21 @@ fn map_rust_type_from_entry( features, resolver, )?; - let value_type = scalar_or_message_type_nested( - ctx, - value_field, - current_package, - nesting, - features, - resolver, - )?; + let value_type = if value_use_bytes + && crate::impl_message::effective_type_in_map_entry(ctx, value_field, features) + == Type::TYPE_BYTES + { + quote! { ::buffa::bytes::Bytes } + } else { + scalar_or_message_type_nested( + ctx, + value_field, + current_package, + nesting, + features, + resolver, + )? + }; let hm = resolver.hashmap(); Ok(quote! { #hm<#key_type, #value_type> }) diff --git a/buffa-codegen/src/view.rs b/buffa-codegen/src/view.rs index 744a85f..dc3cc92 100644 --- a/buffa-codegen/src/view.rs +++ b/buffa-codegen/src/view.rs @@ -20,8 +20,8 @@ use crate::impl_message::{ closed_enum_decode, closed_enum_decode_with_unknown, decode_fn_token, effective_type, effective_type_in_map_entry, field_string_repr, field_uses_bytes, find_map_entry_fields, is_explicit_presence_scalar, is_packed_type, is_real_oneof_member, is_required_field, - is_supported_field_type, validated_field_number, wire_type_byte, wire_type_check, - wire_type_token, + is_supported_field_type, map_value_use_bytes, validated_field_number, wire_type_byte, + wire_type_check, wire_type_token, }; use crate::message::{is_closed_enum, is_map_field, make_field_ident, rust_path_to_tokens}; use crate::oneof::{is_null_value_field, serde_helper_path}; @@ -1569,18 +1569,39 @@ fn map_to_owned_expr( field: &FieldDescriptorProto, ident: &proc_macro2::Ident, ) -> Result { - let MessageScope { ctx, features, .. } = scope; + let MessageScope { + ctx, + proto_fqn, + features, + .. + } = scope; let (key_fd, val_fd) = find_map_entry_fields(msg, field)?; + let field_name = field + .name + .as_deref() + .ok_or(CodeGenError::MissingField("field.name"))?; + let key_ty = effective_type_in_map_entry(ctx, key_fd, features); + let val_ty = effective_type_in_map_entry(ctx, val_fd, features); - let key_conv = match effective_type_in_map_entry(ctx, key_fd, features) { + let key_conv = match key_ty { Type::TYPE_STRING => quote! { k.to_string() }, // utf8_validation = NONE on a string map key: &[u8] → Vec. Type::TYPE_BYTES => quote! { k.to_vec() }, _ => quote! { *k }, }; - let val_conv = match effective_type_in_map_entry(ctx, val_fd, features) { + // `bytes_fields` on the outer map field promotes `bytes` values to `Bytes`, + // matching the owned-side map type (shared carve-out in `map_value_use_bytes`). + // When it holds, emit `bytes_from_source` directly: the predicate already + // implies `field_uses_bytes`, so routing through `bytes_to_owned` (which + // re-checks it) would be redundant. + let value_use_bytes = + map_value_use_bytes(ctx, Some(key_ty), Some(val_ty), proto_fqn, field_name); + let val_conv = match val_ty { Type::TYPE_STRING => quote! { v.to_string() }, + Type::TYPE_BYTES if value_use_bytes => { + quote! { ::buffa::view::bytes_from_source(__buffa_src, v) } + } Type::TYPE_BYTES => quote! { v.to_vec() }, Type::TYPE_MESSAGE => { // Verify the owned path resolves (catches missing imports at codegen time). diff --git a/buffa-test/build.rs b/buffa-test/build.rs index 5ac1697..0cef5a7 100644 --- a/buffa-test/build.rs +++ b/buffa-test/build.rs @@ -215,6 +215,27 @@ fn main() { .compile() .expect("buffa_build failed for basic.proto with use_bytes_type"); + // Carve-out (#76): a `map` whose key carries + // `[features.utf8_validation = NONE]`, compiled with BOTH + // strict_utf8_mapping() and use_bytes_type(). strict_utf8_mapping normalizes + // the NONE-validated string key to an effective `bytes` key, so the entry is + // an effective `map`, whose JSON helper + // (`bytes_key_bytes_val_map`) is the concrete `HashMap, Vec>`. + // The value must therefore stay `Vec` despite use_bytes_type(), NOT + // promote to `Bytes`. This module pins that the predicate honors the carve-out. + let utf8_bytes_out = + std::path::PathBuf::from(std::env::var("OUT_DIR").unwrap()).join("utf8_bytes_variant"); + std::fs::create_dir_all(&utf8_bytes_out).expect("create utf8_bytes_variant dir"); + buffa_build::Config::new() + .files(&["protos/utf8_validation.proto"]) + .includes(&["protos/"]) + .strict_utf8_mapping(true) + .use_bytes_type() + .generate_json(true) + .out_dir(utf8_bytes_out) + .compile() + .expect("buffa_build failed for utf8_validation.proto with strict_utf8_mapping + use_bytes_type"); + // Configurable string_type: a broad SmolStr default plus per-field // CompactString / EcoString overrides, with generate_json + arbitrary so // every string code path (decode/clear/view/json/arbitrary, including the diff --git a/buffa-test/protos/basic.proto b/buffa-test/protos/basic.proto index 1ff6c6d..a8f9dbf 100644 --- a/buffa-test/protos/basic.proto +++ b/buffa-test/protos/basic.proto @@ -74,10 +74,9 @@ message BytesContexts { bytes raw = 4; string text = 5; } - // bytes_fields config does NOT apply to map values (map_rust_type_from_entry - // goes through scalar_rust_type which unconditionally uses Vec). This - // field pins that behaviour: if someone implements bytes_fields-for-maps on - // only one of {owned, view-to_owned}, the type mismatch compiles-out here. + // Pins `bytes_fields` → `map` behavior (#76). See + // `Config::use_bytes_type` docs for the rule and the `map` + // carve-out; tests in bytes_type.rs assert wire / view→owned / JSON parity. map by_key = 6; } diff --git a/buffa-test/protos/utf8_validation.proto b/buffa-test/protos/utf8_validation.proto index e91e8fd..c7d27a0 100644 --- a/buffa-test/protos/utf8_validation.proto +++ b/buffa-test/protos/utf8_validation.proto @@ -24,6 +24,11 @@ message StringNoValidation { // Map with NONE key and value → HashMap, Vec>. map raw_labels = 5 [features.utf8_validation = NONE]; + + // NONE applies to the synthetic string key (the bytes value is unaffected). + // Under strict_utf8_mapping + use_bytes_type this exercises the #76 carve-out: + // the effective `map` keeps `Vec` values, NOT `Bytes`. + map raw_blobs = 6 [features.utf8_validation = NONE]; } message OneofNoValidation { diff --git a/buffa-test/src/lib.rs b/buffa-test/src/lib.rs index 1272734..48d1755 100644 --- a/buffa-test/src/lib.rs +++ b/buffa-test/src/lib.rs @@ -265,6 +265,23 @@ pub mod basic_bytes { include!(concat!(env!("OUT_DIR"), "/bytes_variant/basic.mod.rs")); } +// Carve-out (#76): utf8_validation.proto with a NONE-keyed `map`, +// compiled with strict_utf8_mapping() + use_bytes_type(). The effective +// `map` keeps `Vec` values; runtime checks live in +// `tests/bytes_type.rs`. +#[allow( + clippy::derivable_impls, + clippy::match_single_binding, + non_camel_case_types, + dead_code +)] +pub mod utf8_bytes { + include!(concat!( + env!("OUT_DIR"), + "/utf8_bytes_variant/utf8test.mod.rs" + )); +} + // Regression #88: bytes_fields + generate_arbitrary(true). Compilation is the // primary assertion — all four bytes field shapes (singular, optional, // repeated, oneof variant) must compile with the arbitrary shims in place. diff --git a/buffa-test/src/tests/arbitrary_bytes.rs b/buffa-test/src/tests/arbitrary_bytes.rs index 594a373..120a803 100644 --- a/buffa-test/src/tests/arbitrary_bytes.rs +++ b/buffa-test/src/tests/arbitrary_bytes.rs @@ -35,5 +35,10 @@ mod tests { let _ = b.slice(..); } } + // `map` values are `Bytes` too under the shim — `slice(..)` + // is `Bytes`-specific, so this pins that the value type isn't `Vec`. + for b in msg.by_key.values() { + let _ = b.slice(..); + } } } diff --git a/buffa-test/src/tests/bytes_type.rs b/buffa-test/src/tests/bytes_type.rs index 20d2808..98e26b0 100644 --- a/buffa-test/src/tests/bytes_type.rs +++ b/buffa-test/src/tests/bytes_type.rs @@ -295,7 +295,9 @@ fn test_bytes_type_json_all_contexts_roundtrip() { bytes::Bytes::from_static(b""), ], choice: Some(ChoiceOneof::Raw(bytes::Bytes::from_static(&[0xDE, 0xAD]))), - by_key: [("k".to_string(), vec![0x05])].into_iter().collect(), + by_key: [("k".to_string(), bytes::Bytes::from_static(&[0x05]))] + .into_iter() + .collect(), ..Default::default() }; let json = serde_json::to_string(&msg).expect("serialize"); @@ -363,24 +365,97 @@ fn test_bytes_type_json_cross_decodes_external_json() { } #[test] -fn test_bytes_type_map_value_stays_vec() { - // bytes_fields config does NOT propagate into map key/value types - // (map_rust_type_from_entry → scalar_rust_type hardcodes Vec). - // map_to_owned_expr correspondingly uses .to_vec(), not bytes_to_owned(). - // This test pins that agreement: if one side changes, compilation breaks. +fn test_bytes_type_map_value_uses_bytes() { + // Issue #76: bytes_fields propagates into map values (was Vec, now Bytes). + // Owned type, view→owned, encode, JSON, text, and arbitrary all agree on + // `Bytes` as the value type when `use_bytes_type()` is configured. This + // test pins the agreement: if any side regresses, compilation breaks. use buffa::MessageView; let msg = BytesContexts { - by_key: [("k".to_string(), b"v".to_vec())].into_iter().collect(), + by_key: [("k".to_string(), bytes::Bytes::from_static(b"v"))] + .into_iter() + .collect(), ..Default::default() }; - // Type assertion: map value is Vec, not bytes::Bytes, even under + // Type assertion: map value is bytes::Bytes, not Vec, under // use_bytes_type(). - let _: &std::collections::HashMap> = &msg.by_key; + let _: &std::collections::HashMap = &msg.by_key; let wire = msg.encode_to_vec(); let view = BytesContextsView::decode_view(&wire).expect("decode_view"); let owned: BytesContexts = view.to_owned_message(); - assert_eq!(owned.by_key.get("k").map(Vec::as_slice), Some(&b"v"[..])); + assert_eq!(owned.by_key.get("k").map(|b| &b[..]), Some(&b"v"[..])); + + // Owned binary decode (impl_message::map_merge_arm's decode_bytes_to_bytes + // arm), distinct from the view→owned path above. + let decoded = BytesContexts::decode(&mut wire.as_slice()).expect("decode"); + let _: &std::collections::HashMap = &decoded.by_key; + assert_eq!(decoded.by_key.get("k").map(|b| &b[..]), Some(&b"v"[..])); +} + +#[test] +fn test_bytes_type_map_value_to_owned_from_source_zero_copy() { + // Issue #76: with bytes_fields, map values participate in + // slice_ref'ing during to_owned_from_source — same as singular/repeated/ + // oneof bytes_fields. Tested separately from the round-trip so the aliasing + // assertion is precise. + use buffa::MessageView; + let msg = BytesContexts { + by_key: [("k".to_string(), bytes::Bytes::from_static(b"map-val"))] + .into_iter() + .collect(), + ..Default::default() + }; + let buf = bytes::Bytes::from(msg.encode_to_vec()); + let in_buf = |p: *const u8| { + let r = buf.as_ptr() as usize..buf.as_ptr() as usize + buf.len(); + r.contains(&(p as usize)) + }; + + let view = BytesContextsView::decode_view(&buf).expect("decode_view"); + let owned = view.to_owned_from_source(Some(&buf)); + + let value = owned.by_key.get("k").expect("map value"); + assert_eq!(&value[..], b"map-val"); + assert!( + in_buf(value.as_ptr()), + "map value should slice_ref" + ); +} + +#[test] +fn test_bytes_type_map_bytes_key_value_stays_vec() { + // Carve-out (#76): `raw_blobs` is a `map` whose key carries + // `[features.utf8_validation = NONE]`, compiled with strict_utf8_mapping() + + // use_bytes_type(). strict_utf8_mapping normalizes the NONE-validated key to + // an effective `bytes` key, making the entry an effective `map`. + // Its JSON helper (`bytes_key_bytes_val_map`) is the concrete + // `HashMap, Vec>`, so the value must stay `Vec` despite + // use_bytes_type() — promoting it to `Bytes` would be a + // `#[serde(with = ...)]` type mismatch. Contrast with + // `test_bytes_type_map_value_uses_bytes`, where a plain `String`-keyed map + // does promote the value to `Bytes`. + use crate::utf8_bytes::StringNoValidation; + + let msg = StringNoValidation { + raw_blobs: [(b"k".to_vec(), b"v".to_vec())].into_iter().collect(), + ..Default::default() + }; + // Both key and value are Vec here, NOT bytes::Bytes. + let _: &std::collections::HashMap, Vec> = &msg.raw_blobs; + + // Wire round-trip into the owned type. + let wire = msg.encode_to_vec(); + let decoded = StringNoValidation::decode(&mut wire.as_slice()).expect("decode"); + assert_eq!( + decoded.raw_blobs.get(&b"k"[..]).map(Vec::as_slice), + Some(&b"v"[..]) + ); + + // JSON round-trip through bytes_key_bytes_val_map (base64 key + value). + let json = serde_json::to_string(&msg).expect("serialize"); + let back: StringNoValidation = serde_json::from_str(&json).expect("deserialize"); + assert_eq!(back.raw_blobs, msg.raw_blobs); } #[test] @@ -397,7 +472,9 @@ fn test_bytes_type_view_encode_roundtrip() { ], maybe: Some(bytes::Bytes::from_static(&[0x00, 0xFF])), choice: Some(ChoiceOneof::Raw(bytes::Bytes::from_static(b"o"))), - by_key: [("k".to_string(), b"v".to_vec())].into_iter().collect(), + by_key: [("k".to_string(), bytes::Bytes::from_static(b"v"))] + .into_iter() + .collect(), ..Default::default() }; let wire = msg.encode_to_vec(); diff --git a/buffa/src/lib.rs b/buffa/src/lib.rs index 0b9137e..7d19ac8 100644 --- a/buffa/src/lib.rs +++ b/buffa/src/lib.rs @@ -314,6 +314,26 @@ pub mod __private { Ok(vv.into_iter().map(::bytes::Bytes::from).collect()) } + /// `Arbitrary` shim for `map` fields under + /// `bytes_fields`, where the value type is `bytes::Bytes`. + /// + /// Generic over the key type so the codegen call site doesn't need a + /// per-key-type shim; `K`'s own `Arbitrary` impl drives key generation. + /// The intermediate `HashMap>` keeps byte-consumption order + /// identical to the underlying impl. + #[cfg(feature = "arbitrary")] + pub fn arbitrary_bytes_map<'a, K>( + u: &mut ::arbitrary::Unstructured<'a>, + ) -> ::arbitrary::Result> + where + K: ::arbitrary::Arbitrary<'a> + ::core::cmp::Eq + ::core::hash::Hash, + { + let m: HashMap> = ::arbitrary::Arbitrary::arbitrary(u)?; + Ok(m.into_iter() + .map(|(k, v)| (k, ::bytes::Bytes::from(v))) + .collect()) + } + /// `arbitrary` helpers for `ecow::EcoString` string fields. /// /// Unlike `smol_str::SmolStr` and `compact_str::CompactString`, `EcoString`