Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,30 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),

## [Unreleased]

### Changed

- **`use_bytes_type()` / `use_bytes_type_in(...)` now applies to `map<K, bytes>`
values (#76).** Previously map values were always `Vec<u8>` regardless of
config — the only `bytes`-context not covered. They now match the type used
for singular / optional / repeated / oneof bytes fields under the same rule
(`bytes::Bytes` when configured), so `view → owned` conversion of map values
participates in the `to_owned_from_source` zero-copy `slice_ref` path just
like the other shapes. **Breaking** for code that already enabled
`use_bytes_type()` on a proto containing `map<K, bytes>`: at construction
sites, rewrite map-value construction from `Vec<u8>` to `bytes::Bytes`
(`b"v".to_vec()` → `bytes::Bytes::from_static(b"v")` for literals,
`bytes::Bytes::from(v)` for an owned `Vec<u8>`, or
`bytes::Bytes::copy_from_slice(s)` for a non-`'static` borrow). At read sites,
`bytes::Bytes` has no inherent `as_slice`, so any `as_slice()` on the value
needs replacing — e.g. `map.get(k).map(Vec::as_slice)` becomes
`map.get(k).map(|b| &b[..])`. One carve-out: an effective `map<bytes, bytes>`
keeps `Vec<u8>` values; this requires `strict_utf8_mapping(true)` *and* a
`map<string, bytes>` whose key carries `[features.utf8_validation = NONE]`
(`strict_utf8_mapping` alone keeps a plain `map<string, bytes>` value as
`Bytes`). See the `use_bytes_type_in` docs. Under `generate_arbitrary`,
affected map fields use the new `__private::arbitrary_bytes_map<K>` shim
(`K: Arbitrary + Eq + Hash` — every proto map-key type satisfies this).

### Fixed

- **Module redefinition error when a message and a sub-package share a name
Expand Down
34 changes: 31 additions & 3 deletions buffa-build/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -408,6 +408,15 @@ impl Config {
/// **JSON note**: fields normalized to bytes serialize as base64 in JSON
/// (the proto3 JSON encoding for `bytes`). Keep strict mapping disabled
/// for fields that need JSON string interop with other implementations.
///
/// **Interaction with [`use_bytes_type`]**: when both are enabled,
/// `map<bytes, bytes>` values stay `Vec<u8>` (the bytes-keyed JSON helper
/// is concrete `HashMap<Vec<u8>, Vec<u8>>`). All other `bytes` shapes —
/// singular / optional / repeated / oneof / `map<non-bytes, bytes>` —
/// still become `bytes::Bytes`. The asymmetry is documented; if you hit
/// it, see issue #76.
///
/// [`use_bytes_type`]: Self::use_bytes_type
#[must_use]
pub fn strict_utf8_mapping(mut self, enabled: bool) -> Self {
self.codegen_config.strict_utf8_mapping = enabled;
Expand Down Expand Up @@ -471,11 +480,24 @@ impl Config {
/// to all bytes fields, or specify individual field paths like
/// `".my.pkg.MyMessage.data"`.
///
/// Applies uniformly to singular, optional, repeated, oneof, **and
/// `map<K, bytes>`** values — the map case lets `view → owned`
/// conversion participate in the `to_owned_from_source` zero-copy
/// `slice_ref` path. One carve-out: an effective `map<bytes, bytes>` keeps
/// `Vec<u8>` values (the JSON helper for that combination is concrete
/// `HashMap<Vec<u8>, Vec<u8>>`); every other shape becomes `Bytes`. A
/// `bytes` map key is only reachable when [`strict_utf8_mapping`] is enabled
/// *and* the `map<string, bytes>` field carries
/// `[features.utf8_validation = NONE]` on its key, which normalizes the
/// string key to `bytes` — `strict_utf8_mapping` alone does not trigger it.
///
/// [`strict_utf8_mapping`]: Self::strict_utf8_mapping
///
/// # Example
///
/// ```rust,ignore
/// buffa_build::Config::new()
/// .bytes(&["."]) // all bytes fields use Bytes
/// .use_bytes_type_in(&["."]) // all bytes fields use Bytes
/// .files(&["proto/my_service.proto"])
/// .includes(&["proto/"])
/// .compile()
Expand All @@ -491,8 +513,14 @@ impl Config {

/// Use `bytes::Bytes` for all `bytes` fields in all messages.
///
/// This is a convenience for `.use_bytes_type_in(&["."])`. Use `.use_bytes_type_in(&[...])` with
/// specific proto paths if you only want `Bytes` for certain fields.
/// This is a convenience for `.use_bytes_type_in(&["."])`. Use
/// [`use_bytes_type_in`] with specific proto paths if you only want `Bytes`
/// for certain fields. See that method for the path-matching semantics, the
/// `map<K, bytes>` rule, and the `map<bytes, bytes>` carve-out under
/// [`strict_utf8_mapping`].
///
/// [`use_bytes_type_in`]: Self::use_bytes_type_in
/// [`strict_utf8_mapping`]: Self::strict_utf8_mapping
#[must_use]
pub fn use_bytes_type(mut self) -> Self {
self.codegen_config.bytes_fields.push(".".to_string());
Expand Down
50 changes: 47 additions & 3 deletions buffa-codegen/src/impl_message.rs
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,7 @@ pub fn generate_message_impl(
FieldKind::Map(f) => {
compute_stmts.push(map_compute_size_stmt(ctx, msg, f, features)?);
write_stmts.push(map_write_to_stmt(ctx, msg, f, features)?);
merge_arms.push(map_merge_arm(ctx, msg, f, features)?);
merge_arms.push(map_merge_arm(ctx, msg, f, proto_fqn, features)?);
clear_stmts.push(vec_field_clear_stmt(f)?);
}
FieldKind::Oneof {
Expand Down Expand Up @@ -849,6 +849,36 @@ pub(crate) fn field_uses_bytes(ctx: &CodeGenContext, proto_fqn: &str, field_name
ctx.use_bytes_type(&field_fqn)
}

/// Whether a `map<K, bytes>` value field should use `bytes::Bytes` instead of
/// `Vec<u8>`.
///
/// Single source of truth for the `bytes_fields` → map-value rule, shared by
/// `classify_field` (owned struct type, serde, and `arbitrary`), the binary and
/// text `map_merge_arm` decoders, and `view::map_to_owned_expr`. Centralizing it
/// keeps every site in agreement — a split decision would emit a `Bytes` value
/// on one side and `Vec<u8>` on another, surfacing only as a compile error in
/// the consuming crate.
///
/// `key_ty` / `val_ty` are the **effective** map-entry types (see
/// [`effective_type_in_map_entry`]); `None` means the entry lacks that field and
/// is treated as non-`bytes`, so a non-map caller naturally yields `false`. The
/// value becomes `Bytes` only when the value is proto `bytes`, the outer map
/// field matches `bytes_fields`, and the key is *not* effective-`bytes`. The key
/// carve-out tracks the one JSON helper (`bytes_key_bytes_val_map`) whose
/// signature is the concrete `HashMap<Vec<u8>, Vec<u8>>`; when the key is
/// effective-`bytes` the value must stay `Vec<u8>` to match it.
pub(crate) fn map_value_use_bytes(
ctx: &CodeGenContext,
key_ty: Option<Type>,
val_ty: Option<Type>,
proto_fqn: &str,
field_name: &str,
) -> bool {
val_ty == Some(Type::TYPE_BYTES)
&& key_ty != Some(Type::TYPE_BYTES)
&& field_uses_bytes(ctx, proto_fqn, field_name)
}

/// Resolve the [`StringRepr`](crate::StringRepr) for a `string`-typed field.
///
/// `proto_fqn` is the fully-qualified message name (no leading dot). Matched
Expand Down Expand Up @@ -2616,11 +2646,17 @@ fn map_element_encode_stmt(ty: Type, tag_num: u32, var: &Ident) -> TokenStream {
/// `buf_expr` is the token stream for the buffer expression — typically
/// `quote! { buf }` when the buffer is the outer `merge_to_limit` parameter
/// (already `&mut impl Buf`).
///
/// `use_bytes` switches a `bytes`-typed element from `decode_bytes` (→ `Vec<u8>`)
/// to `decode_bytes_to_bytes` (→ `bytes::Bytes`, zero-copy when `buf` is
/// `Bytes`-backed). Only meaningful for value reads on `map<K, bytes>` fields
/// when the outer field matches `bytes_fields`; keys are never bytes-typed.
fn map_element_decode_stmt(
ty: Type,
var: &Ident,
buf_expr: &TokenStream,
features: &ResolvedFeatures,
use_bytes: bool,
) -> TokenStream {
let wire_type = wire_type_token(ty);
let wire_byte = wire_type_byte(ty);
Expand All @@ -2636,6 +2672,9 @@ fn map_element_decode_stmt(
let closed = is_closed_enum(features);
let assign = match ty {
Type::TYPE_STRING => quote! { #var = ::buffa::types::decode_string(#buf_expr)?; },
Type::TYPE_BYTES if use_bytes => {
quote! { #var = ::buffa::types::decode_bytes_to_bytes(#buf_expr)?; }
}
Type::TYPE_BYTES => quote! { #var = ::buffa::types::decode_bytes(#buf_expr)?; },
Type::TYPE_ENUM => {
if closed {
Expand Down Expand Up @@ -2772,6 +2811,7 @@ fn map_merge_arm(
ctx: &CodeGenContext,
msg: &DescriptorProto,
field: &FieldDescriptorProto,
proto_fqn: &str,
features: &ResolvedFeatures,
) -> Result<TokenStream, CodeGenError> {
let field_name = field
Expand All @@ -2790,8 +2830,12 @@ fn map_merge_arm(
// referenced enum's declaration (not the parent message's).
let key_features = crate::features::resolve_field(ctx, key_fd, features);
let val_features = crate::features::resolve_field(ctx, val_fd, features);
let decode_key = map_element_decode_stmt(key_ty, &k, &buf_expr, &key_features);
let decode_val = map_element_decode_stmt(val_ty, &v, &buf_expr, &val_features);
// `bytes_fields` on `map<K, bytes>` → value decodes into `Bytes` (shared
// carve-out in `map_value_use_bytes`; bytes-key + bytes-value stays `Vec<u8>`).
let value_use_bytes =
map_value_use_bytes(ctx, Some(key_ty), Some(val_ty), proto_fqn, field_name);
let decode_key = map_element_decode_stmt(key_ty, &k, &buf_expr, &key_features, false);
let decode_val = map_element_decode_stmt(val_ty, &v, &buf_expr, &val_features, value_use_bytes);
let wire_check = wire_type_check(
field_number,
&quote! { ::buffa::encoding::WireType::LengthDelimited },
Expand Down
15 changes: 11 additions & 4 deletions buffa-codegen/src/impl_text.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ use crate::idents::rust_path_to_tokens;
use crate::impl_message::{
effective_type, effective_type_in_map_entry, field_string_repr, field_uses_bytes,
find_map_entry_fields, is_explicit_presence_scalar, is_non_default_expr, is_real_oneof_member,
is_required_field, is_supported_field_type,
is_required_field, is_supported_field_type, map_value_use_bytes,
};
use crate::message::{is_closed_enum, is_map_field, make_field_ident};
use crate::oneof::is_boxed_variant;
Expand Down Expand Up @@ -215,7 +215,7 @@ pub(crate) fn generate_text_impl(
}
let map_merge: Vec<_> = map_fields
.iter()
.map(|f| map_merge_arm(ctx, msg, f, current_package, features, nesting))
.map(|f| map_merge_arm(ctx, msg, f, current_package, proto_fqn, features, nesting))
.collect::<Result<_, _>>()?;

// Extension bracket syntax `[pkg.ext] { ... }` — encode side writes
Expand Down Expand Up @@ -978,6 +978,7 @@ fn map_merge_arm(
msg: &DescriptorProto,
field: &FieldDescriptorProto,
current_package: &str,
proto_fqn: &str,
features: &ResolvedFeatures,
nesting: usize,
) -> Result<TokenStream, CodeGenError> {
Expand Down Expand Up @@ -1007,8 +1008,11 @@ fn map_merge_arm(
}
};

// Value read. Map values are never `bytes::Bytes` (see basic.proto
// comment: map_rust_type_from_entry unconditionally uses Vec<u8>).
// `bytes_fields` on `map<K, bytes>` → value type is `Bytes`; convert
// `read_bytes()`'s `Vec<u8>` via `Bytes::from`. The bytes-key carve-out
// lives in the shared `map_value_use_bytes` predicate.
let value_use_bytes =
map_value_use_bytes(ctx, Some(key_ty), Some(val_ty), proto_fqn, proto_name);
let val_read = match val_ty {
Type::TYPE_MESSAGE => quote! {
{
Expand All @@ -1024,6 +1028,9 @@ fn map_merge_arm(
quote! { #read? }
}
Type::TYPE_STRING => quote! { __d.read_string()?.into_owned() },
Type::TYPE_BYTES if value_use_bytes => {
quote! { ::buffa::bytes::Bytes::from(__d.read_bytes()?) }
}
Type::TYPE_BYTES => quote! { __d.read_bytes()? },
Type::TYPE_INT32 | Type::TYPE_SINT32 | Type::TYPE_SFIXED32 => quote! { __d.read_i32()? },
Type::TYPE_INT64 | Type::TYPE_SINT64 | Type::TYPE_SFIXED64 => quote! { __d.read_i64()? },
Expand Down
62 changes: 50 additions & 12 deletions buffa-codegen/src/message.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1325,6 +1325,11 @@ struct FieldInfo {
/// True when the field is proto type `bytes` AND matches the `bytes_fields`
/// config — i.e. the struct field type is `bytes::Bytes` not `Vec<u8>`.
use_bytes: bool,
/// True when the field is `map<K, bytes>` AND the outer map field matches
/// the `bytes_fields` config — i.e. the map value type is `bytes::Bytes`
/// not `Vec<u8>`. Mutually exclusive with `use_bytes` (a map field's outer
/// type is `MESSAGE`, not `BYTES`).
map_value_use_bytes: bool,
/// The owned Rust type used for this field when it is proto type `string`
/// (singular, optional, or repeated; map keys/values are unaffected).
/// [`StringRepr::String`] for non-string fields and for string fields with
Expand Down Expand Up @@ -1383,6 +1388,21 @@ fn classify_field(
let field_fqn = format!(".{}.{}", proto_fqn, field_name);
let use_bytes = field_type == Type::TYPE_BYTES && ctx.use_bytes_type(&field_fqn);

// For `map<K, bytes>`, the outer field type is MESSAGE (synthetic entry),
// so `use_bytes` is false; the value type is decided by the shared
// `map_value_use_bytes` predicate (also used by the binary/text decoders and
// the view→owned conversion, so all sites stay in agreement). The bytes-key
// carve-out is documented there.
let map_value_use_bytes = map_entry.is_some_and(|e| {
crate::impl_message::map_value_use_bytes(
ctx,
map_entry_key_type(ctx, e, features),
map_entry_value_type(ctx, e, features),
proto_fqn,
field_name,
)
});

let bytes_type = if use_bytes {
quote! { ::buffa::bytes::Bytes }
} else {
Expand All @@ -1391,8 +1411,9 @@ fn classify_field(
};

// Configurable owned representation for `string` fields (default `String`).
// Map keys/values are unaffected — they always use `String`, mirroring the
// bytes path where map values always stay `Vec<u8>`.
// Map keys/values are unaffected — they always use `String`. (The bytes
// path now propagates `bytes_fields` to map values via
// `map_value_use_bytes`; the string path has no equivalent yet.)
let string_repr = if field_type == Type::TYPE_STRING {
ctx.string_repr(&field_fqn)
} else {
Expand All @@ -1402,7 +1423,7 @@ fn classify_field(

let mut inner_opt_type: Option<TokenStream> = None;
let rust_type = if let Some(entry) = map_entry {
map_rust_type_from_entry(scope, entry, resolver)?
map_rust_type_from_entry(scope, entry, map_value_use_bytes, resolver)?
} else if is_repeated {
let elem = if field_type == Type::TYPE_BYTES {
bytes_type
Expand Down Expand Up @@ -1493,6 +1514,7 @@ fn classify_field(
is_optional,
is_required,
use_bytes,
map_value_use_bytes,
string_repr,
map_key_type,
map_value_type,
Expand Down Expand Up @@ -1570,7 +1592,6 @@ fn generate_field(
};
let custom_field_attrs =
CodeGenContext::matching_attributes(&ctx.config.field_attributes, &field_fqn)?;
// bytes_fields map values are Vec<u8>, not Bytes — no shim needed there.
let arbitrary_field_attr = if ctx.config.generate_arbitrary && info.use_bytes && !info.is_map {
let helper = if info.is_optional {
quote! { ::buffa::__private::arbitrary_bytes_opt }
Expand All @@ -1580,6 +1601,11 @@ fn generate_field(
quote! { ::buffa::__private::arbitrary_bytes }
};
quote! { #[cfg_attr(feature = "arbitrary", arbitrary(with = #helper))] }
} else if ctx.config.generate_arbitrary && info.map_value_use_bytes {
// `HashMap<K, Bytes>` has no native `Arbitrary` impl (`Bytes` lacks
// one); generate a per-key-type shim that builds `HashMap<K, Vec<u8>>`
// first and maps values to `Bytes`. Wire-equivalent.
quote! { #[cfg_attr(feature = "arbitrary", arbitrary(with = ::buffa::__private::arbitrary_bytes_map))] }
} else if ctx.config.generate_arbitrary
&& info.string_repr == crate::StringRepr::EcoString
&& !info.is_map
Expand Down Expand Up @@ -1700,9 +1726,14 @@ fn map_entry_value_type(
}

/// Build the `HashMap<K, V>` Rust type from an already-resolved map entry descriptor.
///
/// `value_use_bytes` overrides the default `Vec<u8>` for a `bytes`-valued
/// map when the outer map field matches `bytes_fields`. Computed by the
/// caller (`classify_field`) so map and non-map paths share the same check.
fn map_rust_type_from_entry(
scope: MessageScope<'_>,
entry: &DescriptorProto,
value_use_bytes: bool,
resolver: &crate::imports::ImportResolver,
) -> Result<TokenStream, CodeGenError> {
let MessageScope {
Expand Down Expand Up @@ -1731,14 +1762,21 @@ fn map_rust_type_from_entry(
features,
resolver,
)?;
let value_type = scalar_or_message_type_nested(
ctx,
value_field,
current_package,
nesting,
features,
resolver,
)?;
let value_type = if value_use_bytes
&& crate::impl_message::effective_type_in_map_entry(ctx, value_field, features)
== Type::TYPE_BYTES
{
quote! { ::buffa::bytes::Bytes }
} else {
scalar_or_message_type_nested(
ctx,
value_field,
current_package,
nesting,
features,
resolver,
)?
};

let hm = resolver.hashmap();
Ok(quote! { #hm<#key_type, #value_type> })
Expand Down
Loading
Loading