From 6260cf1414a56da7bcf4d5a6eb9529c701aa3d64 Mon Sep 17 00:00:00 2001 From: Linwei Shang Date: Fri, 29 May 2026 16:35:22 -0400 Subject: [PATCH 1/2] fix(assets-sync): only treat `#` as a comment at line start or after whitespace Previously the `_redirects` and `_headers` parsers cut every line at the first `#`, which corrupted URL fragments such as `/to/#topic` and CSP `report-uri /csp#endpoint`. The new shared `strip_comment` in `lib.rs` matches Netlify's reference parser: a `#` only begins a comment when it sits at the start of the line or is preceded by ASCII whitespace, so a `#` embedded inside a token is preserved. Co-Authored-By: Claude Opus 4.7 (1M context) --- assets-sync/src/headers.rs | 25 ++++++++++++----- assets-sync/src/lib.rs | 54 ++++++++++++++++++++++++++++++++++++ assets-sync/src/redirects.rs | 29 ++++++++++++++----- 3 files changed, 94 insertions(+), 14 deletions(-) diff --git a/assets-sync/src/headers.rs b/assets-sync/src/headers.rs index 0814400..1708017 100644 --- a/assets-sync/src/headers.rs +++ b/assets-sync/src/headers.rs @@ -11,6 +11,7 @@ //! appended response header. See HEADERS.md for the full reject list. use crate::glob::KeyPattern; +use crate::strip_comment; use http::{HeaderName, HeaderValue}; use mime::Mime; use std::str::FromStr; @@ -194,13 +195,6 @@ pub fn parse(content: &str) -> Result, ParseError> { Ok(rules) } -fn strip_comment(line: &str) -> &str { - match line.find('#') { - Some(idx) => &line[..idx], - None => line, - } -} - fn finalize_block(block: OpenBlock) -> Result { if block.headers.is_empty() && block.content_type.is_none() { return Err(ParseError { @@ -378,6 +372,23 @@ mod tests { ); } + #[test] + fn hash_inside_header_value_token_is_preserved() { + // `#` only begins a comment after whitespace — a fragment embedded in + // a header-value token stays attached. + let rules = parse( + "/api\n Content-Security-Policy: default-src 'self'; report-uri /csp#endpoint\n", + ) + .unwrap(); + assert_eq!( + rules[0].headers, + vec![( + "Content-Security-Policy".into(), + "default-src 'self'; report-uri /csp#endpoint".into() + )] + ); + } + #[test] fn header_value_with_internal_colon_is_preserved() { // Only the first colon separates name from value. diff --git a/assets-sync/src/lib.rs b/assets-sync/src/lib.rs index f186b08..a1b1195 100644 --- a/assets-sync/src/lib.rs +++ b/assets-sync/src/lib.rs @@ -6,3 +6,57 @@ pub mod html_handling; pub mod redirects; pub mod scan; pub mod sync; + +/// Strips a Netlify-style trailing comment from a single line of a `_redirects` +/// or `_headers` file. A `#` only starts a comment when it sits at the start of +/// the line or is preceded by whitespace; a `#` inside a token (e.g. the URL +/// fragment in `/to/#topic`, or a CSP `report-uri /csp#endpoint`) is preserved. +pub(crate) fn strip_comment(line: &str) -> &str { + let bytes = line.as_bytes(); + let mut prev_is_ws = true; + for (i, &b) in bytes.iter().enumerate() { + if b == b'#' && prev_is_ws { + return &line[..i]; + } + prev_is_ws = b.is_ascii_whitespace(); + } + line +} + +#[cfg(test)] +mod strip_comment_tests { + use super::strip_comment; + + #[test] + fn leading_hash_strips_whole_line() { + assert_eq!(strip_comment("# full-line comment"), ""); + } + + #[test] + fn hash_after_space_starts_comment() { + assert_eq!(strip_comment("/old /new 301 # tail"), "/old /new 301 "); + } + + #[test] + fn hash_after_tab_starts_comment() { + assert_eq!(strip_comment("/old /new 301\t# tail"), "/old /new 301\t"); + } + + #[test] + fn hash_inside_token_is_preserved() { + assert_eq!( + strip_comment("/from /to/#topic 301"), + "/from /to/#topic 301" + ); + } + + #[test] + fn no_hash_returns_input_unchanged() { + assert_eq!(strip_comment("/from /to 301"), "/from /to 301"); + } + + #[test] + fn empty_input() { + assert_eq!(strip_comment(""), ""); + } +} diff --git a/assets-sync/src/redirects.rs b/assets-sync/src/redirects.rs index 967792b..9538e9e 100644 --- a/assets-sync/src/redirects.rs +++ b/assets-sync/src/redirects.rs @@ -6,6 +6,7 @@ //! plugin can point users at the offending entry without a canister round-trip. use crate::canister::{RedirectRule, RulePattern}; +use crate::strip_comment; use http::StatusCode; use url::Url; @@ -47,13 +48,6 @@ pub fn parse(content: &str) -> Result, ParseError> { Ok(rules) } -fn strip_comment(line: &str) -> &str { - match line.find('#') { - Some(idx) => &line[..idx], - None => line, - } -} - fn parse_line(body: &str) -> Result { let tokens: Vec<&str> = body.split_whitespace().collect(); match tokens.len() { @@ -205,6 +199,27 @@ mod tests { assert_eq!(r.status, 301); } + #[test] + fn hash_inside_token_is_preserved_as_fragment() { + // `#` only begins a comment after whitespace — a fragment in the `to` + // token stays attached to the URL. + let r = parse_one("/from-topic /to/#topic 301").unwrap(); + assert_eq!(r.from, RulePattern::Exact("/from-topic".into())); + assert_eq!(r.to, "/to/#topic"); + assert_eq!(r.status, 301); + } + + #[test] + fn hash_after_tab_starts_a_comment() { + // Tab counts as whitespace for the purposes of comment detection, so + // `\t#trailing` is a comment regardless of which whitespace char + // preceded it. + let r = parse_one("/old /new 301\t#trailing").unwrap(); + assert_eq!(r.to, "/new"); + let r = parse_one("/old\t/new\t301\t#trailing").unwrap(); + assert_eq!(r.to, "/new"); + } + #[test] fn whitespace_is_permissive() { // Mixed tabs/spaces between tokens. From 0fd18686b163f7061137b9a1ee44dcd796fa2a41 Mon Sep 17 00:00:00 2001 From: Linwei Shang Date: Fri, 29 May 2026 16:46:23 -0400 Subject: [PATCH 2/2] fix(assets-sync): include source line in _redirects/_headers parse errors A bare line number forces users to count down the file to find the bad rule. Embedding the offending line in the error message makes it obvious which entry to fix. Co-Authored-By: Claude Opus 4.7 (1M context) --- assets-sync/src/headers.rs | 22 ++++++++++++++++++---- assets-sync/src/redirects.rs | 24 +++++++++++++++++++++++- 2 files changed, 41 insertions(+), 5 deletions(-) diff --git a/assets-sync/src/headers.rs b/assets-sync/src/headers.rs index 1708017..1ab4901 100644 --- a/assets-sync/src/headers.rs +++ b/assets-sync/src/headers.rs @@ -35,23 +35,30 @@ pub struct HeaderRule { #[derive(Debug, Clone, PartialEq, Eq)] pub struct ParseError { pub line: usize, + pub source: String, pub message: String, } impl std::fmt::Display for ParseError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "_headers: line {}: {}", self.line, self.message) + write!( + f, + "_headers: line {}: {} (source: `{}`)", + self.line, self.message, self.source + ) } } impl std::error::Error for ParseError {} /// Open block under construction during `parse`: -/// line_no of the path line, pattern, headers accumulated so far, -/// and the block's `Content-Type` value (if a `Content-Type:` line has -/// been seen yet — used to detect duplicates). +/// line_no of the path line, the raw path line (for error reporting), +/// pattern, headers accumulated so far, and the block's `Content-Type` +/// value (if a `Content-Type:` line has been seen yet — used to detect +/// duplicates). struct OpenBlock { line_no: usize, + source: String, pattern: KeyPattern, headers: Vec<(String, String)>, content_type: Option, @@ -143,6 +150,7 @@ pub fn parse(content: &str) -> Result, ParseError> { .next() .is_some_and(|c| c == ' ' || c == '\t'); + let source = raw.trim_end().to_string(); if !is_indented { // Path line. If a block is open, close it (must have headers). if let Some(block) = current.take() { @@ -151,10 +159,12 @@ pub fn parse(content: &str) -> Result, ParseError> { let token = stripped.trim(); let pattern = crate::glob::parse(token).map_err(|message| ParseError { line: line_no, + source: source.clone(), message, })?; current = Some(OpenBlock { line_no, + source, pattern, headers: Vec::new(), content_type: None, @@ -166,11 +176,13 @@ pub fn parse(content: &str) -> Result, ParseError> { let Some(block) = current.as_mut() else { return Err(ParseError { line: line_no, + source, message: "indented header line outside a path block".to_string(), }); }; let parsed = parse_header(stripped).map_err(|message| ParseError { line: line_no, + source: source.clone(), message, })?; match parsed { @@ -178,6 +190,7 @@ pub fn parse(content: &str) -> Result, ParseError> { if block.content_type.is_some() { return Err(ParseError { line: line_no, + source, message: "duplicate `Content-Type` in the same block".to_string(), }); } @@ -199,6 +212,7 @@ fn finalize_block(block: OpenBlock) -> Result { if block.headers.is_empty() && block.content_type.is_none() { return Err(ParseError { line: block.line_no, + source: block.source, message: "path block has no header lines under it".to_string(), }); } diff --git a/assets-sync/src/redirects.rs b/assets-sync/src/redirects.rs index 9538e9e..748821a 100644 --- a/assets-sync/src/redirects.rs +++ b/assets-sync/src/redirects.rs @@ -17,12 +17,17 @@ const SUPPORTED_STATUSES: &[u16] = &[200, 301, 302, 307, 308, 404, 410]; #[derive(Debug, Clone, PartialEq, Eq)] pub struct ParseError { pub line: usize, + pub source: String, pub message: String, } impl std::fmt::Display for ParseError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "_redirects: line {}: {}", self.line, self.message) + write!( + f, + "_redirects: line {}: {} (source: `{}`)", + self.line, self.message, self.source + ) } } @@ -41,6 +46,7 @@ pub fn parse(content: &str) -> Result, ParseError> { } let rule = parse_line(body).map_err(|message| ParseError { line: line_no, + source: raw.trim_end().to_string(), message, })?; rules.push(rule); @@ -380,4 +386,20 @@ mod tests { let e = err(input); assert_eq!(e.line, 3); } + + #[test] + fn error_carries_source_line_for_display() { + // The plugin echoes parse errors verbatim to the user; the source line + // must be embedded so a line-number alone isn't the only hint. + let input = "\ +/good /good 301 +/incomplete /target +"; + let e = err(input); + assert_eq!(e.line, 2); + assert_eq!(e.source, "/incomplete /target"); + let rendered = format!("{e}"); + assert!(rendered.contains("/incomplete /target"), "{rendered}"); + assert!(rendered.contains("line 2"), "{rendered}"); + } }