diff --git a/assets-sync/src/headers.rs b/assets-sync/src/headers.rs index 0814400..1ab4901 100644 --- a/assets-sync/src/headers.rs +++ b/assets-sync/src/headers.rs @@ -11,6 +11,7 @@ //! appended response header. See HEADERS.md for the full reject list. use crate::glob::KeyPattern; +use crate::strip_comment; use http::{HeaderName, HeaderValue}; use mime::Mime; use std::str::FromStr; @@ -34,23 +35,30 @@ pub struct HeaderRule { #[derive(Debug, Clone, PartialEq, Eq)] pub struct ParseError { pub line: usize, + pub source: String, pub message: String, } impl std::fmt::Display for ParseError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "_headers: line {}: {}", self.line, self.message) + write!( + f, + "_headers: line {}: {} (source: `{}`)", + self.line, self.message, self.source + ) } } impl std::error::Error for ParseError {} /// Open block under construction during `parse`: -/// line_no of the path line, pattern, headers accumulated so far, -/// and the block's `Content-Type` value (if a `Content-Type:` line has -/// been seen yet — used to detect duplicates). +/// line_no of the path line, the raw path line (for error reporting), +/// pattern, headers accumulated so far, and the block's `Content-Type` +/// value (if a `Content-Type:` line has been seen yet — used to detect +/// duplicates). struct OpenBlock { line_no: usize, + source: String, pattern: KeyPattern, headers: Vec<(String, String)>, content_type: Option, @@ -142,6 +150,7 @@ pub fn parse(content: &str) -> Result, ParseError> { .next() .is_some_and(|c| c == ' ' || c == '\t'); + let source = raw.trim_end().to_string(); if !is_indented { // Path line. If a block is open, close it (must have headers). if let Some(block) = current.take() { @@ -150,10 +159,12 @@ pub fn parse(content: &str) -> Result, ParseError> { let token = stripped.trim(); let pattern = crate::glob::parse(token).map_err(|message| ParseError { line: line_no, + source: source.clone(), message, })?; current = Some(OpenBlock { line_no, + source, pattern, headers: Vec::new(), content_type: None, @@ -165,11 +176,13 @@ pub fn parse(content: &str) -> Result, ParseError> { let Some(block) = current.as_mut() else { return Err(ParseError { line: line_no, + source, message: "indented header line outside a path block".to_string(), }); }; let parsed = parse_header(stripped).map_err(|message| ParseError { line: line_no, + source: source.clone(), message, })?; match parsed { @@ -177,6 +190,7 @@ pub fn parse(content: &str) -> Result, ParseError> { if block.content_type.is_some() { return Err(ParseError { line: line_no, + source, message: "duplicate `Content-Type` in the same block".to_string(), }); } @@ -194,17 +208,11 @@ pub fn parse(content: &str) -> Result, ParseError> { Ok(rules) } -fn strip_comment(line: &str) -> &str { - match line.find('#') { - Some(idx) => &line[..idx], - None => line, - } -} - fn finalize_block(block: OpenBlock) -> Result { if block.headers.is_empty() && block.content_type.is_none() { return Err(ParseError { line: block.line_no, + source: block.source, message: "path block has no header lines under it".to_string(), }); } @@ -378,6 +386,23 @@ mod tests { ); } + #[test] + fn hash_inside_header_value_token_is_preserved() { + // `#` only begins a comment after whitespace — a fragment embedded in + // a header-value token stays attached. + let rules = parse( + "/api\n Content-Security-Policy: default-src 'self'; report-uri /csp#endpoint\n", + ) + .unwrap(); + assert_eq!( + rules[0].headers, + vec![( + "Content-Security-Policy".into(), + "default-src 'self'; report-uri /csp#endpoint".into() + )] + ); + } + #[test] fn header_value_with_internal_colon_is_preserved() { // Only the first colon separates name from value. diff --git a/assets-sync/src/lib.rs b/assets-sync/src/lib.rs index f186b08..a1b1195 100644 --- a/assets-sync/src/lib.rs +++ b/assets-sync/src/lib.rs @@ -6,3 +6,57 @@ pub mod html_handling; pub mod redirects; pub mod scan; pub mod sync; + +/// Strips a Netlify-style trailing comment from a single line of a `_redirects` +/// or `_headers` file. A `#` only starts a comment when it sits at the start of +/// the line or is preceded by whitespace; a `#` inside a token (e.g. the URL +/// fragment in `/to/#topic`, or a CSP `report-uri /csp#endpoint`) is preserved. +pub(crate) fn strip_comment(line: &str) -> &str { + let bytes = line.as_bytes(); + let mut prev_is_ws = true; + for (i, &b) in bytes.iter().enumerate() { + if b == b'#' && prev_is_ws { + return &line[..i]; + } + prev_is_ws = b.is_ascii_whitespace(); + } + line +} + +#[cfg(test)] +mod strip_comment_tests { + use super::strip_comment; + + #[test] + fn leading_hash_strips_whole_line() { + assert_eq!(strip_comment("# full-line comment"), ""); + } + + #[test] + fn hash_after_space_starts_comment() { + assert_eq!(strip_comment("/old /new 301 # tail"), "/old /new 301 "); + } + + #[test] + fn hash_after_tab_starts_comment() { + assert_eq!(strip_comment("/old /new 301\t# tail"), "/old /new 301\t"); + } + + #[test] + fn hash_inside_token_is_preserved() { + assert_eq!( + strip_comment("/from /to/#topic 301"), + "/from /to/#topic 301" + ); + } + + #[test] + fn no_hash_returns_input_unchanged() { + assert_eq!(strip_comment("/from /to 301"), "/from /to 301"); + } + + #[test] + fn empty_input() { + assert_eq!(strip_comment(""), ""); + } +} diff --git a/assets-sync/src/redirects.rs b/assets-sync/src/redirects.rs index 967792b..748821a 100644 --- a/assets-sync/src/redirects.rs +++ b/assets-sync/src/redirects.rs @@ -6,6 +6,7 @@ //! plugin can point users at the offending entry without a canister round-trip. use crate::canister::{RedirectRule, RulePattern}; +use crate::strip_comment; use http::StatusCode; use url::Url; @@ -16,12 +17,17 @@ const SUPPORTED_STATUSES: &[u16] = &[200, 301, 302, 307, 308, 404, 410]; #[derive(Debug, Clone, PartialEq, Eq)] pub struct ParseError { pub line: usize, + pub source: String, pub message: String, } impl std::fmt::Display for ParseError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "_redirects: line {}: {}", self.line, self.message) + write!( + f, + "_redirects: line {}: {} (source: `{}`)", + self.line, self.message, self.source + ) } } @@ -40,6 +46,7 @@ pub fn parse(content: &str) -> Result, ParseError> { } let rule = parse_line(body).map_err(|message| ParseError { line: line_no, + source: raw.trim_end().to_string(), message, })?; rules.push(rule); @@ -47,13 +54,6 @@ pub fn parse(content: &str) -> Result, ParseError> { Ok(rules) } -fn strip_comment(line: &str) -> &str { - match line.find('#') { - Some(idx) => &line[..idx], - None => line, - } -} - fn parse_line(body: &str) -> Result { let tokens: Vec<&str> = body.split_whitespace().collect(); match tokens.len() { @@ -205,6 +205,27 @@ mod tests { assert_eq!(r.status, 301); } + #[test] + fn hash_inside_token_is_preserved_as_fragment() { + // `#` only begins a comment after whitespace — a fragment in the `to` + // token stays attached to the URL. + let r = parse_one("/from-topic /to/#topic 301").unwrap(); + assert_eq!(r.from, RulePattern::Exact("/from-topic".into())); + assert_eq!(r.to, "/to/#topic"); + assert_eq!(r.status, 301); + } + + #[test] + fn hash_after_tab_starts_a_comment() { + // Tab counts as whitespace for the purposes of comment detection, so + // `\t#trailing` is a comment regardless of which whitespace char + // preceded it. + let r = parse_one("/old /new 301\t#trailing").unwrap(); + assert_eq!(r.to, "/new"); + let r = parse_one("/old\t/new\t301\t#trailing").unwrap(); + assert_eq!(r.to, "/new"); + } + #[test] fn whitespace_is_permissive() { // Mixed tabs/spaces between tokens. @@ -365,4 +386,20 @@ mod tests { let e = err(input); assert_eq!(e.line, 3); } + + #[test] + fn error_carries_source_line_for_display() { + // The plugin echoes parse errors verbatim to the user; the source line + // must be embedded so a line-number alone isn't the only hint. + let input = "\ +/good /good 301 +/incomplete /target +"; + let e = err(input); + assert_eq!(e.line, 2); + assert_eq!(e.source, "/incomplete /target"); + let rendered = format!("{e}"); + assert!(rendered.contains("/incomplete /target"), "{rendered}"); + assert!(rendered.contains("line 2"), "{rendered}"); + } }