Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 36 additions & 11 deletions assets-sync/src/headers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
//! appended response header. See HEADERS.md for the full reject list.

use crate::glob::KeyPattern;
use crate::strip_comment;
use http::{HeaderName, HeaderValue};
use mime::Mime;
use std::str::FromStr;
Expand All @@ -34,23 +35,30 @@ pub struct HeaderRule {
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseError {
pub line: usize,
pub source: String,
pub message: String,
}

impl std::fmt::Display for ParseError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "_headers: line {}: {}", self.line, self.message)
write!(
f,
"_headers: line {}: {} (source: `{}`)",
self.line, self.message, self.source
)
}
}

impl std::error::Error for ParseError {}

/// Open block under construction during `parse`:
/// line_no of the path line, pattern, headers accumulated so far,
/// and the block's `Content-Type` value (if a `Content-Type:` line has
/// been seen yet — used to detect duplicates).
/// line_no of the path line, the raw path line (for error reporting),
/// pattern, headers accumulated so far, and the block's `Content-Type`
/// value (if a `Content-Type:` line has been seen yet — used to detect
/// duplicates).
struct OpenBlock {
line_no: usize,
source: String,
pattern: KeyPattern,
headers: Vec<(String, String)>,
content_type: Option<Mime>,
Expand Down Expand Up @@ -142,6 +150,7 @@ pub fn parse(content: &str) -> Result<Vec<HeaderRule>, ParseError> {
.next()
.is_some_and(|c| c == ' ' || c == '\t');

let source = raw.trim_end().to_string();
if !is_indented {
// Path line. If a block is open, close it (must have headers).
if let Some(block) = current.take() {
Expand All @@ -150,10 +159,12 @@ pub fn parse(content: &str) -> Result<Vec<HeaderRule>, ParseError> {
let token = stripped.trim();
let pattern = crate::glob::parse(token).map_err(|message| ParseError {
line: line_no,
source: source.clone(),
message,
})?;
current = Some(OpenBlock {
line_no,
source,
pattern,
headers: Vec::new(),
content_type: None,
Expand All @@ -165,18 +176,21 @@ pub fn parse(content: &str) -> Result<Vec<HeaderRule>, ParseError> {
let Some(block) = current.as_mut() else {
return Err(ParseError {
line: line_no,
source,
message: "indented header line outside a path block".to_string(),
});
};
let parsed = parse_header(stripped).map_err(|message| ParseError {
line: line_no,
source: source.clone(),
message,
})?;
match parsed {
ParsedHeader::ContentType(mime) => {
if block.content_type.is_some() {
return Err(ParseError {
line: line_no,
source,
message: "duplicate `Content-Type` in the same block".to_string(),
});
}
Expand All @@ -194,17 +208,11 @@ pub fn parse(content: &str) -> Result<Vec<HeaderRule>, ParseError> {
Ok(rules)
}

fn strip_comment(line: &str) -> &str {
match line.find('#') {
Some(idx) => &line[..idx],
None => line,
}
}

fn finalize_block(block: OpenBlock) -> Result<HeaderRule, ParseError> {
if block.headers.is_empty() && block.content_type.is_none() {
return Err(ParseError {
line: block.line_no,
source: block.source,
message: "path block has no header lines under it".to_string(),
});
}
Expand Down Expand Up @@ -378,6 +386,23 @@ mod tests {
);
}

#[test]
fn hash_inside_header_value_token_is_preserved() {
// `#` only begins a comment after whitespace — a fragment embedded in
// a header-value token stays attached.
let rules = parse(
"/api\n Content-Security-Policy: default-src 'self'; report-uri /csp#endpoint\n",
)
.unwrap();
assert_eq!(
rules[0].headers,
vec![(
"Content-Security-Policy".into(),
"default-src 'self'; report-uri /csp#endpoint".into()
)]
);
}

#[test]
fn header_value_with_internal_colon_is_preserved() {
// Only the first colon separates name from value.
Expand Down
54 changes: 54 additions & 0 deletions assets-sync/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,57 @@ pub mod html_handling;
pub mod redirects;
pub mod scan;
pub mod sync;

/// Strips a Netlify-style trailing comment from a single line of a `_redirects`
/// or `_headers` file. A `#` only starts a comment when it sits at the start of
/// the line or is preceded by whitespace; a `#` inside a token (e.g. the URL
/// fragment in `/to/#topic`, or a CSP `report-uri /csp#endpoint`) is preserved.
pub(crate) fn strip_comment(line: &str) -> &str {
let bytes = line.as_bytes();
let mut prev_is_ws = true;
for (i, &b) in bytes.iter().enumerate() {
if b == b'#' && prev_is_ws {
return &line[..i];
}
prev_is_ws = b.is_ascii_whitespace();
}
line
}

#[cfg(test)]
mod strip_comment_tests {
use super::strip_comment;

#[test]
fn leading_hash_strips_whole_line() {
assert_eq!(strip_comment("# full-line comment"), "");
}

#[test]
fn hash_after_space_starts_comment() {
assert_eq!(strip_comment("/old /new 301 # tail"), "/old /new 301 ");
}

#[test]
fn hash_after_tab_starts_comment() {
assert_eq!(strip_comment("/old /new 301\t# tail"), "/old /new 301\t");
}

#[test]
fn hash_inside_token_is_preserved() {
assert_eq!(
strip_comment("/from /to/#topic 301"),
"/from /to/#topic 301"
);
}

#[test]
fn no_hash_returns_input_unchanged() {
assert_eq!(strip_comment("/from /to 301"), "/from /to 301");
}

#[test]
fn empty_input() {
assert_eq!(strip_comment(""), "");
}
}
53 changes: 45 additions & 8 deletions assets-sync/src/redirects.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
//! plugin can point users at the offending entry without a canister round-trip.

use crate::canister::{RedirectRule, RulePattern};
use crate::strip_comment;
use http::StatusCode;
use url::Url;

Expand All @@ -16,12 +17,17 @@ const SUPPORTED_STATUSES: &[u16] = &[200, 301, 302, 307, 308, 404, 410];
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseError {
pub line: usize,
pub source: String,
pub message: String,
}

impl std::fmt::Display for ParseError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "_redirects: line {}: {}", self.line, self.message)
write!(
f,
"_redirects: line {}: {} (source: `{}`)",
self.line, self.message, self.source
)
}
}

Expand All @@ -40,20 +46,14 @@ pub fn parse(content: &str) -> Result<Vec<RedirectRule>, ParseError> {
}
let rule = parse_line(body).map_err(|message| ParseError {
line: line_no,
source: raw.trim_end().to_string(),
message,
})?;
rules.push(rule);
}
Ok(rules)
}

fn strip_comment(line: &str) -> &str {
match line.find('#') {
Some(idx) => &line[..idx],
None => line,
}
}

fn parse_line(body: &str) -> Result<RedirectRule, String> {
let tokens: Vec<&str> = body.split_whitespace().collect();
match tokens.len() {
Expand Down Expand Up @@ -205,6 +205,27 @@ mod tests {
assert_eq!(r.status, 301);
}

#[test]
fn hash_inside_token_is_preserved_as_fragment() {
// `#` only begins a comment after whitespace — a fragment in the `to`
// token stays attached to the URL.
let r = parse_one("/from-topic /to/#topic 301").unwrap();
assert_eq!(r.from, RulePattern::Exact("/from-topic".into()));
assert_eq!(r.to, "/to/#topic");
assert_eq!(r.status, 301);
}

#[test]
fn hash_after_tab_starts_a_comment() {
// Tab counts as whitespace for the purposes of comment detection, so
// `\t#trailing` is a comment regardless of which whitespace char
// preceded it.
let r = parse_one("/old /new 301\t#trailing").unwrap();
assert_eq!(r.to, "/new");
let r = parse_one("/old\t/new\t301\t#trailing").unwrap();
assert_eq!(r.to, "/new");
}

#[test]
fn whitespace_is_permissive() {
// Mixed tabs/spaces between tokens.
Expand Down Expand Up @@ -365,4 +386,20 @@ mod tests {
let e = err(input);
assert_eq!(e.line, 3);
}

#[test]
fn error_carries_source_line_for_display() {
// The plugin echoes parse errors verbatim to the user; the source line
// must be embedded so a line-number alone isn't the only hint.
let input = "\
/good /good 301
/incomplete /target
";
let e = err(input);
assert_eq!(e.line, 2);
assert_eq!(e.source, "/incomplete /target");
let rendered = format!("{e}");
assert!(rendered.contains("/incomplete /target"), "{rendered}");
assert!(rendered.contains("line 2"), "{rendered}");
}
}
Loading